Repo created

Fr4nz D13trich 2025-11-22 13:58:55 +01:00
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions

tools/python/.gitignore vendored Normal file

@@ -0,0 +1,2 @@
osm_cache.json
transit/18*/


@@ -0,0 +1,57 @@
#!/usr/bin/env python3
from __future__ import print_function
import struct
import sys
import numpy
class Analyzer:
"""
The binary format is a sequence of 24-byte records, each holding three big-endian doubles:
time since the beginning of the measurement : double
unknown and irrelevant field : double
momentary consumption calculated for the current time segment : double
"""
def __init__(self):
self.duration = 0.0
self.consumption = []
self.mean = 0.0
self.std = 0.0
self.avg = 0.0
self.averages = []
def read_file(self, file_path):
binary = bytearray()
with open(file_path, "rb") as f:
binary = bytearray(f.read())
for i in range(0, len(binary) - 24, 24):
res = struct.unpack(">ddd", binary[i:i+24])
current_duration = res[0]
if current_duration <= self.duration:
print("Unexpected elapsed time value: not greater than the previous one.")
sys.exit(2) # this should never happen because the file is written sequentially
current_consumption = res[2]
self.averages.append(current_consumption / (current_duration - self.duration))
self.duration = current_duration
self.consumption.append(current_consumption)
self.calculate_stats()
def calculate_stats(self):
self.mean = numpy.mean(self.averages)
self.std = numpy.std(self.averages)
self.avg = sum(self.consumption) / self.duration
if __name__ == "__main__":
for file_path in sys.argv[1:]:
analyzer = Analyzer()
analyzer.read_file(file_path)
print("{}\n\tavg: {}\n\tmean: {}\n\tstd: {}".format(file_path, analyzer.avg, analyzer.mean, analyzer.std))

tools/python/Util.py Normal file

@@ -0,0 +1,14 @@
from contextlib import contextmanager
import shutil
import tempfile
try:
from tempfile import TemporaryDirectory
except ImportError:
@contextmanager
def TemporaryDirectory():
name = tempfile.mkdtemp()
try:
yield name
finally:
shutil.rmtree(name)
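
Usage is the same whichever implementation ends up bound to the name:

```python
import os

# The directory and everything in it is removed when the with-block exits.
with TemporaryDirectory() as tmp_dir:
    with open(os.path.join(tmp_dir, "scratch.txt"), "w") as f:
        f.write("temporary data")
```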

tools/python/__init__.py Normal file


@@ -0,0 +1,16 @@
# airmaps - building maps using Airflow
## Storage
A repository for resulting and temporary files.
Currently, the storage is a WebDAV server.
## Description of DAGs:
1. Update_planet - updates the .o5m planet file.
2. Build_coastline - builds coastline files.
3. Generate_open_source_maps - builds free maps for maps.me.
All results are published to the storage.
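
Artifacts published by these DAGs can be pulled back from the storage with the WebDAV helper added in this commit (`airmaps/instruments/storage.py`); the remote path below is illustrative, matching the debug-mode `STORAGE_PREFIX` of `/tests`:

```python
from airmaps.instruments import storage

# Illustrative remote path: the Update_planet DAG publishes the planet here
# when STORAGE_PREFIX is "/tests" (debug mode).
storage.wd_fetch("/tests/planet_regular/planet-latest.o5m", "/tmp/planet-latest.o5m")
```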


@@ -0,0 +1,12 @@
import os
from airmaps.instruments import settings
CONFIG_PATH = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"var",
"etc",
"airmaps.ini",
)
settings.init(CONFIG_PATH)


@@ -0,0 +1,93 @@
import logging
import os
import shutil
from datetime import timedelta
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airmaps.instruments import settings
from airmaps.instruments import storage
from airmaps.instruments.utils import get_latest_filename
from airmaps.instruments.utils import make_rm_build_task
from airmaps.instruments.utils import put_current_date_in_filename
from airmaps.instruments.utils import rm_build
from maps_generator.generator import stages_declaration as sd
from maps_generator.generator.env import Env
from maps_generator.generator.env import WORLD_COASTS_NAME
from maps_generator.maps_generator import run_generation
logger = logging.getLogger("airmaps")
DAG = DAG(
"Build_coastline",
schedule_interval=timedelta(days=1),
default_args={
"owner": "OMaps",
"depends_on_past": True,
"start_date": days_ago(0),
"email": settings.EMAILS,
"email_on_failure": True,
"email_on_retry": False,
"retries": 0,
"retry_delay": timedelta(minutes=5),
"priority_weight": 1,
},
)
COASTLINE_STORAGE_PATH = f"{settings.STORAGE_PREFIX}/coasts"
def publish_coastline(**kwargs):
build_name = kwargs["ti"].xcom_pull(key="build_name")
env = Env(build_name=build_name)
for name in (f"{WORLD_COASTS_NAME}.geom", f"{WORLD_COASTS_NAME}.rawgeom"):
coastline = put_current_date_in_filename(name)
latest = get_latest_filename(name)
coastline_full = os.path.join(env.paths.coastline_path, coastline)
latest_full = os.path.join(env.paths.coastline_path, latest)
shutil.move(os.path.join(env.paths.coastline_path, name), coastline_full)
os.symlink(coastline, latest_full)
storage.wd_publish(coastline_full, f"{COASTLINE_STORAGE_PATH}/{coastline}")
storage.wd_publish(latest_full, f"{COASTLINE_STORAGE_PATH}/{latest}")
def build_coastline(**kwargs):
env = Env()
kwargs["ti"].xcom_push(key="build_name", value=env.build_name)
run_generation(
env,
(
sd.StageDownloadAndConvertPlanet(),
sd.StageCoastline(use_old_if_fail=False),
sd.StageCleanup(),
),
)
env.finish()
BUILD_COASTLINE_TASK = PythonOperator(
task_id="Build_coastline_task",
provide_context=True,
python_callable=build_coastline,
on_failure_callback=lambda ctx: rm_build(**ctx),
dag=DAG,
)
PUBLISH_COASTLINE_TASK = PythonOperator(
task_id="Publish_coastline_task",
provide_context=True,
python_callable=publish_coastline,
dag=DAG,
)
RM_BUILD_TASK = make_rm_build_task(DAG)
BUILD_COASTLINE_TASK >> PUBLISH_COASTLINE_TASK >> RM_BUILD_TASK


@@ -0,0 +1,154 @@
import logging
from datetime import timedelta
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airmaps.instruments import settings
from airmaps.instruments import storage
from airmaps.instruments.utils import make_rm_build_task
from airmaps.instruments.utils import run_generation_from_first_stage
from maps_generator.generator import stages_declaration as sd
from maps_generator.generator.env import Env
from maps_generator.generator.env import PathProvider
from maps_generator.generator.env import get_all_countries_list
from maps_generator.maps_generator import run_generation
logger = logging.getLogger("airmaps")
MAPS_STORAGE_PATH = f"{settings.STORAGE_PREFIX}/maps"
class MapsGenerationDAG(DAG):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
build_prolog_task = PythonOperator(
task_id="Build_prolog_task",
provide_context=True,
python_callable=MapsGenerationDAG.build_prolog,
dag=self,
)
build_epilog_task = PythonOperator(
task_id="Build_epilog_task",
provide_context=True,
python_callable=MapsGenerationDAG.build_epilog,
dag=self,
)
publish_maps_task = PythonOperator(
task_id="Publish_maps_task",
provide_context=True,
python_callable=MapsGenerationDAG.publish_maps,
dag=self,
)
rm_build_task = make_rm_build_task(self)
build_epilog_task >> publish_maps_task >> rm_build_task
for country in get_all_countries_list(PathProvider.borders_path()):
build_prolog_task >> self.make_mwm_operator(country) >> build_epilog_task
@staticmethod
def get_params(namespace="env", **kwargs):
return kwargs.get("params", {}).get(namespace, {})
@staticmethod
def build_prolog(**kwargs):
params = MapsGenerationDAG.get_params(**kwargs)
env = Env(**params)
kwargs["ti"].xcom_push(key="build_name", value=env.build_name)
run_generation(
env,
(
sd.StageDownloadAndConvertPlanet(),
sd.StageCoastline(),
sd.StagePreprocess(),
sd.StageFeatures(),
sd.StageDownloadDescriptions(),
),
)
@staticmethod
def make_build_mwm_func(country):
def build_mwm(**kwargs):
build_name = kwargs["ti"].xcom_pull(key="build_name")
params = MapsGenerationDAG.get_params(**kwargs)
params.update({"build_name": build_name, "countries": [country,]})
env = Env(**params)
# We need to check for the existence of mwm.tmp. This is needed when we want
# to build mwms from a part of the planet.
tmp_mwm_name = env.get_tmp_mwm_names()
assert len(tmp_mwm_name) <= 1
if not tmp_mwm_name:
logger.warning(f"mwm.tmp does not exist for {country}.")
return
run_generation_from_first_stage(env, (sd.StageMwm(),), build_lock=False)
return build_mwm
@staticmethod
def build_epilog(**kwargs):
build_name = kwargs["ti"].xcom_pull(key="build_name")
params = MapsGenerationDAG.get_params(**kwargs)
params.update({"build_name": build_name})
env = Env(**params)
run_generation_from_first_stage(
env,
(
sd.StageCountriesTxt(),
sd.StageLocalAds(),
sd.StageStatistics(),
sd.StageCleanup(),
),
)
env.finish()
@staticmethod
def publish_maps(**kwargs):
build_name = kwargs["ti"].xcom_pull(key="build_name")
params = MapsGenerationDAG.get_params(**kwargs)
params.update({"build_name": build_name})
env = Env(**params)
subdir = MapsGenerationDAG.get_params(namespace="storage", **kwargs)["subdir"]
storage_path = f"{MAPS_STORAGE_PATH}/{subdir}"
storage.wd_publish(env.paths.mwm_path, f"{storage_path}/{env.mwm_version}/")
def make_mwm_operator(self, country):
normalized_name = "__".join(country.lower().split())
return PythonOperator(
task_id=f"Build_country_{normalized_name}_task",
provide_context=True,
python_callable=MapsGenerationDAG.make_build_mwm_func(country),
dag=self,
)
PARAMS = {"storage": {"subdir": "open_source"}}
if settings.DEBUG:
PARAMS["env"] = {
# The planet file in debug mode does not contain the Russia_Moscow territory.
# It is included here for testing.
"countries": ["Cuba", "Haiti", "Jamaica", "Cayman Islands", "Russia_Moscow"]
}
OPEN_SOURCE_MAPS_GENERATION_DAG = MapsGenerationDAG(
"Generate_open_source_maps",
schedule_interval=timedelta(days=7),
default_args={
"owner": "OMaps",
"depends_on_past": True,
"start_date": days_ago(0),
"email": settings.EMAILS,
"email_on_failure": True,
"email_on_retry": False,
"retries": 0,
"retry_delay": timedelta(minutes=5),
"priority_weight": 1,
"params": PARAMS,
},
)
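
For clarity, a small sketch (hypothetical values) of how `MapsGenerationDAG.get_params` splits the task's `params` dictionary into namespaces: the `env` namespace feeds `Env(**params)` and the `storage` namespace supplies the publish subdirectory.

```python
# Hypothetical params, shaped like PARAMS above.
kwargs = {"params": {"env": {"countries": ["Cuba"]},
                     "storage": {"subdir": "open_source"}}}

env_params = MapsGenerationDAG.get_params(namespace="env", **kwargs)
storage_params = MapsGenerationDAG.get_params(namespace="storage", **kwargs)
print(env_params)      # {'countries': ['Cuba']}
print(storage_params)  # {'subdir': 'open_source'}
```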


@@ -0,0 +1,83 @@
import logging
from datetime import timedelta
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airmaps.instruments import settings
from airmaps.instruments import storage
from airmaps.instruments.utils import make_rm_build_task
from maps_generator.generator import stages_declaration as sd
from maps_generator.generator.env import Env
from maps_generator.maps_generator import run_generation
from maps_generator.utils.md5 import md5_ext
logger = logging.getLogger("airmaps")
DAG = DAG(
"Update_planet",
schedule_interval=timedelta(days=1),
default_args={
"owner": "OMaps",
"depends_on_past": True,
"start_date": days_ago(0),
"email": settings.EMAILS,
"email_on_failure": True,
"email_on_retry": False,
"retries": 0,
"retry_delay": timedelta(minutes=5),
"priority_weight": 1,
},
)
PLANET_STORAGE_PATH = f"{settings.STORAGE_PREFIX}/planet_regular/planet-latest.o5m"
def update_planet(**kwargs):
env = Env()
kwargs["ti"].xcom_push(key="build_name", value=env.build_name)
if settings.DEBUG:
env.add_skipped_stage(sd.StageUpdatePlanet)
run_generation(
env,
(
sd.StageDownloadAndConvertPlanet(),
sd.StageUpdatePlanet(),
sd.StageCleanup(),
),
)
env.finish()
def publish_planet(**kwargs):
build_name = kwargs["ti"].xcom_pull(key="build_name")
env = Env(build_name=build_name)
storage.wd_publish(env.paths.planet_o5m, PLANET_STORAGE_PATH)
storage.wd_publish(md5_ext(env.paths.planet_o5m), md5_ext(PLANET_STORAGE_PATH))
UPDATE_PLANET_TASK = PythonOperator(
task_id="Update_planet_task",
provide_context=True,
python_callable=update_planet,
dag=DAG,
)
PUBLISH_PLANET_TASK = PythonOperator(
task_id="Publish_planet_task",
provide_context=True,
python_callable=publish_planet,
dag=DAG,
)
RM_BUILD_TASK = make_rm_build_task(DAG)
UPDATE_PLANET_TASK >> PUBLISH_PLANET_TASK >> RM_BUILD_TASK


@@ -0,0 +1,62 @@
import sys
from typing import AnyStr
from maps_generator.generator import settings
STORAGE_PREFIX = ""
# Storage settings
WD_HOST = ""
WD_LOGIN = ""
WD_PASSWORD = ""
# Common section
EMAILS = []
settings.LOGGING["loggers"]["airmaps"] = {
"handlers": ["stdout", "file"],
"level": "DEBUG",
"propagate": True,
}
def get_airmaps_emails(emails: AnyStr):
if not emails:
return []
for ch in [",", ";", ":"]:
emails = emails.replace(ch, " ")
return list(filter(None, [e.strip() for e in emails.split(" ")]))
def init(default_settings_path: AnyStr):
settings.init(default_settings_path)
# Try to read the config and override the default settings
cfg = settings.CfgReader(default_settings_path)
# Storage section
global WD_HOST
global WD_LOGIN
global WD_PASSWORD
WD_HOST = cfg.get_opt("Storage", "WD_HOST", WD_HOST)
WD_LOGIN = cfg.get_opt("Storage", "WD_LOGIN", WD_LOGIN)
WD_PASSWORD = cfg.get_opt("Storage", "WD_PASSWORD", WD_PASSWORD)
# Common section
global EMAILS
emails = cfg.get_opt("Common", "EMAILS", "")
EMAILS = get_airmaps_emails(emails)
# Import all constants from maps_generator.generator.settings.
thismodule = sys.modules[__name__]
for name in dir(settings):
if not name.startswith("_") and name.isupper():
value = getattr(settings, name)
setattr(thismodule, name, value)
global STORAGE_PREFIX
if settings.DEBUG:
STORAGE_PREFIX = "/tests"
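
A quick illustration of the e-mail parsing above (addresses are made up):

```python
# Hypothetical config value: addresses separated by commas, semicolons and spaces.
emails = "alice@example.com, bob@example.com; carol@example.com"
print(get_airmaps_emails(emails))
# ['alice@example.com', 'bob@example.com', 'carol@example.com']
```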


@@ -0,0 +1,27 @@
import logging
import webdav.client as wc
from airmaps.instruments import settings
logger = logging.getLogger("airmaps")
WD_OPTIONS = {
"webdav_hostname": settings.WD_HOST,
"webdav_login": settings.WD_LOGIN,
"webdav_password": settings.WD_PASSWORD,
}
def wd_fetch(src, dst):
logger.info(f"Fetch form {src} to {dst} with options {WD_OPTIONS}.")
client = wc.Client(WD_OPTIONS)
client.download_sync(src, dst)
def wd_publish(src, dst):
logger.info(f"Publish form {src} to {dst} with options {WD_OPTIONS}.")
client = wc.Client(WD_OPTIONS)
tmp = f"{dst[:-1]}__/" if dst[-1] == "/" else f"{dst}__"
client.upload_sync(local_path=src, remote_path=tmp)
client.move(remote_path_from=tmp, remote_path_to=dst)
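
For illustration, the temporary remote paths wd_publish uploads to before the final move (destinations are hypothetical but shaped like the ones the DAGs use); uploading to a `__`-suffixed path and then moving it into place keeps readers from ever seeing a half-uploaded artifact:

```python
# Hypothetical destinations, shaped like the DAGs' publish paths.
dst_file = "/tests/planet_regular/planet-latest.o5m"
dst_dir = "/tests/maps/open_source/200101/"  # mwm_version "200101" is made up

tmp_file = f"{dst_file[:-1]}__/" if dst_file[-1] == "/" else f"{dst_file}__"
tmp_dir = f"{dst_dir[:-1]}__/" if dst_dir[-1] == "/" else f"{dst_dir}__"
print(tmp_file)  # /tests/planet_regular/planet-latest.o5m__
print(tmp_dir)   # /tests/maps/open_source/200101__/
```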


@@ -0,0 +1,48 @@
import os
import shutil
from datetime import datetime
from typing import Iterable
from airflow.operators.python_operator import PythonOperator
from maps_generator.generator.env import Env
from maps_generator.generator.stages import Stage
from maps_generator.generator.stages import get_stage_name
from maps_generator.maps_generator import run_generation
def put_current_date_in_filename(filename):
path, name = os.path.split(filename)
parts = name.split(".", maxsplit=1)
parts[0] += f"__{datetime.today().strftime('%Y_%m_%d')}"
return os.path.join(path, ".".join(parts))
def get_latest_filename(filename, prefix=""):
path, name = os.path.split(filename)
parts = name.split(".", maxsplit=1)
assert len(parts) != 0, parts
parts[0] = f"{prefix}latest"
return os.path.join(path, ".".join(parts))
def rm_build(**kwargs):
build_name = kwargs["ti"].xcom_pull(key="build_name")
env = Env(build_name=build_name)
shutil.rmtree(env.build_path)
def make_rm_build_task(dag):
return PythonOperator(
task_id="Rm_build_task",
provide_context=True,
python_callable=rm_build,
dag=dag,
)
def run_generation_from_first_stage(
env: Env, stages: Iterable[Stage], build_lock: bool = True
):
from_stage = get_stage_name(next(iter(stages)))
run_generation(env, stages, from_stage, build_lock)
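
A quick illustration of the two filename helpers above (the date in the first result depends on the current day, of course):

```python
print(put_current_date_in_filename("WorldCoasts.geom"))           # e.g. WorldCoasts__2020_01_01.geom
print(get_latest_filename("WorldCoasts.geom"))                    # latest.geom
print(get_latest_filename("WorldCoasts.geom", prefix="coasts_"))  # coasts_latest.geom
```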


@@ -0,0 +1,6 @@
omim-data-all
omim-maps_generator
apache-airflow[postgres]==1.10.10
psycopg2-binary==2.8.4
cryptography>=41.0.0
webdavclient==1.0.8


@@ -0,0 +1,5 @@
-r ../maps_generator/requirements_dev.txt
apache-airflow[postgres]==1.10.10
psycopg2-binary==2.8.4
cryptography>=41.0.0
webdavclient==1.0.8


@@ -0,0 +1,4 @@
./.git*
./android
./iphone
./xcode


@@ -0,0 +1,92 @@
# Sandbox
This project shows how airmaps works on your computer.
## Setup
You must have [docker](https://docs.docker.com/get-docker/) and [docker-compose](https://docs.docker.com/compose/install/) installed.
0. Change working directory:
```sh
$ cd omim/tools/python/airmaps/sandbox
```
1. Build airmaps service:
```sh
sandbox$ ./build.sh
```
2. Create the storage (the `sandbox/storage` directory):
```sh
sandbox$ ./create_storage.sh
```
Note: You may need `sudo`, because `./create_storage.sh` tries to change the owner of the `sandbox/storage/tests` directory.
## Usage
### Starting
0. Change working directory:
```sh
$ cd omim/tools/python/airmaps/sandbox
```
1. Run all services:
```sh
sandbox$ docker-compose up
```
2. Open http://localhost in your browser.
Note: You can see the results of the airmaps runs in `sandbox/storage/tests`.
### Stopping
0. Change working directory:
```sh
$ cd omim/tools/python/airmaps/sandbox
```
1. Stop all services:
Press Ctrl+C, then
```sh
sandbox$ docker-compose down
```
### Clean
#### Clean storage and intermediate files:
0. Change working directory:
```sh
$ cd omim/tools/python/airmaps/sandbox
```
1. Clean storage and intermediate files:
```sh
sandbox$ ./clean.sh
```
#### Remove images:
0. Change working directory:
```sh
$ cd omim/tools/python/airmaps/sandbox
```
1. Remove images:
```sh
sandbox$ docker-compose down --rmi all
```


@@ -0,0 +1,28 @@
FROM python:3.6
ARG TZ=Etc/UTC
WORKDIR /omim/
ADD . .
RUN apt-get update && apt-get install -y \
build-essential \
cmake \
libgl1-mesa-dev \
libsqlite3-dev \
qt5-default \
zlib1g-dev \
tzdata \
locales-all
RUN ln -fs /usr/share/zoneinfo/$TZ /etc/localtime && \
dpkg-reconfigure --frontend noninteractive tzdata
RUN ./tools/unix/build_omim.sh -rs generator_tool
RUN pip install --upgrade pip
RUN pip install werkzeug==0.16.0 \
SQLAlchemy==1.3.15 \
-r ./tools/python/airmaps/requirements_dev.txt


@@ -0,0 +1,856 @@
[core]
# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
# This path must be absolute
dags_folder = /omim/tools/python/airmaps/dags
# The folder where airflow should store its log files
# This path must be absolute
base_log_folder = /airflow_home/logs
# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search.
# Users must supply an Airflow connection id that provides access to the storage
# location. If remote_logging is set to true, see UPDATING.md for additional
# configuration requirements.
remote_logging = False
remote_log_conn_id =
remote_base_log_folder =
encrypt_s3_logs = False
# Logging level
logging_level = INFO
fab_logging_level = WARN
# Logging class
# Specify the class that will specify the logging configuration
# This class has to be on the python classpath
# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG
logging_config_class =
# Log format
# Colour the logs when the controlling terminal is a TTY.
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
# Log filename format
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
dag_processor_manager_log_location = /airflow_home/logs/dag_processor_manager/dag_processor_manager.log
# Hostname by providing a path to a callable, which will resolve the hostname
# The format is "package:function". For example,
# default value "socket:getfqdn" means that result from getfqdn() of "socket" package will be used as hostname
# No argument should be required in the function specified.
# If using IP address as hostname is preferred, use value "airflow.utils.net:get_host_ip_address"
hostname_callable = socket:getfqdn
# Default timezone in case supplied date times are naive
# can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam)
default_timezone = system
# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor
executor = LocalExecutor
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines, more information on
# their website
sql_alchemy_conn = postgresql+psycopg2://postgres:postgres@db:5432/airflow
# The encoding for the databases
sql_engine_encoding = utf-8
# If SqlAlchemy should pool database connections.
sql_alchemy_pool_enabled = True
# The SqlAlchemy pool size is the maximum number of database connections
# in the pool. 0 indicates no limit.
sql_alchemy_pool_size = 5
# The maximum overflow size of the pool.
# When the number of checked-out connections reaches the size set in pool_size,
# additional connections will be returned up to this limit.
# When those additional connections are returned to the pool, they are disconnected and discarded.
# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow,
# and the total number of "sleeping" connections the pool will allow is pool_size.
# max_overflow can be set to -1 to indicate no overflow limit;
# no limit will be placed on the total number of concurrent connections. Defaults to 10.
sql_alchemy_max_overflow = 10
# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite. If the number of DB connections is ever exceeded,
# a lower config value will allow the system to recover faster.
sql_alchemy_pool_recycle = 1800
# Check connection at the start of each connection pool checkout.
# Typically, this is a simple statement like “SELECT 1”.
# More information here: https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
sql_alchemy_pool_pre_ping = True
# The schema to use for the metadata database
# SqlAlchemy supports databases with the concept of multiple schemas.
sql_alchemy_schema =
# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
parallelism = 32
# The number of task instances allowed to run concurrently by the scheduler
dag_concurrency = 16
# Are DAGs paused by default at creation
dags_are_paused_at_creation = True
# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 16
# Whether to load the examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = False
# Where your Airflow plugins are stored
plugins_folder = /airflow_home/plugins
# Secret key to save connection passwords in the db
fernet_key = uoTKzPCjhVBsERkDylXY5g1hYeg7OAYjk_a_ek2YMwQ=
# Whether to disable pickling dags
donot_pickle = False
# How long before timing out a python file import
dagbag_import_timeout = 30
# How long before timing out a DagFileProcessor, which processes a dag file
dag_file_processor_timeout = 50
# The class to use for running task instances in a subprocess
task_runner = StandardTaskRunner
# If set, tasks without a `run_as_user` argument will be run with this user
# Can be used to de-elevate a sudo user running Airflow when executing tasks
default_impersonation =
# What security module to use (for example kerberos):
security =
# If set to False enables some unsecure features like Charts and Ad Hoc Queries.
# In 2.0 will default to True.
secure_mode = False
# Turn unit test mode on (overwrites many configuration options with test
# values at runtime)
unit_test_mode = False
# Name of handler to read task instance logs.
# Default to use task handler.
task_log_reader = task
# Whether to enable pickling for xcom (note that this is insecure and allows for
# RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False).
enable_xcom_pickling = True
# When a task is killed forcefully, this is the amount of time in seconds that
# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
killed_task_cleanup_time = 60
# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or
# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
dag_run_conf_overrides_params = False
# Worker initialisation check to validate Metadata Database connection
worker_precheck = False
# When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`.
dag_discovery_safe_mode = True
# The number of retries each task is going to have by default. Can be overridden at dag or task level.
default_task_retries = 0
[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
# webserver
api_client = airflow.api.client.local_client
# If you set web_server_url_prefix, do NOT forget to append it here, ex:
# endpoint_url = http://localhost:8080/myroot
# So api will look like: http://localhost:8080/myroot/api/experimental/...
endpoint_url = http://localhost:8080
[api]
# How to authenticate users of the API
auth_backend = airflow.api.auth.backend.default
[lineage]
# what lineage backend to use
backend =
[atlas]
sasl_enabled = False
host =
port = 21000
username =
password =
[operators]
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0
[hive]
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =
[webserver]
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080
# Default timezone to display all dates in the UI, can be UTC, system, or
# any IANA timezone string (e.g. Europe/Amsterdam). If left empty the
# default value of core/default_timezone will be used
# Example: default_ui_timezone = America/New_York
default_ui_timezone = system
# The ip specified when starting the web server
web_server_host = 0.0.0.0
# The port on which to run the web server
web_server_port = 8080
# Paths to the SSL certificate and key for the web server. When both are
# provided SSL will be enabled. This does not change the web server port.
web_server_ssl_cert =
web_server_ssl_key =
# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
web_server_master_timeout = 120
# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 120
# Number of workers to refresh at a time. When set to 0, worker refresh is
# disabled. When nonzero, airflow periodically refreshes webserver workers by
# bringing up new ones and killing old ones.
worker_refresh_batch_size = 1
# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30
# Secret key used to run your flask app
secret_key = temporary_key
# Number of workers to run the Gunicorn web server
workers = 4
# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
worker_class = sync
# Log files for the gunicorn webserver. '-' means log to stderr.
access_logfile = -
error_logfile = -
# Expose the configuration file in the web server
# This is only applicable for the flask-admin based web UI (non FAB-based).
# In the FAB-based web UI with RBAC feature,
# access to configuration is controlled by role permissions.
expose_config = False
# Set to true to turn on authentication:
# https://airflow.apache.org/security.html#web-authentication
authenticate = False
# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False
# Filtering mode. Choices include user (default) and ldapgroup.
# Ldap group filtering requires using the ldap backend
#
# Note that the ldap server needs the "memberOf" overlay to be set up
# in order to use the ldapgroup mode.
owner_mode = user
# Default DAG view. Valid values are:
# tree, graph, duration, gantt, landing_times
dag_default_view = tree
# Default DAG orientation. Valid values are:
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
dag_orientation = LR
# Puts the webserver in demonstration mode; blurs the names of Operators for
# privacy.
demo_mode = False
# The amount of time (in secs) webserver will wait for initial handshake
# while fetching logs from other worker machine
log_fetch_timeout_sec = 5
# By default, the webserver shows paused DAGs. Flip this to hide paused
# DAGs by default
hide_paused_dags_by_default = False
# Consistent page size across all listing views in the UI
page_size = 100
# Use FAB-based webserver with RBAC feature
rbac = False
# Define the color of navigation bar
navbar_color = #007A87
# Default dagrun to show in UI
default_dag_run_display_number = 25
# Enable werkzeug `ProxyFix` middleware
enable_proxy_fix = False
# Set secure flag on session cookie
cookie_secure = False
# Set samesite policy on session cookie
cookie_samesite =
# Default setting for wrap toggle on DAG code and TI log views.
default_wrap = False
# Send anonymous user activity to your analytics tool
# analytics_tool = # choose from google_analytics, segment, or metarouter
# analytics_id = XXXXXXXXXXX
[email]
email_backend = airflow.utils.email.send_email_smtp
[smtp]
# If you want airflow to send emails on retries, failure, and you want to use
# the airflow.utils.email.send_email_smtp function, you have to configure an
# smtp server here
smtp_host = localhost
smtp_starttls = True
smtp_ssl = False
# Uncomment and set the user/pass settings if you want to use SMTP AUTH
# smtp_user = airflow
# smtp_password = airflow
smtp_port = 25
smtp_mail_from = airflow@example.com
[sentry]
# Sentry (https://docs.sentry.io) integration
sentry_dsn =
[celery]
# This section only applies if you are using the CeleryExecutor in
# [core] section above
# The app name that will be used by celery
celery_app_name = airflow.executors.celery_executor
# The concurrency that will be used when starting workers with the
# "airflow worker" command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
worker_concurrency = 40
# The maximum and minimum concurrency that will be used when starting workers with the
# "airflow worker" command (always keep minimum processes, but grow to maximum if necessary).
# Note the value should be "max_concurrency,min_concurrency"
# Pick these numbers based on resources on worker box and the nature of the task.
# If autoscale option is available, worker_concurrency will be ignored.
# http://docs.celeryproject.org/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale
# worker_autoscale = 16,12
# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the workers local log files to the airflow main
# web server, who then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused, and open
# visible from the main web server to connect into the workers.
worker_log_server_port = 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
# The Celery result_backend. When a job finishes, it needs to update the
# metadata of the job. Therefore it will post a message on a message bus,
# or insert it into a database (depending of the backend)
# This status is used by the scheduler to update the state of the task
# The use of a database is highly recommended
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
result_backend = db+mysql://airflow:airflow@localhost:3306/airflow
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the IP that Celery Flower runs on
flower_host = 0.0.0.0
# The root URL for Flower
# Ex: flower_url_prefix = /flower
flower_url_prefix =
# This defines the port that Celery Flower runs on
flower_port = 5555
# Securing Flower with Basic Authentication
# Accepts user:password pairs separated by a comma
# Example: flower_basic_auth = user1:password1,user2:password2
flower_basic_auth =
# Default queue that tasks get assigned to and that worker listen on.
default_queue = default
# How many processes CeleryExecutor uses to sync task state.
# 0 means to use max(1, number of cores - 1) processes.
sync_parallelism = 0
# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
# In case of using SSL
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =
# Celery Pool implementation.
# Choices include: prefork (default), eventlet, gevent or solo.
# See:
# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
pool = prefork
[celery_broker_transport_options]
# This section is for specifying options which can be passed to the
# underlying celery broker transport. See:
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_transport_options
# The visibility timeout defines the number of seconds to wait for the worker
# to acknowledge the task before the message is redelivered to another worker.
# Make sure to increase the visibility timeout to match the time of the longest
# ETA you're planning to use.
#
# visibility_timeout is only supported for Redis and SQS celery brokers.
# See:
# http://docs.celeryproject.org/en/master/userguide/configuration.html#std:setting-broker_transport_options
#
#visibility_timeout = 21600
[dask]
# This section only applies if you are using the DaskExecutor in
# [core] section above
# The IP address and port of the Dask cluster's scheduler.
cluster_address = 127.0.0.1:8786
# TLS/ SSL settings to access a secured Dask scheduler.
tls_ca =
tls_cert =
tls_key =
[scheduler]
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
# listen (in seconds).
job_heartbeat_sec = 5
# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
scheduler_heartbeat_sec = 5
# after how much time should the scheduler terminate in seconds
# -1 indicates to run continuously (see also num_runs)
run_duration = -1
# The number of times to try to schedule each DAG file
# -1 indicates unlimited number
num_runs = -1
# The number of seconds to wait between consecutive DAG file processing
processor_poll_interval = 1
# after how much time (seconds) a new DAGs should be picked up from the filesystem
min_file_process_interval = 0
# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
dag_dir_list_interval = 300
# How often should stats be printed to the logs
print_stats_interval = 30
# If the last scheduler heartbeat happened more than scheduler_health_check_threshold ago (in seconds),
# scheduler is considered unhealthy.
# This is used by the health check in the "/health" endpoint
scheduler_health_check_threshold = 30
child_process_log_directory = /airflow_home/logs/scheduler
# Local task jobs periodically heartbeat to the DB. If the job has
# not heartbeat in this many seconds, the scheduler will mark the
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 300
# Turn off scheduler catchup by setting this to False.
# Default behavior is unchanged and
# Command Line Backfills still work, but the scheduler
# will not do scheduler catchup if this is False,
# however it can be set on a per DAG basis in the
# DAG definition (catchup)
catchup_by_default = True
# This changes the batch size of queries in the scheduling main loop.
# If this is too high, SQL query performance may be impacted by one
# or more of the following:
# - reversion to full table scan
# - complexity of query predicate
# - excessive locking
#
# Additionally, you may hit the maximum allowable query length for your db.
#
# Set this to 0 for no limit (not advised)
max_tis_per_query = 512
# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow
# If you want to avoid sending all the available metrics to StatsD,
# you can configure an allow list of prefixes to send only the metrics that
# start with the elements of the list (e.g: scheduler,executor,dagrun)
statsd_allow_list =
# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run.
max_threads = 8
authenticate = False
# Turn off scheduler use of cron intervals by setting this to False.
# DAGs submitted manually in the web UI or with trigger_dag will still run.
use_job_schedule = True
[ldap]
# set this to ldaps://<your.ldap.server>:<port>
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL
# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
ignore_malformed_schema = False
[mesos]
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050
# The framework name which Airflow scheduler will register itself as on mesos
framework_name = Airflow
# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_cpu = 1
# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_memory = 256
# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
checkpoint = False
# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits
# until the configured timeout for
# the MesosExecutor framework to re-register after a failover. Mesos
# shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
# failover_timeout = 604800
# Enable framework authentication for mesos
# See http://mesos.apache.org/documentation/latest/configuration/
authenticate = False
# Mesos credentials, if authentication is enabled
# default_principal = admin
# default_secret = admin
# Optional Docker Image to run on slave before running the command
# This image should be accessible from mesos slave i.e mesos slave
# should be able to pull this docker image before executing the command.
# docker_image_slave = puckel/docker-airflow
[kerberos]
ccache = /tmp/airflow_krb5_ccache
# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab
[github_enterprise]
api_rev = v3
[admin]
# UI to hide sensitive variable fields when set to True
hide_sensitive_variable_fields = True
[elasticsearch]
# Elasticsearch host
host =
# Format of the log_id, which is used to query for a given tasks logs
log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log
# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: The code will prefix the https:// automatically, don't include that here.
frontend =
# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False
# Instead of the default log formatter, write the log lines as JSON
json_format = False
# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message
[elasticsearch_configs]
use_ssl = False
verify_certs = True
[kubernetes]
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
worker_container_repository =
worker_container_tag =
worker_container_image_pull_policy = IfNotPresent
# If True (default), worker pods will be deleted upon termination
delete_worker_pods = True
# Number of Kubernetes Worker Pod creation calls per scheduler loop
worker_pods_creation_batch_size = 1
# The Kubernetes namespace where airflow workers should be created. Defaults to `default`
namespace = default
# The name of the Kubernetes ConfigMap Containing the Airflow Configuration (this file)
airflow_configmap =
# For docker image already contains DAGs, this is set to `True`, and the worker will search for dags in dags_folder,
# otherwise use git sync or dags volume claim to mount DAGs
dags_in_image = False
# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
dags_volume_subpath =
# For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
dags_volume_claim =
# For volume mounted logs, the worker will look in this subpath for logs
logs_volume_subpath =
# A shared volume claim for the logs
logs_volume_claim =
# For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync)
# Useful in local environment, discouraged in production
dags_volume_host =
# A hostPath volume for the logs
# Useful in local environment, discouraged in production
logs_volume_host =
# A list of configMapsRefs to envFrom. If more than one configMap is
# specified, provide a comma separated list: configmap_a,configmap_b
env_from_configmap_ref =
# A list of secretRefs to envFrom. If more than one secret is
# specified, provide a comma separated list: secret_a,secret_b
env_from_secret_ref =
# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
git_repo =
git_branch =
git_subpath =
# Use git_user and git_password for user authentication or git_ssh_key_secret_name and git_ssh_key_secret_key
# for SSH authentication
git_user =
git_password =
git_sync_root = /git
git_sync_dest = repo
git_dags_folder_mount_point =
# To get Git-sync SSH authentication set up follow this format
#
# airflow-secrets.yaml:
# ---
# apiVersion: v1
# kind: Secret
# metadata:
# name: airflow-secrets
# data:
# # key needs to be gitSshKey
# gitSshKey: <base64_encoded_data>
# ---
# airflow-configmap.yaml:
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: airflow-configmap
# data:
# known_hosts: |
# github.com ssh-rsa <...>
# airflow.cfg: |
# ...
#
# git_ssh_key_secret_name = airflow-secrets
# git_ssh_known_hosts_configmap_name = airflow-configmap
git_ssh_key_secret_name =
git_ssh_known_hosts_configmap_name =
# To give the git_sync init container credentials via a secret, create a secret
# with two fields: GIT_SYNC_USERNAME and GIT_SYNC_PASSWORD (example below) and
# add `git_sync_credentials_secret = <secret_name>` to your airflow config under the kubernetes section
#
# Secret Example:
# apiVersion: v1
# kind: Secret
# metadata:
# name: git-credentials
# data:
# GIT_SYNC_USERNAME: <base64_encoded_git_username>
# GIT_SYNC_PASSWORD: <base64_encoded_git_password>
git_sync_credentials_secret =
# For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync
git_sync_container_repository = k8s.gcr.io/git-sync
git_sync_container_tag = v3.1.1
git_sync_init_container_name = git-sync-clone
git_sync_run_as_user = 65533
# The name of the Kubernetes service account to be associated with airflow workers, if any.
# Service accounts are required for workers that require access to secrets or cluster resources.
# See the Kubernetes RBAC documentation for more:
# https://kubernetes.io/docs/admin/authorization/rbac/
worker_service_account_name =
# Any image pull secrets to be given to worker pods, If more than one secret is
# required, provide a comma separated list: secret_a,secret_b
image_pull_secrets =
# GCP Service Account Keys to be provided to tasks run on Kubernetes Executors
# Should be supplied in the format: key-name-1:key-path-1,key-name-2:key-path-2
gcp_service_account_keys =
# Use the service account kubernetes gives to pods to connect to kubernetes cluster.
# It's intended for clients that expect to be running inside a pod running on kubernetes.
# It will raise an exception if called from a process not running in a kubernetes environment.
in_cluster = True
# When running with in_cluster=False change the default cluster_context or config_file
# options to Kubernetes client. Leave blank these to use default behaviour like `kubectl` has.
# cluster_context =
# config_file =
# Affinity configuration as a single line formatted JSON object.
# See the affinity model for top-level key names (e.g. `nodeAffinity`, etc.):
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#affinity-v1-core
affinity =
# A list of toleration objects as a single line formatted JSON array
# See:
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core
tolerations =
# **kwargs parameters to pass while calling a kubernetes client core_v1_api methods from Kubernetes Executor
# provided as a single line formatted JSON dictionary string.
# List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis
# See:
# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes
# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout]
kube_client_request_args = {"_request_timeout" : [60,60] }
# Worker pods security context options
# See:
# https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
# Specifies the uid to run the first process of the worker pods containers as
run_as_user =
# Specifies a gid to associate with all containers in the worker pods
# if using a git_ssh_key_secret_name use an fs_group
# that allows for the key to be read, e.g. 65533
fs_group =
[kubernetes_node_selectors]
# The Key-value pairs to be given to worker pods.
# The worker pods will be scheduled to the nodes of the specified key-value pairs.
# Should be supplied in the format: key = value
[kubernetes_annotations]
# The Key-value annotations pairs to be given to worker pods.
# Should be supplied in the format: key = value
[kubernetes_environment_variables]
# The scheduler sets the following environment variables into your workers. You may define as
# many environment variables as needed and the kubernetes launcher will set them in the launched workers.
# Environment variables in this section are defined as follows
# <environment_variable_key> = <environment_variable_value>
#
# For example if you wanted to set an environment variable with value `prod` and key
# `ENVIRONMENT` you would follow the following format:
# ENVIRONMENT = prod
#
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.
[kubernetes_secrets]
# The scheduler mounts the following secrets into your workers as they are launched by the
# scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the
# defined secrets and mount them as secret environment variables in the launched workers.
# Secrets in this section are defined as follows
# <environment_variable_mount> = <kubernetes_secret_object>=<kubernetes_secret_key>
#
# For example if you wanted to mount a kubernetes secret key named `postgres_password` from the
# kubernetes secret object `airflow-secret` as the environment variable `POSTGRES_PASSWORD` into
# your workers you would follow the following format:
# POSTGRES_PASSWORD = airflow-secret=postgres_credentials
#
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.
[kubernetes_labels]
# The Key-value pairs to be given to worker pods.
# The worker pods will be given these static labels, as well as some additional dynamic labels
# to identify the task.
# Should be supplied in the format: key = value


@@ -0,0 +1,82 @@
[Main]
# The path where the planet will be downloaded and the maps are generated.
MAIN_OUT_PATH: /maps_build
# If the flag DEBUG is set a special small planet file will be downloaded.
DEBUG: 1
[Developer]
# The path where the generator_tool will be searched.
BUILD_PATH: /omim-build-release
# The path to the project directory omim.
OMIM_PATH: /omim
[Storage]
# WebDAV settings.
WD_HOST: webdav
WD_LOGIN: alice
WD_PASSWORD: secret1234
[Generator tool]
# The path to the omim/data.
USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data
# Do not change it. This is determined automatically.
# NODE_STORAGE: map
[Osm tools]
# The path to the osmctools sources.
OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools
# The path where osmctools will be searched or will be built.
OSM_TOOLS_PATH: /osmctools
[Stages]
# Run osmupdate tool for planet.
NEED_PLANET_UPDATE: 0
[Logging]
# The path where maps_generator log will be saved.
# LOG_FILE_PATH: generation.log
[External]
# The url to the planet file.
# PLANET_URL:
# The url to the file with md5 sum of the planet.
# PLANET_MD5_URL:
# The base url to WorldCoasts.geom and WorldCoasts.rawgeom (without file name).
# Files latest_coasts.geom and latest_coasts.rawgeom must be at this URL.
# For example, if PLANET_COASTS_URL = https://somesite.com/download/
# The https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and
# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom.
# PLANET_COASTS_URL:
# The url to the subway file.
SUBWAY_URL: https://cdn.organicmaps.app/subway.json
# Urls for production maps generation.
# UGC_URL:
# HOTELS_URL:
# PROMO_CATALOG_CITIES:
# POPULARITY_URL:
# FOOD_URL:
# FOOD_TRANSLATIONS_URL:
# SRTM_PATH:
# ISOLINES_PATH:
# UK_POSTCODES_URL:
# US_POSTCODES_URL:
[Common]
# Auto detection.
THREADS_COUNT: 0
# Emails for mailing.
# EMAILS:
[Stats]
# Path to rules for calculating statistics by type
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt


@@ -0,0 +1,13 @@
#!/usr/bin/env bash
export PYTHONPATH=/omim/tools/python
export AIRFLOW_HOME=/airflow_home
# Initialize the database.
airflow initdb
# Start the web server on port 8880.
airflow webserver -p 8880 &
# Start the scheduler.
airflow scheduler


@@ -0,0 +1,14 @@
#!/usr/bin/env bash
BUILD_PATH="$(dirname "$0")"
OMIM_PATH="$(cd "${OMIM_PATH:-${BUILD_PATH}/../../../..}"; pwd)"
echo "Build airmaps service.."
mv "${OMIM_PATH}/.dockerignore" "${OMIM_PATH}/.dockerignore_" 2> /dev/null
cp "${BUILD_PATH}/.dockerignore" ${OMIM_PATH}
docker-compose build
rm "${OMIM_PATH}/.dockerignore"
mv "${OMIM_PATH}/.dockerignore_" "${OMIM_PATH}/.dockerignore" 2> /dev/null


@@ -0,0 +1,10 @@
#!/usr/bin/env bash
BUILD_PATH="$(dirname "$0")"
OMIM_PATH="$(cd "${OMIM_PATH:-${BUILD_PATH}/../../../..}"; pwd)"
echo "Cleaning.."
rm "${OMIM_PATH}/.dockerignore" 2> /dev/null
mv "${OMIM_PATH}/.dockerignore_" "${OMIM_PATH}/.dockerignore" 2> /dev/null
rm -r "${BUILD_PATH}/storage" 2> /dev/null


@@ -0,0 +1,10 @@
#!/usr/bin/env bash
BUILD_PATH="$(dirname "$0")"
echo "Creating storage.."
mkdir -p "${BUILD_PATH}/storage/tests/coasts"
mkdir -p "${BUILD_PATH}/storage/tests/maps/open_source"
mkdir -p "${BUILD_PATH}/storage/tests/planet_regular"
chown -R www-data:www-data "${BUILD_PATH}/storage/tests"


@@ -0,0 +1,41 @@
version: "3"
services:
webdav:
image: sashgorokhov/webdav
container_name: webdav
hostname: webdav
restart: always
environment:
USERNAME: alice
PASSWORD: secret1234
volumes:
- ./storage/tests:/media/tests
db:
image: postgres:12.2
container_name: db
hostname: db
restart: always
environment:
POSTGRES_DB: airflow
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
airmaps:
build:
context: ../../../..
dockerfile: ./tools/python/airmaps/sandbox/airmaps/Dockerfile
args:
- TZ=Europe/Moscow
container_name: airmaps
hostname: airmaps
restart: always
links:
- webdav
- db
ports:
- "80:8880"
command: ./tools/python/airmaps/sandbox/airmaps/run_airmaps_service.sh
volumes:
- ./airmaps/airflow.cfg:/airflow_home/airflow.cfg
- ./airmaps/airmaps.ini:/omim/tools/python/airmaps/var/etc/airmaps.ini

tools/python/airmaps/setup.py Executable file

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
import os
import sys
import setuptools
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
from pyhelpers.setup import chdir
from pyhelpers.setup import get_version
from pyhelpers.setup import get_requirements
with chdir(os.path.abspath(os.path.dirname(__file__))):
setuptools.setup(
name="omim-airmaps",
version=str(get_version()),
author="CoMaps",
author_email="info@comaps.app",
description="This package contains tools for generating maps with Apache Airflow.",
url="https://codeberg.org/comaps",
package_dir={"airmaps": ""},
package_data={"": ["var/**/*"]},
packages=[
"airmaps",
"airmaps.dags",
"airmaps.instruments",
],
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
],
python_requires=">=3.6",
install_requires=get_requirements(),
)


@@ -0,0 +1,82 @@
[Main]
# The path where the planet will be downloaded and the maps are generated.
MAIN_OUT_PATH: ~/maps_build
# If the flag DEBUG is set a special small planet file will be downloaded.
DEBUG: 1
[Developer]
# The path where the generator_tool will be searched.
BUILD_PATH: ~/omim-build-release
# The path to the project directory omim.
OMIM_PATH: ~/omim
[Storage]
# WebDAV settings.
# WD_HOST:
# WD_LOGIN:
# WD_PASSWORD:
[Generator tool]
# The path to the omim/data.
USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data
# Do not change it. This is determined automatically.
# NODE_STORAGE: map
[Osm tools]
# The path to the osmctools sources.
OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools
# The path where osmctools will be searched or will be built.
OSM_TOOLS_PATH: ~/osmctools
[Stages]
# Run osmupdate tool for planet.
NEED_PLANET_UPDATE: 0
[Logging]
# The path where maps_generator log will be saved.
# LOG_FILE_PATH: generation.log
[External]
# The url to the planet file.
# PLANET_URL:
# The url to the file with md5 sum of the planet.
# PLANET_MD5_URL:
# The base url to WorldCoasts.geom and WorldCoasts.rawgeom (without file name).
# Files latest_coasts.geom and latest_coasts.rawgeom must be at this URL.
# For example, if PLANET_COASTS_URL = https://somesite.com/download/
# The https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and
# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom.
# PLANET_COASTS_URL:
# The url to the subway file.
SUBWAY_URL: https://cdn.organicmaps.app/subway.json
# Urls for production maps generation.
# UGC_URL:
# HOTELS_URL:
# PROMO_CATALOG_CITIES:
# POPULARITY_URL:
# FOOD_URL:
# FOOD_TRANSLATIONS_URL:
# SRTM_PATH:
# ISOLINES_PATH:
# UK_POSTCODES_URL:
# US_POSTCODES_URL:
[Common]
# Auto detection.
THREADS_COUNT: 0
# Emails for mailing.
# EMAILS:
[Stats]
# Path to rules for calculating statistics by type
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt


@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# coding: utf8
from __future__ import print_function
from collections import namedtuple, defaultdict
from datetime import datetime
from sklearn import metrics
import argparse
import base64
import json
import logging
import matplotlib.pyplot as plt
import os
import pickle
import time
import re
# init logging
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
def load_binary_list(path):
"""Loads reference binary classifier output. """
bits = []
with open(path, 'r') as fd:
for line in fd:
if (not line.strip()) or line.startswith('#'):
continue
bits.append(1 if line.startswith('y') else 0)
return bits
def load_score_list(path):
"""Loads list of matching scores. """
scores = []
with open(path, 'r') as fd:
for line in fd:
if (not line.strip()) or line.startswith('#'):
continue
scores.append(float(re.search(r'result score: (\d*\.\d+)', line).group(1)))
return scores
def process_options():
# TODO(mgsergio): Fix description.
parser = argparse.ArgumentParser(description="Download and process booking hotels.")
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")
parser.add_argument("--reference_list", dest="reference_list", help="Path to data files")
parser.add_argument("--sample_list", dest="sample_list", help="Name and destination for output file")
parser.add_argument("--show", dest="show", default=False, action="store_true",
help="Show graph for precision and recall")
options = parser.parse_args()
if not options.reference_list or not options.sample_list:
parser.print_help()
exit()
return options
def main():
options = process_options()
reference = load_binary_list(options.reference_list)
sample = load_score_list(options.sample_list)
precision, recall, threshold = metrics.precision_recall_curve(reference, sample)
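# precision_recall_curve returns one more precision/recall point than thresholds;
# zip() below pairs each threshold with its precision/recall and drops the extra point.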
aa = zip(precision, recall, threshold)
max_by_hmean = max(aa, key=lambda prt: prt[0] * prt[1] / (prt[0] + prt[1]))
print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*max_by_hmean))
print("AUC: {0}".format(metrics.roc_auc_score(reference, sample)))
if options.show:
plt.plot(recall, precision)
plt.title("Precision/Recall")
plt.ylabel("Precision")
plt.xlabel("Recall")
plt.show()
if __name__ == "__main__":
main()

View file

@ -0,0 +1,114 @@
#!/usr/bin/env python3
import sys
import os
import re
import json
from string import digits
CONTENT_REGEX = re.compile(r'/\*.*?\*/', re.DOTALL)
TYPE_ENTRIES_REGEX = re.compile(r'"(.*?)"\s*=\s*"(.*?)"')
SINGLE_REPLACE = False
def main(lang, data_en):
strings_file_path = os.path.join('iphone', 'Maps', 'LocalizedStrings', f'{lang}.lproj', 'LocalizableTypes.strings')
json_file_path = os.path.join('data', 'categories-strings', f'{lang}.json', 'localize.json')
with open(strings_file_path, 'r', encoding='utf-8') as f:
content = f.read()
# Remove comments
content = re.sub(CONTENT_REGEX, '', content)
type_entries = {key[5:]: value for key, value in re.findall(TYPE_ENTRIES_REGEX, content)}
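# The [5:] slice drops the fixed five-character prefix of each .strings key (presumably "type."),
# leaving the bare type name to match against the JSON keys below.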
with open(json_file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
for type_name, localized_value in type_entries.items():
key_matched = False
for json_key in data.keys():
json_key_split = json_key.split('|')
for key in json_key_split:
already_there = False
_key_matched = False
if type_name == key.replace('-', '.').replace('_', '.'):
key_matched = True
data_split = data[json_key].split('|')
try:
data_split.extend([
value
for category in
[a for a in json_key_split
if a.startswith('@')]
for value in
data[category].split('|')
])
except KeyError:
pass
for value in data_split:
if value and value[0] in digits:
value = value[1:]
value = value.lower()
localized_value_lower = localized_value.lower()
# Prevents adding duplicates that differ only by the word "shop"
if value in localized_value_lower:
already_there = True
break
if localized_value_lower == value:
_key_matched = True
break
if already_there:
break
if not _key_matched:
if SINGLE_REPLACE and len(data_split) == 1:
data[json_key] = localized_value
print(f'Replaced "{data[json_key]}" with "{localized_value}" in "{json_key}"')
else:
data[json_key] = localized_value+'|'+data[json_key]
print(f'Appended "{localized_value}" to "{json_key}"')
if not key_matched:
for json_key in data.keys():
for key in json_key.split('|'):
if type_name == key.replace('-', '.').replace('_', '.'):
print(f'Created "{localized_value}" for "{json_key}"')
data.update({json_key: localized_value})
res = json.dumps(data, ensure_ascii=False, separators=(",\n", ": ")
).replace('{', '{\n').replace('}', '\n}')
with open(json_file_path, 'w', encoding='utf-8') as f:
f.write(res)
if __name__ == '__main__':
if len(sys.argv) < 2:
print(f"Usage: {sys.argv[0]} [-r] <language_codes>")
sys.exit(1)
if sys.argv[1] == '-r':
SINGLE_REPLACE = True
del sys.argv[1]
if len(sys.argv) < 2:
print("No languages specified")
sys.exit(1)
with open('data/categories-strings/en.json/localize.json', 'r', encoding='utf-8') as f:
data_en = json.load(f)
if len(sys.argv) > 2:
for lang in sys.argv[1:]:
print(f'{lang}:')
main(lang, data_en)
print('\n')
else:
main(sys.argv[1], data_en)

View file

@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import json
import sys
LANGUAGES = (
'af', 'ar', 'be', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'en-AU',
'en-GB', 'en-US', 'es', 'es-MX', 'et', 'eu', 'fa', 'fi', 'fr', 'fr-CA',
'he', 'hi', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'mr', 'nb', 'nl',
'pl', 'pt', 'pt-BR', 'ro', 'ru', 'sk', 'sr', 'sv', 'sw', 'th', 'tr', 'uk',
'vi', 'zh-Hans', 'zh-Hant'
)
def load_localize_json(lang_dir):
file_path = os.path.join(lang_dir, 'localize.json')
if not os.path.isfile(file_path):
return {}
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
except json.JSONDecodeError as e:
print(f"Error decoding JSON from {file_path}: {e}")
return {}
def collect_all_keys(base_dir):
all_data = {}
lang_dirs = [d for d in os.listdir(base_dir) if d.endswith('.json')]
for lang_dir in lang_dirs:
lang = lang_dir.replace('.json', '')
if lang not in LANGUAGES:
print(f"Skipping unsupported language directory: {lang_dir}")
continue
full_path = os.path.join(base_dir, lang_dir)
if os.path.isdir(full_path):
data = load_localize_json(full_path)
for key, value in data.items():
if key not in all_data:
all_data[key] = {}
all_data[key][lang] = value
return all_data
def write_category_file(all_data, output_file):
with open(output_file, 'w', encoding='utf-8') as f:
for i, (key, translations) in enumerate(all_data.items()):
f.write(key + '\n')
for lang in LANGUAGES:
if lang in translations and translations[lang]:
f.write(f"{lang}:{translations[lang]}\n")
elif lang == 'en' and key in translations:
f.write('\n')
if i < len(all_data) - 1:
f.write('\n')
def main():
if len(sys.argv) < 2:
print(f"Usage: {sys.argv[0]} <json_directory> [categories.txt]")
sys.exit(1)
base_dir = sys.argv[1]
output_file = sys.argv[2] if len(sys.argv) > 2 else "categories.txt"
if not os.path.isdir(base_dir):
print(f"Directory not found: {base_dir}")
sys.exit(1)
all_data = collect_all_keys(base_dir)
write_category_file(all_data, output_file)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,83 @@
#!/usr/bin/env python3
import os
import json
import sys
LANGUAGES = (
'af', 'ar', 'be', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'en-AU',
'en-GB', 'en-US', 'es', 'es-MX', 'et', 'eu', 'fa', 'fi', 'fr', 'fr-CA',
'he', 'hi', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'mr', 'nb', 'nl',
'pl', 'pt', 'pt-BR', 'ro', 'ru', 'sk', 'sr', 'sv', 'sw', 'th', 'tr', 'uk',
'vi', 'zh-Hans', 'zh-Hant'
)
# TODO: respect the order of key/values in the JSON when converting back and forth
def parse_translations(input_file):
"""
Parses a translation file and generates a JSON file per language.
"""
# Read the input file line by line
with open(input_file, 'r', encoding='utf-8') as f:
lines = [line.rstrip('\n') for line in f]
# Split the file into blocks separated by empty lines
blocks = []
current_block = []
for line in lines:
stripped_line = line.strip()
if stripped_line.startswith('#'):
continue
if not stripped_line:
if current_block:
blocks.append(current_block)
current_block = []
else:
current_block.append(line)
if current_block:
blocks.append(current_block)
# Initialize dictionaries for each language
lang_data = {lang: {} for lang in LANGUAGES}
# Process each block
for block in blocks:
key_line = block[0]
has_translation = False
for line in block[1:]:
if ':' not in line:
print(f"Skipping invalid line: {line}")
continue
lang, translation = line.split(':', 1)
lang = lang.strip()
translation = translation.strip()
if lang in LANGUAGES:
lang_data[lang][key_line] = translation
has_translation = True
else:
print(f"Warning: Unsupported language {lang} in line: {line}")
if not has_translation:
lang_data['en'][key_line] = ""
# Write JSON files
for lang, data in lang_data.items():
if not data:
continue
dir_name = f"{lang}.json"
os.makedirs(dir_name, exist_ok=True)
file_path = os.path.join(dir_name, 'localize.json')
with open(file_path, 'w', encoding='utf-8') as f:
f.write(json.dumps(data, ensure_ascii=False, separators=(",\n", ": ")).replace('{', '{\n').replace('}', '\n}'))
def main():
if len(sys.argv) < 2:
print(f"Usage: {sys.argv[0]} <categories.txt>")
sys.exit(1)
input_file = sys.argv[1]
parse_translations(input_file)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,129 @@
#!/usr/bin/env python3
#coding: utf8
from __future__ import print_function
from argparse import ArgumentParser
from collections import defaultdict
from find_untranslated_strings import ITUNES_LANGS
class CategoriesConverter:
def __init__(self):
args = self.parse_args()
self.categories = CategoriesTxt(args.categories)
self.should_format = args.format
self.output = args.output
def process(self):
if self.should_format:
self.categories.write_formatted()
else:
self.categories.write_as_strings(self.output)
def parse_args(self):
parser = ArgumentParser(
description="""
A script for converting categories.txt into the strings.txt format
and back, as well as for autoformatting categories.txt. This is
useful for interacting with the translation partners.
"""
)
parser.add_argument(
"-c", "--categories",
required=True,
dest="categories",
help="""Path to the categories file to be converted into the strings.txt format."""
)
parser.add_argument(
"-o", "--output",
dest="output",
help="""The destination file."""
)
parser.add_argument(
"-f", "--format",
dest="format", action="store_true", default=False,
help="""Format the file and exit"""
)
return parser.parse_args()
class CategoriesTxt:
"""For now, let's allow comments only at the beginning of the file."""
def __init__(self, filepath):
self.translations = defaultdict(lambda: defaultdict(str))
self.keys_in_order = []
self.comments = []
self.filepath = filepath
self.all_langs = set()
self.parse_file()
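# The file is parsed as blocks: a key line followed by "lang:translation" lines,
# with blank lines (or comment lines) ending a block.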
def parse_file(self):
current_key = ""
this_line_is_key = True
with open(self.filepath) as infile:
for line in map(str.strip, infile):
if line.startswith("#"):
self.comments.append(line)
this_line_is_key = True
elif not line:
this_line_is_key = True
elif this_line_is_key:
self.keys_in_order.append(line)
current_key = line
this_line_is_key = False
else:
pos = line.index(':')
lang = line[:pos]
translation = line[pos + 1:]
self.translations[current_key][lang] = translation
def write_as_categories(self, outfile):
self.write_strings_formatted(outfile, "\n{}\n", "{}:{}\n")
def write_as_strings(self, filepath):
with open(filepath, "w") as outfile:
self.write_strings_formatted(outfile, key_format="\n [{}]\n", line_format=" {} = {}\n")
def write_strings_formatted(self, outfile, key_format, line_format):
for key in self.keys_in_order:
outfile.write(key_format.format(key.strip("[]")))
pair = self.translations[key]
for lang in ITUNES_LANGS:
if lang in pair:
outfile.write(line_format.format(lang, pair[lang]))
remaining_langs = sorted(list(set(pair.keys()) - set(ITUNES_LANGS)))
for lang in remaining_langs:
outfile.write(line_format.format(lang, pair[lang]))
def add_translation(self, translation, key, lang):
if key not in self.keys_in_order:
self.keys_in_order.append(key)
self.translations[key][lang] = translation
def append_to_translation(self, translation, key, lang):
self.translations[key][lang] += translation
def write_formatted(self):
with open(self.filepath, "w") as outfile:
for comment in self.comments:
outfile.write(comment + "\n")
self.write_as_categories(outfile)
if __name__ == "__main__":
c = CategoriesConverter()
c.process()

View file

@ -0,0 +1,50 @@
#!/usr/bin/env python3
import sys
import os
path = os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'data', 'categories.txt')
if len(sys.argv) < 2:
print('Merges some language in categories.txt with English')
print('Usage: {} <lang> [path_to_categories.txt]'.format(sys.argv[0]))
print('Default path to categories: {}'.format(path))
if not os.path.exists(path):
print('Warning: default path to categories.txt will fail')
sys.exit(1)
lang = sys.argv[1]
if len(sys.argv) > 2:
path = sys.argv[2]
with open(path, 'r') as f:
langs = []
trans = None
def flush_langs():
for item in langs:
# Append the found translation to the English variant only.
if trans and item[0] == 'en':
parts = item[1].split('|')
parts[0] = '{} - {}'.format(parts[0], trans)
item[1] = '|'.join(parts)
print(':'.join(item))
for line in map(str.strip, f):
if len(line) == 0 or line[0] == '#':
if langs:
flush_langs()
langs = []
trans = None
print(line)
elif not langs:
print(line)
else:
if ':' not in line:
raise Exception('Line {} is not a translation line'.format(line))
l = line.split(':')
langs.append(l)
if l[0] == lang:
trans = l[1].split('|')[0]
if trans[0].isdigit():
trans = trans[1:]
if trans[0] == '^':
trans = trans[1:]
flush_langs()

View file

@ -0,0 +1,232 @@
#!/usr/bin/env python3
#
# Check AppStore / GooglePlay / F-Droid metadata
#
import os
import sys
import glob
import shutil
from urllib.parse import urlparse
os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", ".."))
# https://support.google.com/googleplay/android-developer/answer/9844778?visit_id=637740303439369859-3116807078&rd=1#zippy=%2Cview-list-of-available-languages
GPLAY_LOCALES = [
"af", # Afrikaans
"sq", # Albanian
"am", # Amharic
"ar", # Arabic
"hy-AM", # Armenian
"az-AZ", # Azerbaijani
"bn-BD", # Bangla
"eu-ES", # Basque
"be", # Belarusian
"bg", # Bulgarian
"my-MM", # Burmese
"ca", # Catalan
"zh-HK", # Chinese (Hong Kong)
"zh-CN", # Chinese (Simplified)
"zh-TW", # Chinese (Traditional)
"hr", # Croatian
"cs-CZ", # Czech
"da-DK", # Danish
"nl-NL", # Dutch
"en-IN", # English
"en-SG", # English
"en-ZA", # English
"en-AU", # English (Australia)
"en-CA", # English (Canada)
"en-GB", # English (United Kingdom)
"en-US", # English (United States)
"et", # Estonian
"fil", # Filipino
"fi-FI", # Finnish
"fr-CA", # French (Canada)
"fr-FR", # French (France)
"gl-ES", # Galician
"ka-GE", # Georgian
"de-DE", # German
"el-GR", # Greek
"gu", # Gujarati
"iw-IL", # Hebrew
"hi-IN", # Hindi
"hu-HU", # Hungarian
"is-IS", # Icelandic
"id", # Indonesian
"it-IT", # Italian
"ja-JP", # Japanese
"kn-IN", # Kannada
"kk", # Kazakh
"km-KH", # Khmer
"ko-KR", # Korean
"ky-KG", # Kyrgyz
"lo-LA", # Lao
"lv", # Latvian
"lt", # Lithuanian
"mk-MK", # Macedonian
"ms", # Malay
"ms-MY", # Malay (Malaysia)
"ml-IN", # Malayalam
"mr-IN", # Marathi
"mn-MN", # Mongolian
"ne-NP", # Nepali
"no-NO", # Norwegian
"fa", # Persian
"fa-AE", # Persian
"fa-AF", # Persian
"fa-IR", # Persian
"pl-PL", # Polish
"pt-BR", # Portuguese (Brazil)
"pt-PT", # Portuguese (Portugal)
"pa", # Punjabi
"ro", # Romanian
"rm", # Romansh
"ru-RU", # Russian
"sr", # Serbian
"si-LK", # Sinhala
"sk", # Slovak
"sl", # Slovenian
"es-419", # Spanish (Latin America)
"es-ES", # Spanish (Spain)
"es-US", # Spanish (United States)
"sw", # Swahili
"sv-SE", # Swedish
"ta-IN", # Tamil
"te-IN", # Telugu
"th", # Thai
"tr-TR", # Turkish
"uk", # Ukrainian
"ur", # Urdu
"vi", # Vietnamese
"zu", # Zulu
]
# From a Fastlane error message and https://help.apple.com/app-store-connect/#/dev997f9cf7c
APPSTORE_LOCALES = [
"ar-SA", "ca", "cs", "da", "de-DE", "el", "en-AU", "en-CA", "en-GB", "en-US", "es-ES", "es-MX", "fi", "fr-CA", "fr-FR", "he", "hi", "hr", "hu", "id", "it", "ja", "ko", "ms", "nl-NL", "no", "pl", "pt-BR", "pt-PT", "ro", "ru", "sk", "sv", "th", "tr", "uk", "vi", "zh-Hans", "zh-Hant"
]
def error(path, message, *args, **kwargs):
print("", path + ":", message.format(*args, **kwargs), file=sys.stderr)
return False
def done(path, ok):
if ok:
print("", path)
return ok
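# Reads a metadata file and checks that it is non-empty, ends with a trailing newline
# (unless ignoreEmptyFilesAndNewLines is set), and does not exceed max_length; returns (ok, text).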
def check_raw(path, max_length, ignoreEmptyFilesAndNewLines=False):
ok = True
with open(path, 'r') as f:
text = f.read()
if not ignoreEmptyFilesAndNewLines:
if not text:
ok = error(path, "empty")
elif text[-1] == os.linesep:
text = text[:-1]
else:
ok = error(path, "missing new line")
else:
text = text.strip()
cur_length = len(text)
if cur_length > max_length:
ok = error(path, "too long: got={}, expected={}", cur_length, max_length)
return ok, text
def check_text(path, max, optional=False, ignoreEmptyFilesAndNewLines=False):
try:
return done(path, check_raw(path, max, ignoreEmptyFilesAndNewLines)[0])
except FileNotFoundError:
if optional:
return True
print("🚫", path)
return False
def check_url(path, ignoreEmptyFilesAndNewLines=False):
(ok, url) = check_raw(path, 500, ignoreEmptyFilesAndNewLines)
url = urlparse(url)
if not url.scheme in ('https', 'http'):
ok = error(path, "invalid URL: {}", url)
return done(path, ok)
def check_email(path):
(ok, email) = check_raw(path, 500)
ok = ok and email.find('@') != -1 and email.find('.') != -1
return done(path, ok)
def check_exact(path, expected):
(ok, value) = check_raw(path, len(expected))
if value != expected:
ok = error(path, "invalid value: got={}, expected={}", value, expected)
return done(path, ok)
def check_android(is_gplay):
ok = True
flavor = "google" if is_gplay else "fdroid"
flavor = f'android/app/src/{flavor}/play/'
ok = check_url(flavor + 'contact-website.txt') and ok
ok = check_email(flavor + 'contact-email.txt') and ok
ok = check_exact(flavor + 'default-language.txt', 'en-US') and ok
for locale in glob.glob(flavor + 'listings/*/'):
if is_gplay and locale.split('/')[-2] not in GPLAY_LOCALES:
ok = error(locale, 'unsupported locale') and ok
continue
ok = check_text(locale + 'title.txt', 30 if is_gplay else 50) and ok
ok = check_text(locale + 'short-description.txt', 80) and ok
ok = check_text(locale + 'full-description.txt', 4000) and ok
ok = check_text(locale + 'release-notes.txt', 499, optional=True) and ok
''' TODO: release notes do not necessarily exist for all languages, but symlinks are made for all
for locale in glob.glob(flavor + 'release-notes/*/'):
if locale.split('/')[-2] not in GPLAY_LOCALES:
ok = error(locale, 'unsupported locale') and ok
continue
ok = check_text(locale + 'default.txt', 499) and ok
'''
return ok
def check_ios():
ok = True
for locale in glob.glob('iphone/metadata/*/'):
if locale.split('/')[-2] not in APPSTORE_LOCALES:
ok = error(locale, "unsupported locale") and ok
continue
locale_complete = True
for name in ["description.txt", "keywords.txt", "marketing_url.txt", "privacy_url.txt", "subtitle.txt", "support_url.txt"]:
name_path = os.path.join(locale, name)
if not os.path.exists(name_path):
locale_complete = False
if locale_complete:
ok = check_text(locale + "subtitle.txt", 30, False, True) and ok
ok = check_text(locale + "description.txt", 4000, False, True) and ok
ok = check_text(locale + "release_notes.txt", 4000, True, True) and ok
ok = check_text(locale + "keywords.txt", 100, False, True) and ok
ok = check_url(locale + "support_url.txt", True) and ok
ok = check_url(locale + "marketing_url.txt", True) and ok
ok = check_url(locale + "privacy_url.txt", True) and ok
return ok
if __name__ == "__main__":
ok = True
if len(sys.argv) == 2 and sys.argv[1] == 'gplay':
if check_android(is_gplay=True):
sys.exit(0)
sys.exit(2)
if len(sys.argv) == 2 and sys.argv[1] == 'fdroid':
if check_android(is_gplay=False):
sys.exit(0)
sys.exit(2)
elif len(sys.argv) == 2 and sys.argv[1] == "ios":
if check_ios():
sys.exit(0)
sys.exit(2)
else:
print("Usage:", sys.argv[0], "gplay|fdroid|ios", file=sys.stderr)
sys.exit(1)

View file

@ -0,0 +1,82 @@
# City* | Population* | Region* | Width km* | Height km* | Square km2* | AVG | MAX | SQRT(S)
# Columns with * symbol are required
New York | 8405837 | USA | 56.5 | 50.2 | 1214 | 53.35 | 56.5 | 34.84250278
Los Angeles | 3792621 | USA | 51 | 80.5 | 1301.97 | 65.75 | 80.5 | 36.0828214
Chicago | 2714856 | USA | 20.9 | 44.6 | 606.1 | 32.75 | 44.6 | 24.61909828
Philadelphia | 1447395 | USA | 30.3 | 41.8 | 369.9 | 36.05 | 41.8 | 19.23278451
Dallas | 1223229 | USA | 38.6 | 50.7 | 997.1 | 44.65 | 50.7 | 31.57689028
San Francisco | 815358 | USA | 12 | 12.7 | 600.6 | 12.35 | 12.7 | 24.50714182
Detroit | 701475 | USA | 33.5 | 31.9 | 370.2 | 32.7 | 33.5 | 19.24058211
Memphis | 677272 | USA | 38 | 32.2 | 763.4 | 35.1 | 38 | 27.62969417
Seattle | 634535 | USA | 11.9 | 27.4 | 369.2 | 19.65 | 27.4 | 19.2145778
Boston | 625087 | USA | 15.1 | 18.2 | 232.1 | 16.65 | 18.2 | 15.23482852
Las Vegas | 596424 | USA | 43.1 | 31.9 | 340 | 37.5 | 43.1 | 18.43908891
Atlanta | 443775 | USA | 26.6 | 29.3 | 343 | 27.95 | 29.3 | 18.52025918
Miami | 433146 | USA | 13.7 | 15.8 | 92.42 | 14.75 | 15.8 | 9.613532129
New Orleans | 343829 | USA | 46 | 15.1 | 907 | 30.55 | 46 | 30.11644069
Pittsburgh | 312819 | USA | 23.3 | 26.9 | 151.1 | 25.1 | 26.9 | 12.292274
Orlando | 238300 | USA | 24.8 | 22.2 | 286.7 | 23.5 | 24.8 | 16.93221781
Des Moines | 203433 | USA | 18.2 | 17.2 | 213.9 | 17.7 | 18.2 | 14.62532051
Salt Lake City | 181698 | USA | 27.8 | 12.1 | 285.9 | 19.95 | 27.8 | 16.9085777
Aurora | 7508 | USA | 5 | 4.8 | 15.49 | 4.9 | 5 | 3.935733731
Barthsville | 36245 | USA | 9.3 | 11.9 | 58.9 | 10.6 | 11.9 | 7.674633542
Farmington | 45854 | USA | 10 | 12.9 | 69.93 | 11.45 | 12.9 | 8.362415919
Dumas | 14989 | USA | 3.2 | 8.2 | 14.29 | 5.7 | 8.2 | 3.780211634
Grand Island | 49989 | USA | 10.1 | 8.4 | 73.94 | 9.25 | 10.1 | 8.598837131
Hastings | 25058 | USA | 8.9 | 6.3 | 35.38 | 7.6 | 8.9 | 5.948108943
Decatur | 9362 | USA | 7.4 | 4.8 | 15 | 6.1 | 7.4 | 3.872983346
Van Wert | 10844 | USA | 4.5 | 5.3 | 19.71 | 4.9 | 5.3 | 4.439594576
Ottawa | 4417 | USA | 8 | 4.7 | 12.35 | 6.35 | 8 | 3.514256678
Blythe | 20590 | USA | 9.5 | 22.9 | 69.86 | 16.2 | 22.9 | 8.358229478
Morgan Hill | 39420 | USA | 11.3 | 13.9 | 33.36 | 12.6 | 13.9 | 5.775811631
London | 8308369 | Europe | 58.9 | 49.9 | 1572 | 54.4 | 58.9 | 39.6484552
Athens | 3074160 | Europe | 9.9 | 10.7 | 412 | 10.3 | 10.7 | 20.29778313
Berlin | 3397469 | Europe | 40.7 | 37.8 | 891.85 | 39.25 | 40.7 | 29.86385775
Madrid | 3215633 | Europe | 32 | 25.1 | 605.77 | 28.55 | 32 | 24.61239525
Rome | 2863322 | Europe | 28.2 | 31.9 | 1285.31 | 30.05 | 31.9 | 35.85122034
Paris | 10413386 | Europe | 10.9 | 10.6 | 2844.80 | 10.75 | 10.9 | 53.33666656
Bucharest | 1883425 | Europe | 26.1 | 27 | 228 | 26.55 | 27 | 15.09966887
Hamburg | 1751775 | Europe | 29.3 | 33.4 | 755 | 31.35 | 33.4 | 27.47726333
Vienna | 1765649 | Europe | 30.1 | 15.2 | 414.65 | 22.65 | 30.1 | 20.36295656
Warsaw | 1715517 | Europe | 28.1 | 31.2 | 517.24 | 29.65 | 31.2 | 22.74291098
Barselona | 1620943 | Europe | 17.7 | 18.5 | 101.9 | 18.1 | 18.5 | 10.09455299
Munich | 1388308 | Europe | 30.2 | 20.3 | 310.43 | 25.25 | 30.2 | 17.61902381
Milan | 1353882 | Europe | 14.5 | 26 | 181.76 | 20.25 | 26 | 13.48183964
Prague | 1243201 | Europe | 34 | 24.5 | 496 | 29.25 | 34 | 22.27105745
Brussels | 1138854 | Europe | 16.1 | 16.1 | 161.38 | 16.1 | 16.1 | 12.70354281
Birmingham | 1085400 | Europe | 14.8 | 14.8 | 103.39 | 14.8 | 14.8 | 10.16808733
Rennes | 208033 | Europe | 8 | 7.5 | 50.39 | 7.75 | 8 | 7.09859141
Lisboa | 547631 | Europe | 8.4 | 7.8 | 958 | 8.1 | 8.4 | 30.95157508
Sevilla | 703021 | Europe | 18.4 | 12.4 | 140 | 15.4 | 18.4 | 11.83215957
Malaga | 568507 | Europe | 14 | 9.4 | 395 | 11.7 | 14 | 19.87460691
Genova | 604848 | Europe | 29.9 | 9.5 | 243.6 | 19.7 | 29.9 | 15.60769041
Parma | 187214 | Europe | 8.9 | 7.9 | 260.77 | 8.4 | 8.9 | 16.14837453
Bologna | 384038 | Europe | 16.5 | 13.8 | 140.7 | 15.15 | 16.5 | 11.86170308
Erlangen | 105412 | Europe | 11.6 | 12.4 | 76.9 | 12 | 12.4 | 8.769264507
Kaltenkirchen | 19904 | Europe | 8.7 | 5.3 | 23.1 | 7 | 8.7 | 4.806245936
Lier | 33492 | Europe | 9.5 | 13 | 49.7 | 11.25 | 13 | 7.049822693
Decin | 50311 | Europe | 15.6 | 21.1 | 140 | 18.35 | 21.1 | 11.83215957
Marianske Lazne | 14083 | Europe | 9.6 | 17.9 | 51.81 | 13.75 | 17.9 | 7.197916365
Bindlach | 7211 | Europe | 9.6 | 8 | 37.6 | 8.8 | 9.6 | 6.131883887
Moscow | 11503501 | Russia | 32.2 | 46.8 | 2511 | | |
Saint Peterburg | 4879566 | Russia | 18.6 | 36.1 | 1439 | | |
Novosibirsk | 1473754 | Russia | 22.6 | 43.8 | 502.1 | | |
Yekaterinburg | 1349772 | Russia | 24.4 | 33 | 495 | | |
Nizhny Novgorod | 1250619 | Russia | 24.1 | 30.4 | 410.68 | | |
Samara | 1164685 | Russia | 20.1 | 44.7 | 541.382 | | |
Kazan | 1143535 | Russia | 34.3 | 29.3 | 425.3 | | |
Rostov-on-Don | 1089261 | Russia | 59.2 | 26.4 | 348.5 | | |
Volgograd | 1021215 | Russia | 12.4 | 49.2 | 859.353 | | |
Krasnoyarsk | 1035528 | Russia | 36.2 | 23.8 | 348 | | |
Saratov | 839755 | Russia | 25.1 | 35.2 | 394 | | |
Krasnodar | 805680 | Russia | 26.2 | 20.7 | 192.19 | | |
Tolyatti | 718127 | Russia | 31 | 15 | 314.78 | | |
Izhevsk | 637309 | Russia | 32.7 | 21.3 | 315.15 | | |
Vladivostok | 600378 | Russia | 16.4 | 39.6 | 331.16 | | |
Orenburg | 560046 | Russia | 13.6 | 36.3 | 259 | | |
Tula | 490508 | Russia | 18.2 | 21.8 | 145.8 | | |
Cheboksary | 479266 | Russia | 16.7 | 13.9 | 250.87 | | |
Tver | 411044 | Russia | 20.3 | 27.8 | 152.22 | | |
Arkhangelsk | 350985 | Russia | 13.8 | 35.7 | 294.42 | | |
Vologda | 306487 | Russia | 18.1 | 11.1 | 116 | | |
Nizhnevartovsk | 265994 | Russia | 20.1 | 13.7 | 71 | | |

158
tools/python/city_radius.py Normal file
View file

@ -0,0 +1,158 @@
import sys, os, math
import matplotlib.pyplot as plt
from optparse import OptionParser
cities = []
def strip(s):
return s.strip('\t\n ')
def load_data(path):
global cities
f = open(path, 'r')
lines = f.readlines()
f.close()
for l in lines:
if l.startswith('#'):
continue
data = l.split('|')
if len(data) < 6:
continue
item = {}
item['name'] = strip(data[0])
item['population'] = int(strip(data[1]))
item['region'] = strip(data[2])
item['width'] = float(strip(data[3]))
item['height'] = float(strip(data[4]))
item['square'] = float(data[5])
cities.append(item)
# build plot
print "Cities count: %d" % len(cities)
def formula(popul, base = 32, mult = 0.5):
#return math.exp(math.log(popul, base)) * mult
return math.pow(popul, 1 / base) * mult
def avgDistance(approx, data):
dist = 0
for x in range(len(data)):
dist += math.fabs(approx[x] - data[x])
return dist / float(len(data))
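# Grid search over base and mult, minimizing the average absolute error against the measured sizes.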
def findBest(popul, data, minBase = 5, maxBase = 100, stepBase = 0.1, minMult = 0.01, maxMult = 1, stepMult = 0.01):
# try to find best parameters
base = minBase
minDist = -1
bestMult = minMult
bestBase = base
while base <= maxBase:
print "%.02f%% best mult: %f, best base: %f, best dist: %f" % (100 * (base - minBase) / (maxBase - minBase), bestMult, bestBase, minDist)
mult = minMult
while mult <= maxMult:
approx = []
for p in popul:
approx.append(formula(p, base, mult))
dist = avgDistance(approx, data)
if minDist < 0 or minDist > dist:
minDist = dist
bestBase = base
bestMult = mult
mult += stepMult
base += stepBase
return (bestBase, bestMult)
def process_data(steps_count, base, mult, bestFind = False, dataFlag = 0):
avgData = []
maxData = []
sqrData = []
population = []
maxPopulation = 0
minPopulation = -1
for city in cities:
p = city['population']
w = city['width']
h = city['height']
s = city['square']
population.append(p)
if p > maxPopulation:
maxPopulation = p
if minPopulation < 0 or p < minPopulation:
minPopulation = p
maxData.append(max([w, h]))
avgData.append((w + h) * 0.5)
sqrData.append(math.sqrt(s))
bestBase = base
bestMult = mult
if bestFind:
d = maxData
if dataFlag == 1:
d = avgData
elif dataFlag == 2:
d = sqrData
bestBase, bestMult = findBest(population, d)
print "Finished\n\nBest mult: %f, Best base: %f" % (bestMult, bestBase)
approx = []
population2 = []
v = minPopulation
step = (maxPopulation - minPopulation) / float(steps_count)
for i in range(0, steps_count):
approx.append(formula(v, bestBase, bestMult))
population2.append(v)
v += step
plt.plot(population, avgData, 'bo', population, maxData, 'ro', population, sqrData, 'go', population2, approx, 'y')
plt.axis([minPopulation, maxPopulation, 0, 100])
plt.xscale('log')
plt.show()
if __name__ == "__main__":
if len(sys.argv) < 3:
print('city_radius.py <data_file> <steps>')
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename", default="city_popul_sqr.data",
help="source data file", metavar="path")
parser.add_option("-s", "--scan",
dest="best", default=False, action="store_true",
help="scan best values of mult and base")
parser.add_option('-m', "--mult",
dest='mult', default=1,
help='multiplier value')
parser.add_option('-b', '--base',
dest='base', default=3.6,
help="base value")
parser.add_option('-d', '--data',
default=0, dest='data',
help="Dataset to use on best values scan: 0 - max, 1 - avg, 2 - sqr")
(options, args) = parser.parse_args()
load_data(options.filename)
process_data(1000, float(options.base), float(options.mult), options.best, int(options.data))

View file

21
tools/python/data/all/setup.py Executable file
View file

@ -0,0 +1,21 @@
#!/usr/bin/env python3
import os
import sys
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
from data.base import get_version
from data.base import setup
_V = get_version()
_D = [
"omim-data-borders",
"omim-data-essential",
"omim-data-files",
"omim-data-fonts",
"omim-data-styles",
]
setup(__file__, "all", [], install_requires=["{}=={}".format(d, _V) for d in _D])

69
tools/python/data/base.py Normal file
View file

@ -0,0 +1,69 @@
import os
import sys
from collections import defaultdict
import setuptools
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
from pyhelpers.setup import chdir
from pyhelpers.setup import get_version
DATA_PATH = os.path.abspath(
os.path.join(os.path.dirname(__file__), "..", "..", "..", "data")
)
def get_files_from_dir(abs_root_path, b, data_files):
# Take only the top level here; subdirectories are handled by the recursive call below,
# so every file lands under the data_files key of its own directory exactly once.
root, dirs, files = next(os.walk(abs_root_path))
data_files[b].extend(os.path.join(root, f) for f in files)
for d in dirs:
get_files_from_dir(
os.path.join(abs_root_path, d), os.path.join(b, d), data_files
)
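# Maps each requested data path onto a setuptools data_files entry rooted at "omim-data",
# preserving the layout relative to omim/data.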
def get_data_files(relative_data_paths):
data_files = defaultdict(lambda: [])
for p in relative_data_paths:
path = os.path.join(DATA_PATH, p)
b = os.path.join("omim-data", path.replace(DATA_PATH + os.path.sep, ""))
if os.path.isdir(path):
get_files_from_dir(path, b, data_files)
else:
b = os.path.dirname(b)
data_files[b].append(path)
return data_files.items()
def setup(
source_file,
suffix,
relative_data_paths,
packages=None,
package_dir=None,
install_requires=None,
cmdclass=None,
supported_pythons=("2", "2.7", "3", "3.5", "3.6", "3.7", "3.8", "3.9"),
):
with chdir(os.path.abspath(os.path.dirname(source_file))):
setuptools.setup(
name="omim-data-{}".format(suffix),
version=str(get_version()),
author="CoMaps",
author_email="info@comaps.app",
description="This package contains {} data files.".format(suffix),
url="https://codeberg.org/comaps",
packages=[] if packages is None else packages,
package_dir={} if package_dir is None else package_dir,
cmdclass={} if cmdclass is None else cmdclass,
classifiers=["License :: OSI Approved :: Apache Software License",]
+ [
"Programming Language :: Python :: {}".format(supported_python)
for supported_python in supported_pythons
],
install_requires=install_requires or [],
data_files=get_data_files(relative_data_paths),
)

View file

@ -0,0 +1,44 @@
import logging
import os
import tarfile
from six import BytesIO
from data_files import find_data_files
try:
import lzma
except ImportError:
from backports import lzma
logger = logging.getLogger(__name__)
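# Unpacks borders.tar.xz shipped with the omim-data package into borders_path
# (<omim-data>/borders by default) unless the directory already exists.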
def init(borders_path=None):
data_path = find_data_files("omim-data")
if data_path is None:
logger.error("omim-data was not found.")
return False
if borders_path is None:
borders_path = os.path.join(data_path, "borders")
if not os.path.exists(borders_path):
tar_lzma_path = os.path.join(data_path, "borders.tar.xz")
lzma_stream = BytesIO()
with open(tar_lzma_path, mode="rb") as f:
decompressed = lzma.decompress(f.read())
lzma_stream.write(decompressed)
lzma_stream.seek(0)
try:
with tarfile.open(fileobj=lzma_stream, mode="r") as tar:
tar.extractall(borders_path)
except PermissionError as e:
logger.error(str(e))
return False
logger.info("{} was created.".format(borders_path))
return True

View file

@ -0,0 +1,63 @@
#!/usr/bin/env python3
import os
import sys
import tarfile
from distutils import log
from distutils.command.build import build
from distutils.command.clean import clean
from six import BytesIO
try:
import lzma
except ImportError:
from backports import lzma
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
from data.base import DATA_PATH
from data.base import chdir
from data.base import get_version
from data.base import setup
TAR_LZMA_PATH = os.path.join(DATA_PATH, "borders.tar.xz")
class BuildCmd(build, object):
def run(self):
log.info("Creating {}".format(TAR_LZMA_PATH))
tar_stream = BytesIO()
borders_path = os.path.join(DATA_PATH, "borders")
with chdir(borders_path):
with tarfile.open(fileobj=tar_stream, mode="w") as tar:
for f in os.listdir(borders_path):
tar.add(f)
tar_stream.seek(0)
with lzma.open(TAR_LZMA_PATH, mode="w") as f:
f.write(tar_stream.read())
super(BuildCmd, self).run()
class CleanCmd(clean, object):
def run(self):
if os.path.exists(TAR_LZMA_PATH):
log.info("Removing {}".format(TAR_LZMA_PATH))
os.remove(TAR_LZMA_PATH)
super(CleanCmd, self).run()
setup(
__file__,
"borders",
["borders.tar.xz", "packed_polygons.bin"],
package_dir={"borders": ""},
packages=["borders"],
cmdclass={"build": BuildCmd, "clean": CleanCmd},
install_requires=["omim-data-files=={}".format(get_version())]
)

View file

@ -0,0 +1,44 @@
#!/usr/bin/env python3
import os
import sys
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
from data.base import get_version
from data.base import setup
setup(
__file__,
"essential",
[
"borders_vs_osm.csv",
"categories_brands.txt",
"categories_cuisines.txt",
"categories.txt",
"classificator.txt",
"colors.txt",
"countries_meta.txt",
"countries_synonyms.csv",
"countries.txt",
"external_resources.txt",
"fonts/blacklist.txt",
"fonts/unicode_blocks.txt",
"fonts/whitelist.txt",
"hierarchy.txt",
"mapcss-dynamic.txt",
"mapcss-mapping.csv",
"mixed_nodes.txt",
"mixed_tags.txt",
"old_vs_new.csv",
"patterns.txt",
"replaced_tags.txt",
"skipped_elements.json",
"synonyms.txt",
"transit_colors.txt",
"types.txt",
"ugc_types.csv",
"visibility.txt",
],
install_requires=["omim-data-files=={}".format(get_version())]
)

View file

@ -0,0 +1,26 @@
#!/usr/bin/env python3
import os
import sys
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
from data.base import get_version
from data.base import setup
setup(
__file__,
"fonts",
[
"00_NotoNaskhArabic-Regular.ttf",
"00_NotoSansThai-Regular.ttf",
"01_dejavusans.ttf",
"02_droidsans-fallback.ttf",
"03_jomolhari-id-a3d.ttf",
"04_padauk.ttf",
"05_khmeros.ttf",
"06_code2000.ttf",
"07_roboto_medium.ttf",
],
install_requires=["omim-data-files=={}".format(get_version())]
)

View file

@ -0,0 +1,26 @@
#!/usr/bin/env python3
import os
import sys
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
from data.base import get_version
from data.base import setup
setup(
__file__,
"styles",
[
"drules_proto.bin",
"drules_proto_default_light.bin",
"drules_proto_default_light.txt",
"drules_proto_default_dark.bin",
"drules_proto_default_dark.txt",
"drules_proto_vehicle_light.bin",
"drules_proto_vehicle_light.txt",
"drules_proto_vehicle_dark.bin",
"drules_proto_vehicle_dark.txt",
],
install_requires=["omim-data-files=={}".format(get_version())]
)

View file

@ -0,0 +1,43 @@
import os
import site
import sys
def find_data_files_in_user_installations(directory):
possible_paths = [os.path.join(site.USER_BASE, directory),] + [
os.path.normpath(os.path.join(p, "../../..", directory))
for p in site.getusersitepackages()
]
for p in possible_paths:
if os.path.isdir(p):
return p
return None
def find_data_files_in_sys_installations(directory):
possible_paths = [os.path.join(sys.prefix, directory),] + [
os.path.normpath(os.path.join(p, "../../..", directory))
for p in site.getsitepackages()
]
for p in possible_paths:
if os.path.isdir(p):
return p
return None
def find_data_files(directory, user_inst_first=True):
functions = [
(int(user_inst_first), find_data_files_in_user_installations),
(int(not user_inst_first), find_data_files_in_sys_installations),
]
functions.sort(key=lambda k: k[0])
for prior, func in functions:
res = func(directory)
if res is not None:
return res
return None

View file

@ -0,0 +1,30 @@
#!/usr/bin/env python3
import os
import sys
import setuptools
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
from pyhelpers.setup import chdir
from pyhelpers.setup import get_version
with chdir(os.path.abspath(os.path.dirname(__file__))):
supported_pythons = ("2", "2.7", "3", "3.5", "3.6", "3.7")
setuptools.setup(
name="omim-data-files",
version=str(get_version()),
author="CoMaps",
author_email="info@comaps.app",
description="This package is a library for dealing with data files.",
url="https://codeberg.org/comaps",
package_dir={"data_files": ""},
packages=["data_files",],
classifiers=["License :: OSI Approved :: Apache Software License",]
+ [
"Programming Language :: Python :: {}".format(supported_python)
for supported_python in supported_pythons
],
)

View file

View file

@ -0,0 +1,63 @@
import argparse
import itertools
import logging
import os
import wikipediaapi
from descriptions.descriptions_downloader import check_and_get_checker
from descriptions.descriptions_downloader import download_from_wikidata_tags
from descriptions.descriptions_downloader import download_from_wikipedia_tags
from descriptions.descriptions_downloader import log
def parse_args():
parser = argparse.ArgumentParser(description="Download wiki pages.", usage="python3 -m descriptions "
"--output_dir ~/maps_build/descriptions "
"--wikipedia ~/maps_build/wiki_urls.txt "
"--wikidata ~/maps_build/id_to_wikidata.csv "
"--langs en de fr es ru tr"
)
parser.add_argument(
"--output_dir", metavar="PATH", type=str, help="Output dir for saving pages."
)
parser.add_argument(
"--popularity", metavar="PATH", type=str,
help="File with popular object ids with wikipedia data to download. If not given, download all objects.",
)
parser.add_argument(
"--wikipedia", metavar="PATH", type=str, required=True, help="Input file with wikipedia url.",
)
parser.add_argument(
"--wikidata", metavar="PATH", type=str, help="Input file with wikidata ids."
)
parser.add_argument("--langs", metavar="LANGS", type=str, nargs="+", action="append",
help="Languages for pages. If left blank, pages in all available languages will be loaded.",
)
return parser.parse_args()
def main():
log.setLevel(logging.WARNING)
wikipediaapi.log.setLevel(logging.DEBUG)
args = parse_args()
wikipedia_file = args.wikipedia
wikidata_file = args.wikidata
output_dir = args.output_dir
popularity_file = args.popularity
langs = list(itertools.chain.from_iterable(args.langs or []))
os.makedirs(output_dir, exist_ok=True)
checker = check_and_get_checker(popularity_file)
download_from_wikipedia_tags(wikipedia_file, output_dir, langs, checker)
if wikidata_file is None:
log.warning(f"Wikidata file not set.")
elif os.path.exists(wikidata_file):
download_from_wikidata_tags(wikidata_file, output_dir, langs, checker)
else:
log.warning(f"Wikidata ({wikidata_file}) file not found.")
main()

View file

@ -0,0 +1,318 @@
import json
import logging
import os
import random
import time
import types
import urllib.error
import urllib.parse
import http.client
from concurrent.futures import ThreadPoolExecutor
import htmlmin
import requests
import wikipediaapi
from bs4 import BeautifulSoup
from wikidata.client import Client
from descriptions.exceptions import GettingError, ParseError
"""
This script downloads Wikipedia pages for different languages.
"""
log = logging.getLogger(__name__)
WORKERS = 80
REQUEST_ATTEMPTS = 8
ATTEMPTS_PAUSE_SECONDS = 4.0
HEADERS = {f"h{x}" for x in range(1, 7)}
BAD_SECTIONS = {
"en": [
"External links",
"Sources",
"See also",
"Bibliography",
"Further reading",
"References",
],
"de": [
"Einzelnachweise",
"Weblinks",
"Literatur",
"Siehe auch",
"Anmerkungen",
"Anmerkungen und Einzelnachweise",
"Filme",
"Einzelbelege",
],
"fr": [
"Bibliographie",
"Lien externe",
"Voir aussi",
"Liens externes",
"Références",
"Notes et références",
"Articles connexes",
],
"es": ["Vínculos de interés", "Véase también", "Enlaces externos", "Referencias"],
"ru": ["Литература", "Ссылки", "См. также", "Библиография", "Примечания"],
"tr": ["Kaynakça", "Ayrıca bakınız", "Dış bağlantılar", "Notlar", "Dipnot"],
}
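# Fetches attribute `prop` from `obj` (calling it when it is a method), retrying transient
# network and decoding errors up to REQUEST_ATTEMPTS times with a random pause between tries.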
def try_get(obj, prop, *args, **kwargs):
attempts = REQUEST_ATTEMPTS
while attempts != 0:
try:
attr = getattr(obj, prop)
is_method = isinstance(attr, types.MethodType)
return attr(*args, **kwargs) if is_method else attr
except (
requests.exceptions.ConnectionError,
requests.exceptions.ReadTimeout,
json.decoder.JSONDecodeError,
http.client.HTTPException,
) as e:
log.debug(e)
except urllib.error.HTTPError as e:
if e.code == 404:
raise GettingError(f"Page not found {e.msg}")
except KeyError:
raise GettingError(f"Getting {prop} field failed. {prop} not found.")
except urllib.error.URLError:
raise GettingError(f"URLError: {obj}, {prop}, {args}, {kwargs}")
time.sleep(random.uniform(0.0, ATTEMPTS_PAUSE_SECONDS))
attempts -= 1
raise GettingError(f"Getting {prop} field failed")
def read_popularity(path):
"""
:param path: a path of popularity file. A file contains '<id>,<rank>' rows.
:return: a set of popularity object ids
"""
ids = set()
for line in open(path):
try:
ident = int(line.split(",", maxsplit=1)[0])
except (AttributeError, IndexError):
continue
ids.add(ident)
return ids
def should_download_page(popularity_set):
def wrapped(ident):
return popularity_set is None or ident in popularity_set
return wrapped
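# Removes every section whose header is listed in BAD_SECTIONS for the language:
# the header itself and all following elements up to the next header of the same level.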
def remove_bad_sections(soup, lang):
if lang not in BAD_SECTIONS:
return soup
it = iter(soup.find_all())
current = next(it, None)
current_header_level = None
while current is not None:
if current.name in HEADERS and current.text.strip() in BAD_SECTIONS[lang]:
current_header_level = current.name
current.extract()
current = next(it, None)
while current is not None:
if current.name == current_header_level:
break
current.extract()
current = next(it, None)
else:
current = next(it, None)
return soup
def beautify_page(html, lang):
soup = BeautifulSoup(html, "html.parser")
for x in soup.find_all():
if len(x.text.strip()) == 0:
x.extract()
soup = remove_bad_sections(soup, lang)
html = str(soup.prettify())
html = htmlmin.minify(html, remove_empty_space=True)
return html
def need_lang(lang, langs):
return lang in langs if langs else True
def get_page_info(url):
url = urllib.parse.unquote(url)
parsed = urllib.parse.urlparse(url)
try:
lang = parsed.netloc.split(".", maxsplit=1)[0]
except (AttributeError, IndexError):
raise ParseError(f"{parsed.netloc} is incorrect.")
try:
page_name = parsed.path.rsplit("/", maxsplit=1)[-1]
except (AttributeError, IndexError):
raise ParseError(f"{parsed.path} is incorrect.")
return lang, page_name
def get_wiki_page(lang, page_name):
wiki = wikipediaapi.Wikipedia(
language=lang, extract_format=wikipediaapi.ExtractFormat.HTML
)
return wiki.page(page_name)
def download(directory, url):
try:
lang, page_name = get_page_info(url)
except ParseError:
log.exception(f"Parsing failed. {url} is incorrect.")
return None
path = os.path.join(directory, f"{lang}.html")
if os.path.exists(path):
log.debug(f"{path} already exists.")
return None
page = get_wiki_page(lang, page_name)
try:
text = try_get(page, "text")
except GettingError as e:
log.exception(f"Error: page {page_name} is not downloaded for lang {lang} and url {url} ({e}).")
return None
page_size = len(text)
if page_size > 0:
os.makedirs(directory, exist_ok=True)
text = beautify_page(text, lang)
log.info(f"Save to {path} {lang} {page_name} {page_size}.")
with open(path, "w") as file:
file.write(text)
else:
log.warning(f"Page {url} is empty. It has not been saved.")
return text
def get_wiki_langs(url):
lang, page_name = get_page_info(url)
page = get_wiki_page(lang, page_name)
curr_lang = [(lang, url)]
try:
langlinks = try_get(page, "langlinks")
return (
list(zip(langlinks.keys(), [link.fullurl for link in langlinks.values()]))
+ curr_lang
)
except GettingError as e:
log.exception(f"Error: no languages for page {page_name} with url {url} ({e}).")
return curr_lang
def download_all_from_wikipedia(path, url, langs):
try:
available_langs = get_wiki_langs(url)
except ParseError:
log.exception("Parsing failed. {url} is incorrect.")
return
available_langs = filter(lambda x: need_lang(x[0], langs), available_langs)
for lang in available_langs:
download(path, lang[1])
def wikipedia_worker(output_dir, checker, langs):
def wrapped(line):
if not line.strip():
return
try:
# The first field is the mwm path that produced this entry; it is not needed here.
_, ident, url = line.split("\t")
ident = int(ident)
if not checker(ident):
return
url = url.strip()
except (AttributeError, ValueError):
log.exception(f"{line} is incorrect.")
return
parsed = urllib.parse.urlparse(url)
path = os.path.join(output_dir, parsed.netloc, parsed.path[1:])
download_all_from_wikipedia(path, url, langs)
return wrapped
def download_from_wikipedia_tags(input_file, output_dir, langs, checker):
with open(input_file) as file:
_ = file.readline() # skip header
with ThreadPoolExecutor(WORKERS) as pool:
pool.map(wikipedia_worker(output_dir, checker, langs), file)
def get_wikidata_urls(entity, langs):
try:
keys = entity.data["sitelinks"].keys()
except (KeyError, AttributeError):
log.exception(f"Sitelinks not found for {entity.id}.")
return None
return [
entity.data["sitelinks"][k]["url"]
for k in keys
if any([k.startswith(lang) for lang in langs])
]
def wikidata_worker(output_dir, checker, langs):
def wrapped(line):
if not line.strip():
return
try:
ident, wikidata_id = line.split("\t")
ident = int(ident)
wikidata_id = wikidata_id.strip()
if not checker(ident):
return
except (AttributeError, ValueError):
log.exception(f"{line} is incorrect.")
return
client = Client()
try:
entity = try_get(client, "get", wikidata_id, load=True)
except GettingError:
log.exception(f"Error: page is not downloaded {wikidata_id}.")
return
urls = get_wikidata_urls(entity, langs)
if not urls:
return
path = os.path.join(output_dir, wikidata_id)
for url in urls:
download(path, url)
return wrapped
def download_from_wikidata_tags(input_file, output_dir, langs, checker):
wikidata_output_dir = os.path.join(output_dir, "wikidata")
os.makedirs(wikidata_output_dir, exist_ok=True)
with open(input_file) as file:
with ThreadPoolExecutor(WORKERS) as pool:
pool.map(wikidata_worker(wikidata_output_dir, checker, langs), file)
def check_and_get_checker(popularity_file):
popularity_set = None
if popularity_file is None:
log.warning(f"Popularity file not set.")
elif os.path.exists(popularity_file):
popularity_set = read_popularity(popularity_file)
log.info(f"Popularity set size: {len(popularity_set)}.")
else:
log.error(f"Popularity file ({popularity_file}) not found.")
return should_download_page(popularity_set)

View file

@ -0,0 +1,10 @@
class DescriptionError(Exception):
pass
class ParseError(DescriptionError):
pass
class GettingError(DescriptionError):
pass

View file

@ -0,0 +1,5 @@
htmlmin2==0.1.13
requests>=2.31.0
beautifulsoup4==4.9.1
wikidata==0.6.1
wikipedia-api==0.5.4

View file

@ -0,0 +1,5 @@
htmlmin2==0.1.13
requests>=2.31.0
beautifulsoup4==4.9.1
wikidata==0.6.1
wikipedia-api==0.5.4

View file

@ -0,0 +1,32 @@
#!/usr/bin/env python3
import os
import sys
import setuptools
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
from pyhelpers.setup import chdir
from pyhelpers.setup import get_version
from pyhelpers.setup import get_requirements
with chdir(os.path.abspath(os.path.dirname(__file__))):
setuptools.setup(
name="omim-descriptions",
version=str(get_version()),
author="CoMaps",
author_email="info@comaps.app",
description="This package is a library that provides descriptions "
"(such as those from Wikipedia) to geographic objects.",
url="https://codeberg.org/comaps",
package_dir={"descriptions": ""},
packages=["descriptions"],
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
],
python_requires=">=3.6",
install_requires=get_requirements(),
)

View file

@ -0,0 +1,43 @@
#!/usr/bin/env python3
import sys
import os
import shutil
def copy_style_file(style_path, drules_suffix, target_path):
if not os.path.exists(style_path):
print('Path {0} is not found'.format(style_path))
return
drules_proto_path = os.path.join(style_path, 'drules_proto_design.bin')
if not os.path.exists(drules_proto_path):
print('Path {0} is not found'.format(drules_proto_path))
return
shutil.copyfile(drules_proto_path, os.path.join(target_path, 'drules_proto' + drules_suffix + '.bin'))
for density in ['6plus', 'hdpi', 'mdpi', 'xhdpi', 'xxhdpi', 'xxxhdpi']:
res_path = os.path.join(style_path, 'resources-' + density + "_design")
if os.path.exists(res_path):
shutil.copytree(res_path, os.path.join(target_path, 'resources-' + density + drules_suffix))
if len(sys.argv) < 2:
print('Usage: {0} <path_to_omim/data/styles> [<target_path>]'.format(sys.argv[0]))
sys.exit()
path_to_styles = sys.argv[1]
if not os.path.isdir(path_to_styles):
print('Invalid path to styles folder')
sys.exit()
output_name = os.path.join('' if len(sys.argv) < 3 else sys.argv[2], 'styles')
if os.path.exists(output_name):
shutil.rmtree(output_name)
os.makedirs(output_name)
paths = ['default/light', 'default/dark', 'vehicle/light', 'vehicle/dark']
suffixes = ['_default_light', '_default_dark', '_vehicle_light', '_vehicle_dark']
for i in range(0, len(paths)):
copy_style_file(os.path.join(path_to_styles, paths[i], 'out'), suffixes[i], output_name)

View file

@ -0,0 +1,108 @@
import re
import json
from datetime import datetime, timezone
from pathlib import Path
SCRIPT_DIR: Path = Path(__file__).parent.resolve()
MAPCSS_FILE: Path = SCRIPT_DIR / "../../data/styles/default/include/Icons.mapcss"
TAGINFO_FILE: Path = SCRIPT_DIR / "../../data/taginfo.json"
BASE_ICON_URL: str = "https://codeberg.org/comaps/comaps/raw/branch/main/data/styles/default/light/symbols/"
PROJECT_INFO: dict[str, str] = {
"name": "CoMaps",
"description": "CoMaps is a community-focused privacy navigation iOS & Android app for travelers - drivers, hikers, and cyclists.",
"project_url": "https://comaps.app",
"doc_url": "https://codeberg.org/comaps/comaps/",
"icon_url": "https://codeberg.org/comaps/comaps/media/branch/main/docs/badges/logo.svg",
"contact_name": "CoMaps",
"contact_email": "hello@comaps.app"
}
def parse_mapcss(text: str) -> list[dict[str, any]]:
tags: dict[tuple[str, str | None, str], dict[str, any]] = {}
# Split blocks into: selector { props }
blocks: list[tuple[str, str]] = re.findall(r"([^\{]+)\{([^\}]*)\}", text, re.MULTILINE)
for selector, props in blocks:
# Extract icon filename from props
icon_re: re.Pattern = re.compile(r"icon-image:\s*([^;]+);")
icon_match: re.Match | None = icon_re.search(props)
icon_url: str | None = None
if icon_match:
icon_file: str = icon_match.group(1).strip()
if icon_file and icon_file.lower() not in ["none", "zero-icon.svg"]:
icon_url = BASE_ICON_URL + icon_file
# Split the selector into lines
lines: list[str] = [line.strip() for line in selector.split("\n") if line.strip()]
for line in lines:
# Find anything inside square brackets
square_brackets_re: re.Pattern = re.compile(r"\[(.*?)\]")
square_brackets: list[str] = square_brackets_re.findall(line)
if not square_brackets:
continue
# Find key=value pairs
pairs: list[tuple[str, str | None]] = []
for sqb in square_brackets:
key, sep, value = sqb.partition("=")
key = key.strip()
if key.startswith("!"):
continue # skip negated keys
value = value.strip() if sep else None
pairs.append((key, value))
# Hardcode: convert value "not" to "no"
pairs = [(k, "no" if v == "not" else v) for k, v in pairs]
if not pairs:
continue # skip if no valid pairs
# Build shared description from all pairs
desc: str = " + ".join(f"{k}={v if v is not None else '*'}" for k, v in pairs)
# Emit a tag per pair
for key, value in pairs:
tag_id: tuple[str, str | None, str] = (key, value, desc)
if tag_id not in tags:
tag: dict[str, any] = {
"description": desc,
"key": key,
}
if value is not None:
tag["value"] = value
if icon_url:
tag["icon_url"] = icon_url
tags[tag_id] = tag
else:
if icon_url:
tags[tag_id]["icon_url"] = icon_url
# Sort by description, then key, then value
return sorted(tags.values(), key=lambda x: (x["description"], x["key"], x.get("value", "")))
def main() -> None:
with open(MAPCSS_FILE, "r", encoding="utf-8") as f:
mapcss: str = f.read()
tags: list[dict[str, any]] = parse_mapcss(mapcss)
data: dict[str, any] = {
"data_format": 1,
"data_url": "https://codeberg.org/comaps/comaps/raw/branch/main/data/taginfo.json",
"data_updated": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
"project": PROJECT_INFO,
"tags": tags
}
with open(TAGINFO_FILE, "w", encoding="utf-8") as f:
json.dump(data, f, indent=4, ensure_ascii=False)
print(f"✅ JSON saved to {TAGINFO_FILE}")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,286 @@
#!/usr/bin/env python3
import csv
import json
import argparse
import mimetypes
import traceback
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from os import path, access, R_OK, linesep
from io import StringIO
from datetime import datetime
class GoogleMapsConverter:
def __init__(self, input_file=None, output_format=None, bookmark_list_name=None, api_key=None):
print("Follow these steps to export your saved places from Google Maps and convert them to a GPX or KML File")
print()
print("1. Create an API key for Google Places API following this guide")
print(" https://developers.google.com/maps/documentation/places/web-service/get-api-key")
print("2. Go to https://takeout.google.com/ and sign in with your Google account")
print("3. Select 'Saved' and 'Maps (My Places)' and create an export")
print("4. Download and unzip the export")
print ("5a. Look for CSV files (e.g. for lists) in the folder Takeout/Saved")
print ("5b. Look for GeoJSON files (e.g. for Saved Places) in the folder Takeout/Maps")
print()
if input_file is None:
self.get_input_file()
else:
self.input_file = input_file
if not path.isfile(self.input_file):
raise FileNotFoundError(f"Couldn't find {self.input_file}")
if not access(self.input_file, R_OK):
raise PermissionError(f"Couldn't read {self.input_file}")
if output_format is None:
self.get_output_format()
else:
self.output_format = output_format
if bookmark_list_name is None:
self.get_bookmark_list_name()
else:
self.bookmark_list_name = bookmark_list_name
self.output_file = self.bookmark_list_name + "." + self.output_format
if api_key is None:
self.get_api_key()
else:
self.api_key = api_key
self.places = []
def get_input_file(self):
while True:
self.input_file = input("Path to the file: ")
if not path.isfile(self.input_file):
print(f"Couldn't find {self.input_file}")
continue
if not access(self.input_file, R_OK):
print(f"Couldn't read {self.input_file}")
continue
break
def get_output_format(self):
while True:
self.output_format = input("Output format (kml or gpx): ").lower()
if self.output_format not in ['kml', 'gpx']:
print("Please provide a valid output format" + linesep)
continue
else:
break
def get_bookmark_list_name(self):
while True:
self.bookmark_list_name = input("Bookmark list name: ")
if not self.bookmark_list_name:
print("Please provide a name" + linesep)
continue
else:
self.output_file = self.bookmark_list_name + "." + self.output_format
break
def get_api_key(self):
while True:
if self.api_key:
break
self.api_key = input("API key: ")
if not self.api_key:
print("Please provide an API key" + linesep)
continue
else:
break
def convert_timestamp(self, timestamp):
if timestamp.endswith('Z'):
timestamp = timestamp[:-1]
date = datetime.fromisoformat(timestamp)
return date.strftime('%Y-%m-%d %H:%M:%S')
def get_json(self, url):
max_attempts = 3
for retry in range(max_attempts):
try:
response = urllib.request.urlopen(url)
return json.load(response)
except urllib.error.URLError:
print(f"Couldn't connect to Google Maps. Retrying... ({retry + 1}/{max_attempts})")
if retry < max_attempts - 1:
continue
else:
raise
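# Resolves a place via the Google Places API: text search when a query string is given,
# place details when a cid is given; returns {'name', 'coordinates'} or None.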
def get_name_and_coordinates_from_google_api(self, api_key, q=None, cid=None):
url = None
if q:
params = {'query': q, 'key': api_key}
url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?{urllib.parse.urlencode(params)}"
elif cid:
params = {'cid': cid, 'fields': 'geometry,name', 'key': api_key}
url= f"https://maps.googleapis.com/maps/api/place/details/json?{urllib.parse.urlencode(params)}"
else:
return None
result = self.get_json(url)
if result['status'] == 'OK':
place = result.get('results', [result.get('result')])[0]
location = place['geometry']['location']
name = place['name']
return {'name': name, 'coordinates': [str(location['lat']), str(location['lng'])]}
else:
print(f'{result.get("status", "")}: {result.get("error_message", "")}')
return None
def process_geojson_features(self, content):
try:
geojson = json.loads(content)
except json.JSONDecodeError:
raise ValueError(f"The file {self.input_file} is not a valid JSON file.")
for feature in geojson['features']:
geometry = feature['geometry']
coordinates = geometry['coordinates']
properties = feature['properties']
google_maps_url = properties.get('google_maps_url', '')
location = properties.get('location', {})
name = None
# Check for "null island" coordinates [0, 0]
# These are a common artifact of Google Maps exports
# See https://github.com/organicmaps/organicmaps/pull/8721
if coordinates == [0, 0]:
parsed_url = urllib.parse.urlparse(google_maps_url)
query_params = urllib.parse.parse_qs(parsed_url.query)
# Google Maps URLs can contain either a 'q' or a 'cid' query string parameter
q = query_params.get('q', [None])[0]
cid = query_params.get('cid', [None])[0]
# Sometimes the 'q' parameter is a comma-separated lat long pair
if q and ',' in q and all(part.replace('.', '', 1).replace('-', '', 1).isdigit() for part in q.split(',')):
coordinates = q.split(',')
else:
result = self.get_name_and_coordinates_from_google_api(self.api_key, q=q, cid=cid)
if result:
coordinates = result['coordinates']
if 'name' in result:
name = result['name']
else:
print(f"Couldn't extract coordinates from Google Maps. Skipping {q or cid}")
coord_string = ', '.join(map(str, coordinates)) if coordinates else None
# If name was not retrieved from the Google Maps API, then use the name from the location object,
# with a fallback to the address, and finally to the coordinates
if not name:
name = location.get('name') or location.get('address') or coord_string
description = ""
if 'address' in location:
description += f"<b>Address:</b> {location['address']}<br>"
if 'date' in properties:
description += f"<b>Date bookmarked:</b> {self.convert_timestamp(properties['date'])}<br>"
if 'Comment' in properties:
description += f"<b>Comment:</b> {properties['Comment']}<br>"
if google_maps_url:
description += f"<b>Google Maps URL:</b> <a href=\"{google_maps_url}\">{google_maps_url}</a><br>"
place = {
'name': name,
'description': description
}
if coordinates:
place['coordinates'] = ','.join(map(str, coordinates))
else:
place['coordinates'] = '0,0'
self.places.append(place)
def process_csv_features(self, content):
csvreader = csv.reader(StringIO(content), delimiter=',')
next(csvreader) # skip header
for idx, row in enumerate(csvreader):
name = row[0]
description = row[1]
url = row[2]
print(f"\rProgress: {idx + 1} Parsing {name}...", end='')
try:
if url.startswith("https://www.google.com/maps/search/"):
coordinates = url.split('/')[-1].split(',')
coordinates.reverse()
coordinates = ','.join(coordinates)
elif url.startswith('https://www.google.com/maps/place/'):
ftid = url.split('!1s')[-1]
params = {'key': self.api_key, 'fields': 'geometry', 'ftid': ftid}
places_url = "https://maps.googleapis.com/maps/api/place/details/json?" \
+ urllib.parse.urlencode(params)
try:
data = self.get_json(places_url)
location = data['result']['geometry']['location']
coordinates = ','.join([str(location['lng']), str(location['lat'])])
except (urllib.error.URLError, KeyError):
print(f"Couldn't extract coordinates from Google Maps. Skipping {name}")
continue
else:
print(f"Couldn't parse url. Skipping {name}")
continue
self.places.append({'name': name, 'description': description, 'coordinates': coordinates})
except Exception:
print(f"Couldn't parse {name}: {traceback.format_exc()}")
def write_kml(self):
root = ET.Element("kml")
doc = ET.SubElement(root, "Document")
for place in self.places:
placemark = ET.SubElement(doc, "Placemark")
ET.SubElement(placemark, "name").text = place['name']
ET.SubElement(placemark, "description").text = place['description']
point = ET.SubElement(placemark, "Point")
ET.SubElement(point, "coordinates").text = place['coordinates']
tree = ET.ElementTree(root)
tree.write(self.output_file)
def write_gpx(self):
gpx = ET.Element("gpx", version="1.1", creator="GoogleMapsConverter")
for place in self.places:
wpt = ET.SubElement(gpx, "wpt", lat=place['coordinates'].split(',')[1], lon=place['coordinates'].split(',')[0])
ET.SubElement(wpt, "name").text = place['name']
ET.SubElement(wpt, "desc").text = place['description']
tree = ET.ElementTree(gpx)
tree.write(self.output_file)
def convert(self):
with open(self.input_file, 'r') as file:
content = file.read().strip()
if not content:
raise ValueError(f"The file {self.input_file} is empty or not a valid JSON file.")
mime_type, _ = mimetypes.guess_type(self.input_file)
if mime_type == 'application/geo+json' or mime_type == 'application/json':
self.process_geojson_features(content)
elif mime_type == 'text/csv':
self.process_csv_features(content)
else:
raise ValueError(f"Unsupported file format: {self.input_file}")
# Write to output file in the desired format, KML or GPX
if self.output_format == 'kml':
self.write_kml()
elif self.output_format == 'gpx':
self.write_gpx()
print("Exported Google Saved Places to " + path.abspath(self.output_file))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Convert Google Maps saved places to KML or GPX.")
parser.add_argument('--input', help="Path to the file")
parser.add_argument('--format', choices=['kml', 'gpx'], default='gpx', help="Output format: 'kml' or 'gpx'")
parser.add_argument('--bookmark_list_name', help="Name of the bookmark list")
parser.add_argument('--api_key', help="API key for Google Places API")
args = parser.parse_args()
converter = GoogleMapsConverter(
input_file=args.input,
output_format=args.format,
bookmark_list_name=args.bookmark_list_name,
api_key=args.api_key
)
converter.convert()

View file

@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
GPX to iOS Simulator simctl location command
Converts a GPX file to simctl location start command for realistic iOS location simulation.
Tested with CoMaps exported tracks
Usage:
python gpx_to_simctl.py test_route.gpx
"""
import argparse
import xml.etree.ElementTree as ET
from pathlib import Path
import sys
import subprocess
def extract_track_points_from_gpx(gpx_file: Path):
"""Extract track points from GPX file."""
tree = ET.parse(gpx_file)
root = tree.getroot()
points = []
# Find all elements with lat/lon attributes
for elem in root.findall('.//*[@lat][@lon]'):
lat = float(elem.get('lat'))
lon = float(elem.get('lon'))
points.append((lat, lon))
return points
def generate_simctl_command(points, speed_kmh=60, interval=0.1, distance=None, device="booted"):
"""Generate simctl location start command."""
if len(points) < 2:
raise ValueError("Need at least 2 waypoints for simctl location start")
# Convert km/h to m/s
speed_mps = speed_kmh / 3.6
# Format waypoints as lat,lon pairs
waypoint_strings = [f"{lat:.6f},{lon:.6f}" for lat, lon in points]
# Build command
cmd = ["xcrun", "simctl", "location", device, "start"]
cmd.append(f"--speed={speed_mps:.2f}")
if distance:
cmd.append(f"--distance={distance}")
else:
cmd.append(f"--interval={interval}")
cmd.extend(waypoint_strings)
return cmd
def main():
parser = argparse.ArgumentParser(
description="Convert GPX file to simctl location start command",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
python gpx_to_simctl.py test_route.gpx --speed 60 --interval 0.1
python gpx_to_simctl.py test_route.gpx --speed 80 --distance 10 --clear-first
python gpx_to_simctl.py test_route.gpx --speed 50 --dry-run
"""
)
parser.add_argument('gpx_file', help='Input GPX file')
parser.add_argument('--speed', type=float, default=60,
help='Speed in km/h (default: 60)')
parser.add_argument('--interval', type=float, default=0.1,
help='Update interval in seconds (default: 0.1)')
parser.add_argument('--distance', type=float,
help='Update distance in meters (overrides --interval)')
parser.add_argument('--device', default='booted',
help='Target device (default: booted)')
parser.add_argument('--dry-run', action='store_true',
help='Show command without executing (default: execute)')
parser.add_argument('--clear-first', action='store_true',
help='Clear existing location before starting')
args = parser.parse_args()
# Validate input file
gpx_file = Path(args.gpx_file)
if not gpx_file.exists():
print(f"Error: GPX file '{gpx_file}' not found", file=sys.stderr)
return 1
try:
# Extract waypoints
points = extract_track_points_from_gpx(gpx_file)
print(f"Extracted {len(points)} waypoints from {gpx_file}")
if len(points) < 2:
print("Error: Need at least 2 waypoints for location simulation", file=sys.stderr)
return 1
# Generate command
cmd = generate_simctl_command(
points,
speed_kmh=args.speed,
interval=args.interval,
distance=args.distance,
device=args.device
)
# Show command
print(f"\nGenerated simctl command:")
print(" ".join(cmd))
# Calculate simulation info
speed_mps = args.speed / 3.6
total_distance = 0
for i in range(1, len(points)):
lat1, lon1 = points[i-1]
lat2, lon2 = points[i]
# Simple distance approximation
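# Note: this flat-earth estimate treats one degree as ~111 km on both axes and ignores
# the cos(latitude) scaling of longitude, so it only gives a rough figure.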
total_distance += ((lat2-lat1)**2 + (lon2-lon1)**2)**0.5 * 111000 # rough conversion to meters
duration = total_distance / speed_mps
print(f"\nSimulation info:")
print(f" Speed: {args.speed} km/h ({speed_mps:.1f} m/s)")
print(f" Waypoints: {len(points)}")
print(f" Estimated distance: {total_distance/1000:.2f} km")
print(f" Estimated duration: {duration:.0f} seconds ({duration/60:.1f} minutes)")
if args.distance:
print(f" Update distance: {args.distance}m")
else:
print(f" Update interval: {args.interval}s")
# Execute by default unless dry-run
if args.dry_run:
print(f"\n[DRY RUN] Command that would be executed:")
print(f" {' '.join(cmd)}")
if args.clear_first:
clear_cmd = ["xcrun", "simctl", "location", args.device, "clear"]
print(f" (would clear location first: {' '.join(clear_cmd)})")
else:
print(f"\nExecuting command...")
# Clear location first if requested
if args.clear_first:
clear_cmd = ["xcrun", "simctl", "location", args.device, "clear"]
print("Clearing existing location...")
subprocess.run(clear_cmd, check=True)
# Execute the start command
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
print("✅ Location simulation started successfully!")
if result.stdout.strip():
print(result.stdout.strip())
else:
print(f"❌ Error executing command:")
print(result.stderr.strip())
return 1
return 0
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
if __name__ == '__main__':
sys.exit(main())

View file

@ -0,0 +1,68 @@
Edit the rclone conf secret for Codeberg Actions to deliver maps to e.g. /var/www/html/maps/251231 via a limited user.
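A minimal sketch of what such a restricted remote could look like in the rclone conf (the remote name, host, user and key path below are placeholders, not the real values):
```
[cdn-xx-1]
type = sftp
host = cdn-xx-1.example.org
user = mapsdeploy
key_file = /home/ci/.ssh/id_ed25519
```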
apt update
apt install nginx vim
### set hostname for ssh sanity (will show in console upon next bash launch):
vim /etc/hostname
hostname cdn-XX-1
### for SSL:
sudo snap install --classic certbot
sudo certbot --nginx
### remove IPs from logging on line ~36:
vim /etc/nginx/nginx.conf
```
##
# Logging Settings
##
log_format comaps '0.0.0.0 - - [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';
access_log /var/log/nginx/access.log comaps;
```
### set up monitoring:
apt install goaccess
edit `/etc/goaccess/goaccess.conf` and uncomment `time-format %H:%M:%S`, `date-format %Y-%m-%d`, and `log-format COMBINED`
vim /etc/crontab
`*/5 * * * * root /usr/bin/goaccess /var/log/nginx/access.log -o /var/www/html/monitor.html`
### set up basic http pages/responses:
cd /var/www/html/
mkdir maps
rm index.nginx-debian.html
wget https://www.comaps.app/favicon.ico
vim robots.txt
```
User-agent: *
Disallow: /
```
vim index.html
```
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>CoMaps CDN</title>
</head>
<body>
<h1>This is a CDN for <a href="https://comaps.app">CoMaps</a></h1>
<h2>Resources:</h2>
<ol>
<li>CoMaps <a href="https://cdn.comaps.app/subway/">subway validator</a></li>
<li>CoMaps <a href="https://comaps.app/news/">News</a></li>
<li><a href="https://comaps.app/donate/">Donate</a></li>
</ol>
</body>
</html>
```

View file

@ -0,0 +1,20 @@
# French National Library Archiving
The library has taken an interest in archiving CoMaps and its data as a snapshot
of our world and the way people interact with maps, in a way that doesn't rely on
maintaining servers etc. (With an APK and MWM files and some copy-paste, you can
reproduce our app on an emulator etc.)
## Instructions
Every 6 months or so, @jeanbaptisteC may ask us to upload the most recent map version
and a custom APK with a bundled World map (googleRelease), signed with production keys (like the web release).
Credentials for `frlibrary` are in the mapgen rclone config, or in zyphlar/pastk's password managers.
To upload (modify dates accordingly):
```
rclone copy CoMaps-25110702-google-release.apk frlibrary:/apk/
rclone copy 251104 frlibrary:/maps/251104
```

View file

@ -0,0 +1,186 @@
# maps_generator
`maps_generator` is the Python CLI for generating `.mwm` map files for CoMaps. This tool functions as the driver for the `generator_tool` C++ executable.
**Use the `generator_tool` and application from the same release. The application does not support
maps built by a generator_tool newer than the app.**
## What are maps?
Maps are `.mwm` binary files with special meta-information for rendering, searching, routing, and other use cases.
Files from [data/borders](https://codeberg.org/comaps/comaps/src/branch/main/data/borders) define map boundaries for each individual file. The world is segmented into separate files by these boundaries, with the intent of having manageably small files to download. These files are referred to as *maps* or *countries*. A *country* refers to one of these files, not necessarily a geographic country. Also note that there are two special countries called *World* and *WorldCoasts*. These are small simplified maps of the world and coastlines (sea and ocean water cover) used when other maps have not yet been downloaded.
## Setup
You must have Python version >= 3.7 and complete the following steps:
1. Switch to the branch of your app's version (see the note at the top of this README). E.g.:
```sh
git checkout 2023.06.04-13-android
```
The app version can be found in the "About" section of CoMaps.
2. Build the `generator_tool` binary (run from the root of the repo):
```sh
./tools/unix/build_omim.sh -r generator_tool
./tools/unix/build_omim.sh -r world_roads_builder_tool
./tools/unix/build_omim.sh -r mwm_diff_tool
```
3. Go to the `python` directory:
```sh
cd tools/python/
```
4. Install python dependencies:
```sh
pip install -r maps_generator/requirements_dev.txt
```
5. Create a [configuration file with defaults](https://codeberg.org/comaps/comaps/src/branch/main/tools/python/maps_generator/var/etc/map_generator.ini.default):
```sh
cp maps_generator/var/etc/map_generator.ini.default maps_generator/var/etc/map_generator.ini
```
6. Read through and edit the configuration file.
Ensure that `OMIM_PATH` is set correctly.
The default `PLANET_URL` setting makes the generator download an OpenStreetMap dump file for North Macedonia from [Geofabrik](http://download.geofabrik.de/index.html). Change `PLANET_URL` and `PLANET_MD5_URL` to get the region you want.
## Basic Usage
Make sure you are in the `tools/python` repo directory before starting the generator.
```sh
cd tools/python
```
Build a `.mwm` map file for North Macedonia without using coastlines (it's a land-locked country anyway):
```sh
python3 -m maps_generator --countries="Macedonia" --skip="Coastline"
```
It's possible to skip coastlines for countries that have a sea coast too, but the sea water will not be rendered in that case.
Make sure that you specify country names that are actually contained in your pbf file, or you'll get errors in the next step. Check the filenames in the `data/borders/` folder (without the `.poly` extension) for a list of all valid country names. For example, New York City is in `US_New York_New_York`, and all of England (i.e. the UK minus Scotland, Wales, and Northern Ireland) can be generated by specifying `UK_England_*`.
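For instance, a quick way to list the valid region names matching a prefix is to scan `data/borders/` directly; here is a minimal sketch (not part of the tool), assuming it is run from the repository root:
```python
# List region names whose border files match a prefix (hypothetical helper).
from pathlib import Path

for poly in sorted(Path("data/borders").glob("UK_England_*.poly")):
    print(poly.stem)  # e.g. "UK_England_East Midlands"
```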
To see other possible command-line options:
```sh
python3 -m maps_generator -h
```
## Troubleshooting
The general log file (by default it's `maps_build/generation.log`) contains the output of the `maps_generator` python script only. More detailed logs that include the output of the `generator_tool` binary are located in the `logs/` subdir of a particular build directory, e.g. `maps_build/2023_06_04__20_05_07/logs/`.
## More Examples
### Japan with coastlines
1. Open https://download.geofabrik.de/asia/japan.html and copy the URLs of the .osm.pbf file and its .md5 checksum.
2. Put the URLs into the `PLANET_URL` and `PLANET_MD5_URL` settings of the `map_generator.ini` file.
3. Set `PLANET_COASTS_URL` to a location with `latest_coasts.geom` and `latest_coasts.rawgeom` files. You don't need to download these files if the whole planet is built. They are generated in the process of building the whole planet (the coastline should be valid and continuous for it to succeed).
4. Run
```sh
python3 -m maps_generator --countries="World, WorldCoasts, Japan_*"
```
### Rebuild stages
For example, suppose you changed the routing code in the project and want to regenerate maps.
You must have a previous generation available. You can regenerate starting from the routing stage, and only for two mwms:
```sh
python3 -m maps_generator -c --from_stage="Routing" --countries="Japan_Kinki Region_Osaka_Osaka, Japan_Chugoku Region_Tottori"
```
### Custom maps from GeoJSON
If you have an OSM PBF file and want to cut custom map regions, you can use a polygon feature in a GeoJSON file. This is a useful alternative if you want a custom area, or you do not want to figure out which country file(s) cover the area you need.
1. If you don't already have the .osm.pbf file, download applicable area of the world in .osm.pbf format, for example from [Geofabrik](http://download.geofabrik.de/index.html).
2. Create a GeoJSON polygon of the territory you are interested in. You can do it via [geojson.io](http://geojson.io/): select the area on the map and copy the corresponding part of the resulting GeoJSON. You need to copy one of the objects inside the `features: [ { ... } ]` array, without the array itself but with the inner braces: `{...}`. For example, here is the full GeoJSON of the rectangular area around Melbourne:
```json
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[143.75610351562497, -39.21523130910491],
[147.98583984375, -39.21523130910491],
[147.98583984375, -36.03133177633187],
[143.75610351562497, -36.03133177633187],
[143.75610351562497, -39.21523130910491]
]
]
}
}
]
}
```
You need to copy this part of the geojson:
```json
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[143.75610351562497, -39.21523130910491],
[147.98583984375, -39.21523130910491],
[147.98583984375, -36.03133177633187],
[143.75610351562497, -36.03133177633187],
[143.75610351562497, -39.21523130910491]
]
]
}
}
```
3. Save the selected GeoJSON in a file with a .geojson extension, for example `borders.geojson`.
4. Extract this area from the .osm.pbf file with the help of the [osmium tool](https://osmcode.org/osmium-tool/):
```
osmium extract -p borders.geojson germany-latest.osm.pbf -o germany_part.osm.pbf
```
5. Run the `maps_generator` tool:
```sh
python3 -m maps_generator --skip="Coastline" --without_countries="World*"
```
In this example we skipped generation of the World\* files because they are among the most time- and resource-consuming mwms.
### Subways layer
You can manually generate a subway layer file to use in the `SUBWAY_URL` ini setting. See [instructions](https://codeberg.org/comaps/comaps/src/branch/main/docs/SUBWAY_GENERATION.md).
## Testing maps
If you're testing a new feature you likely wish to test the maps locally
### iOS
The easiest way is to use the Simulator and switch out the map file in the app's Documents folder.
Finding the folder is slightly tricky; the easiest way is to look in the Xcode debug message window, as it often prints messages that contain the Documents folder.
E.g.,
```
I(1) 0.11666 platform/string_storage_base.cpp:24 StringStorageBase(): Settings path: /Users/<user-name>/Library/Developer/CoreSimulator/Devices/EFE74BF2-2871-4364-A633-BC8F1BAB9DF3/data/Containers/Data/Application/252BDFA5-3E60-43A6-B09C-158BC55DC450/Documents/settings.ini
```
In this folder, the map files are in a YYMMDD subfolder.
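If digging through the Xcode output is inconvenient, one alternative is to scan the Simulator's data directory for `settings.ini` files; a minimal sketch, assuming the default CoreSimulator location on macOS:
```python
# Print candidate Documents folders of simulated apps (hypothetical helper).
from pathlib import Path

root = Path.home() / "Library/Developer/CoreSimulator/Devices"
for settings in root.glob("*/data/Containers/Data/Application/*/Documents/settings.ini"):
    print(settings.parent)  # map files live in a YYMMDD subfolder here
```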

View file

@ -0,0 +1,19 @@
import os
from maps_generator.generator import settings
CONFIG_PATH = os.path.join(
os.path.dirname(os.path.join(os.path.realpath(__file__))),
"var",
"etc",
"map_generator.ini",
)
print(f"Loading configuration from {CONFIG_PATH}")
settings.init(CONFIG_PATH)
from maps_generator.generator import stages_declaration
from maps_generator.generator.stages import stages
stages.init()

View file

@ -0,0 +1,251 @@
import logging
import os
from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
from maps_generator.generator import settings
from maps_generator.generator import stages
from maps_generator.generator import stages_declaration as sd
from maps_generator.generator.env import Env
from maps_generator.generator.env import PathProvider
from maps_generator.generator.env import WORLDS_NAMES
from maps_generator.generator.env import find_last_build_dir
from maps_generator.generator.env import get_all_countries_list
from maps_generator.generator.exceptions import ContinueError
from maps_generator.generator.exceptions import SkipError
from maps_generator.generator.exceptions import ValidationError
from maps_generator.maps_generator import generate_coasts
from maps_generator.maps_generator import generate_maps
from maps_generator.utils.algo import unique
logger = logging.getLogger("maps_generator")
def parse_options():
parser = ArgumentParser(
description="A tool to generate map files in Organic Maps' .mwm format.",
epilog="See maps_generator/README.md for setup instructions and usage examples.",
formatter_class=RawDescriptionHelpFormatter,
parents=[settings.parser],
)
parser.add_argument(
"-c",
"--continue",
default="",
nargs="?",
type=str,
help="Continue the last build or the one specified in CONTINUE from the "
"last stopped stage.",
)
parser.add_argument(
"-s",
"--suffix",
default="",
type=str,
help="Suffix of the name of a build directory.",
)
parser.add_argument(
"--countries",
type=str,
default="",
help="List of countries/regions, separated by a comma or a semicolon, or a path to "
"a file with a newline-separated list of regions, for which maps "
"should be built. Filenames in data/borders/ (without the .poly extension) "
"represent all valid region names. "
"A * wildcard is accepted, e.g. --countries=\"UK*\" will match "
"UK_England_East Midlands, UK_England_East of England_Essex, etc.",
)
parser.add_argument(
"--without_countries",
type=str,
default="",
help="List of countries/regions to exclude from generation. "
"Has a priority over --countries and uses the same syntax.",
)
parser.add_argument(
"--skip",
type=str,
default="",
help=f"List of stages, separated by a comma or a semicolon, "
f"for which building will be skipped. Available skip stages: "
f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.",
)
parser.add_argument(
"--from_stage",
type=str,
default="",
help=f"Stage from which maps will be rebuild. Available stages: "
f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.",
)
parser.add_argument(
"--coasts",
default=False,
action="store_true",
help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files.",
)
parser.add_argument(
"--force_download_files",
default=False,
action="store_true",
help="If build is continued, files will always be downloaded again.",
)
parser.add_argument(
"--production",
default=False,
action="store_true",
help="Build production maps. Otherwise 'OSM-data-only maps' are built "
"without additional data like SRTM.",
)
parser.add_argument(
"--order",
type=str,
default=os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"var/etc/file_generation_order.txt",
),
help="Mwm generation order, useful to have particular maps completed first "
"in a long build (defaults to maps_generator/var/etc/file_generation_order.txt "
"to process big countries first).",
)
return parser.parse_args()
def main():
root = logging.getLogger()
root.addHandler(logging.NullHandler())
options = parse_options()
# Processing of 'continue' option.
# If 'continue' is set maps generation is continued from the last build
# that is found automatically.
build_name = None
continue_ = getattr(options, "continue")
if continue_ is None or continue_:
d = find_last_build_dir(continue_)
if d is None:
raise ContinueError(
"The build cannot continue: the last build directory was not found."
)
build_name = d
countries_line = ""
without_countries_line = ""
if "COUNTRIES" in os.environ:
countries_line = os.environ["COUNTRIES"]
if options.countries:
countries_line = options.countries
else:
countries_line = "*"
if options.without_countries:
without_countries_line = options.without_countries
all_countries = get_all_countries_list(PathProvider.borders_path())
def end_star_compare(prefix, full):
return full.startswith(prefix)
def compare(a, b):
return a == b
def get_countries_set_from_line(line):
countries = []
used_countries = set()
countries_list = []
if os.path.isfile(line):
with open(line) as f:
countries_list = [x.strip() for x in f]
elif line:
countries_list = [x.strip() for x in line.replace(";", ",").split(",")]
for country_item in countries_list:
cmp = compare
_raw_country = country_item[:]
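# A trailing '*' turns the entry into a prefix match against all known region names.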
if _raw_country and _raw_country[-1] == "*":
_raw_country = _raw_country.replace("*", "")
cmp = end_star_compare
for country in all_countries:
if cmp(_raw_country, country):
used_countries.add(country_item)
countries.append(country)
countries = unique(countries)
diff = set(countries_list) - used_countries
if diff:
raise ValidationError(f"Bad input countries: {', '.join(diff)}")
return set(countries)
countries = get_countries_set_from_line(countries_line)
without_countries = get_countries_set_from_line(without_countries_line)
countries -= without_countries
countries = list(countries)
if not countries:
countries = all_countries
# Processing of 'order' option.
# It defines an order of countries generation using a file from 'order' path.
if options.order:
ordered_countries = []
countries = set(countries)
with open(options.order) as file:
for c in file:
if c.strip().startswith("#"):
continue
c = c.split("\t")[0].strip()
if c in countries:
ordered_countries.append(c)
countries.remove(c)
if countries:
raise ValueError(
f"{options.order} does not have an order " f"for {countries}."
)
countries = ordered_countries
# Processing of 'skip' option.
skipped_stages = set()
if options.skip:
for s in options.skip.replace(";", ",").split(","):
stage = s.strip()
if not stages.stages.is_valid_stage_name(stage):
raise SkipError(f"{stage} not found.")
skipped_stages.add(stages.get_stage_type(stage))
if settings.PLANET_URL != settings.DEFAULT_PLANET_URL:
skipped_stages.add(sd.StageUpdatePlanet)
if sd.StageCoastline in skipped_stages:
if any(x in WORLDS_NAMES for x in countries):
raise SkipError(
f"You can not skip {stages.get_stage_name(sd.StageCoastline)}"
f" if you want to generate {WORLDS_NAMES}."
f" You can exclude them with --without_countries option."
)
if not settings.NEED_PLANET_UPDATE:
skipped_stages.add(sd.StageUpdatePlanet)
if not settings.NEED_BUILD_WORLD_ROADS:
skipped_stages.add(sd.StagePrepareRoutingWorld)
skipped_stages.add(sd.StageRoutingWorld)
# Make env and run maps generation.
env = Env(
countries=countries,
production=options.production,
build_name=build_name,
build_suffix=options.suffix,
skipped_stages=skipped_stages,
force_download_files=options.force_download_files
)
from_stage = None
if options.from_stage:
from_stage = f"{options.from_stage}"
if options.coasts:
generate_coasts(env, from_stage)
else:
generate_maps(env, from_stage)
env.finish()
main()

View file

@ -0,0 +1,60 @@
import argparse
import sys
from maps_generator.checks.default_check_set import CheckType
from maps_generator.checks.default_check_set import LogsChecks
from maps_generator.checks.default_check_set import get_logs_check_sets_and_filters
from maps_generator.checks.default_check_set import run_checks_and_print_results
def get_args():
parser = argparse.ArgumentParser(
description="This script checks maps generation logs and prints results."
)
parser.add_argument(
"--old", type=str, required=True, help="Path to old logs directory.",
)
parser.add_argument(
"--new", type=str, required=True, help="Path to new logs directory.",
)
parser.add_argument(
"--checks",
action="store",
type=str,
nargs="*",
default=None,
help=f"Set of checks: {', '.join(c.name for c in LogsChecks)}. "
f"By default, all checks will run.",
)
parser.add_argument(
"--level",
type=str,
required=False,
choices=("low", "medium", "hard", "strict"),
default="medium",
help="Messages level.",
)
parser.add_argument(
"--output",
type=str,
required=False,
default="",
help="Path to output file. stdout by default.",
)
return parser.parse_args()
def main():
args = get_args()
checks = {LogsChecks[c] for c in args.checks} if args.checks is not None else None
s = get_logs_check_sets_and_filters(args.old, args.new, checks)
run_checks_and_print_results(
s,
CheckType[args.level],
file=open(args.output, "w") if args.output else sys.stdout,
)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,65 @@
import argparse
import sys
from maps_generator.checks.default_check_set import CheckType
from maps_generator.checks.default_check_set import MwmsChecks
from maps_generator.checks.default_check_set import get_mwm_check_sets_and_filters
from maps_generator.checks.default_check_set import run_checks_and_print_results
def get_args():
parser = argparse.ArgumentParser(
description="This script checks mwms and prints results."
)
parser.add_argument(
"--old", type=str, required=True, help="Path to old mwm directory.",
)
parser.add_argument(
"--new", type=str, required=True, help="Path to new mwm directory.",
)
parser.add_argument(
"--categories", type=str, required=True, help="Path to categories file.",
)
parser.add_argument(
"--checks",
action="store",
type=str,
nargs="*",
default=None,
help=f"Set of checks: {', '.join(c.name for c in MwmsChecks)}. "
f"By default, all checks will run.",
)
parser.add_argument(
"--level",
type=str,
required=False,
choices=("low", "medium", "hard", "strict"),
default="medium",
help="Messages level.",
)
parser.add_argument(
"--output",
type=str,
required=False,
default="",
help="Path to output file. stdout by default.",
)
return parser.parse_args()
def main():
args = get_args()
checks = {MwmsChecks[c] for c in args.checks} if args.checks else None
s = get_mwm_check_sets_and_filters(
args.old, args.new, checks, categories_path=args.categories
)
run_checks_and_print_results(
s,
CheckType[args.level],
file=open(args.output, "w") if args.output else sys.stdout,
)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,309 @@
import os
import sys
from abc import ABC
from abc import abstractmethod
from collections import namedtuple
from enum import Enum
from functools import lru_cache
from typing import Any
from typing import Callable
from typing import List
ResLine = namedtuple("ResLine", ["previous", "current", "diff", "arrow"])
class Arrow(Enum):
zero = 0
down = 1
up = 2
ROW_TO_STR = {
Arrow.zero: "◄►",
Arrow.down: "▼",
Arrow.up: "▲",
}
def norm(value):
if isinstance(value, (int, float)):
return abs(value)
elif hasattr(value, "__len__"):
return len(value)
elif hasattr(value, "norm"):
return value.norm()
assert False, type(value)
def get_rel(r: ResLine) -> bool:
rel = 0.0
if r.arrow != Arrow.zero:
prev = norm(r.previous)
if prev == 0:
rel = 100.0
else:
rel = norm(r.diff) * 100.0 / prev
return rel
class Check(ABC):
"""
Base class for any checks.
Usual flow:
# Create check object.
check = AnyCheck("ExampleCheck")
# Do work.
check.check()
# Get results and process them
raw_result = check.get_result()
process_result(raw_result)
# or print result
check.print()
"""
def __init__(self, name: str):
self.name = name
def print(self, silent_if_no_results=False, filt=None, file=sys.stdout):
s = self.formatted_string(silent_if_no_results, filt)
if s:
print(s, file=file)
@abstractmethod
def check(self):
"""
Performs a logic of the check.
"""
pass
@abstractmethod
def get_result(self) -> Any:
"""
Returns a raw result of the check.
"""
pass
@abstractmethod
def formatted_string(self, silent_if_no_results=False, *args, **kwargs) -> str:
"""
Returns a formatted string of a raw result of the check.
"""
pass
class CompareCheckBase(Check, ABC):
def __init__(self, name: str):
super().__init__(name)
self.op: Callable[
[Any, Any], Any
] = lambda previous, current: current - previous
self.do: Callable[[Any], Any] = lambda x: x
self.zero: Any = 0
self.diff_format: Callable[[Any], str] = lambda x: str(x)
self.format: Callable[[Any], str] = lambda x: str(x)
self.filt: Callable[[Any], bool] = lambda x: True
def set_op(self, op: Callable[[Any, Any], Any]):
self.op = op
def set_do(self, do: Callable[[Any], Any]):
self.do = do
def set_zero(self, zero: Any):
self.zero = zero
def set_diff_format(self, diff_format: Callable[[Any], str]):
self.diff_format = diff_format
def set_format(self, format: Callable[[Any], str]):
self.format = format
def set_filt(self, filt: Callable[[Any], bool]):
self.filt = filt
class CompareCheck(CompareCheckBase):
def __init__(
self, name: str, old: Any, new: Any,
):
super().__init__(name)
self.old = old
self.new = new
self.result = None
def set_op(self, op: Callable[[Any, Any], Any]):
self.op = op
def set_do(self, do: Callable[[Any], Any]):
self.do = do
def set_zero(self, zero: Any):
self.zero = zero
def get_result(self) -> ResLine:
return self.result
def check(self):
previous = self.do(self.old)
if previous is None:
return False
current = self.do(self.new)
if current is None:
return False
diff = self.op(previous, current)
if diff is None:
return False
arrow = Arrow.zero
if diff > self.zero:
arrow = Arrow.up
elif diff < self.zero:
arrow = Arrow.down
self.result = ResLine(
previous=previous, current=current, diff=diff, arrow=arrow
)
return True
def formatted_string(self, silent_if_no_results=False, *args, **kwargs) -> str:
assert self.result
if silent_if_no_results and self.result.arrow == Arrow.zero:
return ""
rel = get_rel(self.result)
return (
f"{self.name}: {ROW_TO_STR[self.result.arrow]} {rel:.2f}% "
f"[{self.format(self.result.previous)}"
f"{self.format(self.result.current)}: "
f"{self.diff_format(self.result.diff)}]"
)
class CompareCheckSet(CompareCheckBase):
def __init__(self, name: str):
super().__init__(name)
self.checks = []
def add_check(self, check: Check):
self.checks.append(check)
def set_op(self, op: Callable[[Any, Any], Any]):
for c in self.checks:
c.set_op(op)
def set_do(self, do: Callable[[Any], Any]):
for c in self.checks:
c.set_do(do)
def set_zero(self, zero: Any):
for c in self.checks:
c.set_zero(zero)
def set_diff_format(self, diff_format: Callable[[Any], str]):
for c in self.checks:
c.set_diff_format(diff_format)
def set_format(self, format: Callable[[Any], str]):
for c in self.checks:
c.set_format(format)
def check(self):
for c in self.checks:
c.check()
def get_result(self,) -> List[ResLine]:
return [c.get_result() for c in self._with_result()]
def formatted_string(self, silent_if_no_results=False, filt=None, _offset=0) -> str:
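# Leaf checks are printed sorted by the magnitude of their diff (largest first), then nested check sets are printed recursively with extra indentation.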
sets = filter(lambda c: isinstance(c, CompareCheckSet), self._with_result())
checks = filter(lambda c: isinstance(c, CompareCheck), self._with_result())
checks = sorted(checks, key=lambda c: norm(c.get_result().diff), reverse=True)
if filt is None:
filt = self.filt
checks = filter(lambda c: filt(c.get_result()), checks)
sets = list(sets)
checks = list(checks)
no_results = not checks and not sets
if silent_if_no_results and no_results:
return ""
head = [
f"{' ' * _offset}Check set[{self.name}]:",
]
lines = []
if no_results:
lines.append(f"{' ' * (_offset + 2)}No results.")
for c in checks:
s = c.formatted_string(silent_if_no_results, filt, _offset + 2)
if s:
lines.append(f"{' ' * (_offset + 2)}{s}")
for s in sets:
s = s.formatted_string(silent_if_no_results, filt, _offset + 2)
if s:
lines.append(s)
if not lines:
return ""
head += lines
return "\n".join(head) + "\n"
def _with_result(self):
return (c for c in self.checks if c.get_result() is not None)
@lru_cache(maxsize=None)
def _get_and_check_files(old_path, new_path, ext):
files = list(filter(lambda f: f.endswith(ext), os.listdir(old_path)))
s = set(files) ^ set(filter(lambda f: f.endswith(ext), os.listdir(new_path)))
assert len(s) == 0, s
return files
def build_check_set_for_files(
name: str,
old_path: str,
new_path: str,
*,
ext: str = "",
recursive: bool = False,
op: Callable[[Any, Any], Any] = lambda previous, current: current - previous,
do: Callable[[Any], Any] = lambda x: x,
zero: Any = 0,
diff_format: Callable[[Any], str] = lambda x: str(x),
format: Callable[[Any], str] = lambda x: str(x),
):
if recursive:
raise NotImplementedError(
"build_check_set_for_files is not implemented for recursive=True."
)
cs = CompareCheckSet(name)
for file in _get_and_check_files(old_path, new_path, ext):
cs.add_check(
CompareCheck(
file, os.path.join(old_path, file), os.path.join(new_path, file)
)
)
cs.set_do(do)
cs.set_op(op)
cs.set_zero(zero)
cs.set_diff_format(diff_format)
cs.set_format(format)
return cs

View file

@ -0,0 +1,34 @@
import re
from maps_generator.checks import check
from maps_generator.checks.logs import logs_reader
ADDR_PATTERN = re.compile(
r".*BuildAddressTable\(\) Address: "
r"Matched percent (?P<matched_percent>[0-9.]+) "
r"Total: (?P<total>\d+) "
r"Missing: (?P<missing>\d+)"
)
def get_addresses_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
"""
Returns an addresses check set, that checks a difference in 'matched_percent'
addresses of BuildAddressTable between old logs and new logs.
"""
def do(path: str):
log = logs_reader.Log(path)
if not log.is_mwm_log:
return None
found = logs_reader.find_and_parse(log.lines, ADDR_PATTERN)
if not found:
return None
d = found[0][0]
return float(d["matched_percent"])
return check.build_check_set_for_files(
"Addresses check", old_path, new_path, ext=".log", do=do
)

View file

@ -0,0 +1,58 @@
from collections import defaultdict
from maps_generator.checks import check
from maps_generator.checks.check_mwm_types import count_all_types
from mwm import NAME_TO_INDEX_TYPE_MAPPING
def parse_groups(path):
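# Split each mapping line at the first '@': the left part is a '|'-separated list of types,
# the right part lists category names; build a mapping of category -> set of type indexes.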
groups = defaultdict(set)
with open(path) as f:
for line in f:
line = line.strip()
if line.startswith("#"):
continue
if line.startswith("@"):
continue
array = line.split("@", maxsplit=1)
if len(array) == 2:
types_str, categories = array
types_int = {
NAME_TO_INDEX_TYPE_MAPPING[t]
for t in types_str.strip("|").split("|")
}
for category in categories.split("|"):
category = category.replace("@", "", 1)
groups[category].update(types_int)
return groups
def get_categories_check_set(
old_path: str, new_path: str, categories_path: str
) -> check.CompareCheckSet:
"""
Returns a categories check set, that checks a difference in a number of
objects of categories(from categories.txt) between old mwms and new mwms.
"""
cs = check.CompareCheckSet("Categories check")
def make_do(indexes):
def do(path):
all_types = count_all_types(path)
return sum(all_types[i] for i in indexes)
return do
for category, types in parse_groups(categories_path).items():
cs.add_check(
check.build_check_set_for_files(
f"Category {category} check",
old_path,
new_path,
ext=".mwm",
do=make_do(types),
)
)
return cs

View file

@ -0,0 +1,49 @@
import logging
from functools import lru_cache
from maps_generator.checks import check
from maps_generator.checks.logs import logs_reader
from maps_generator.generator.stages_declaration import stages
@lru_cache(maxsize=None)
def _get_log_stages(path):
log = logs_reader.Log(path)
return logs_reader.normalize_logs(logs_reader.split_into_stages(log))
def get_log_levels_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
"""
Returns a log levels check set, that checks a difference in a number of
message levels from warning and higher for each stage between old mwms
and new mwms.
"""
cs = check.CompareCheckSet("Log levels check")
def make_do(level, stage_name, cache={}):
def do(path):
for s in _get_log_stages(path):
if s.name == stage_name:
k = f"{path}:{stage_name}"
if k not in cache:
cache[k] = logs_reader.count_levels(s)
return cache[k][level]
return None
return do
for stage_name in (
stages.get_visible_stages_names() + stages.get_invisible_stages_names()
):
for level in (logging.CRITICAL, logging.ERROR, logging.WARNING):
cs.add_check(
check.build_check_set_for_files(
f"Stage {stage_name} - {logging.getLevelName(level)} check",
old_path,
new_path,
ext=".log",
do=make_do(level, stage_name),
)
)
return cs

View file

@ -0,0 +1,61 @@
from collections import defaultdict
from functools import lru_cache
from typing import Union
from maps_generator.checks import check
from mwm import Mwm
from mwm import NAME_TO_INDEX_TYPE_MAPPING
from mwm import readable_type
from mwm import type_index
@lru_cache(maxsize=None)
def count_all_types(path: str):
c = defaultdict(int)
for ft in Mwm(path, parse=False):
for t in ft.types():
c[t] += 1
return c
def get_mwm_type_check_set(
old_path: str, new_path: str, type_: Union[str, int]
) -> check.CompareCheckSet:
"""
Returns a mwm type check set, that checks a difference in a number of
type [type_] between old mwms and new mwms.
"""
if isinstance(type_, str):
type_ = type_index(type_)
assert type_ >= 0, type_
return check.build_check_set_for_files(
f"Types check [{readable_type(type_)}]",
old_path,
new_path,
ext=".mwm",
do=lambda path: count_all_types(path)[type_],
)
def get_mwm_types_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
"""
Returns a mwm types check set, that checks a difference in a number of
each type between old mwms and new mwms.
"""
cs = check.CompareCheckSet("Mwm types check")
def make_do(index):
return lambda path: count_all_types(path)[index]
for t_name, t_index in NAME_TO_INDEX_TYPE_MAPPING.items():
cs.add_check(
check.build_check_set_for_files(
f"Type {t_name} check",
old_path,
new_path,
ext=".mwm",
do=make_do(t_index),
)
)
return cs

View file

@ -0,0 +1,124 @@
import os
from functools import lru_cache
from maps_generator.checks import check
from mwm import Mwm
class SectionNames:
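# Wraps a {section name: section info} mapping so that appeared/disappeared sections can be
# computed with '-' and results compared either with each other or with an int (by section count).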
def __init__(self, sections):
self.sections = sections
def __sub__(self, other):
return SectionNames(
{k: self.sections[k] for k in set(self.sections) - set(other.sections)}
)
def __lt__(self, other):
if isinstance(other, int):
return len(self.sections) < other
elif isinstance(other, SectionNames):
return self.sections < other.sections
assert False, type(other)
def __gt__(self, other):
if isinstance(other, int):
return len(self.sections) > other
elif isinstance(other, SectionNames):
return self.sections > other.sections
assert False, type(other)
def __len__(self):
return len(self.sections)
def __str__(self):
return str(self.sections)
@lru_cache(maxsize=None)
def read_sections(path: str):
return Mwm(path, parse=False).sections_info()
def get_appeared_sections_check_set(
old_path: str, new_path: str
) -> check.CompareCheckSet:
return check.build_check_set_for_files(
f"Appeared sections check",
old_path,
new_path,
ext=".mwm",
do=lambda path: SectionNames(read_sections(path)),
diff_format=lambda s: ", ".join(f"{k}:{v.size}" for k, v in s.sections.items()),
format=lambda s: f"number of sections: {len(s.sections)}",
)
def get_disappeared_sections_check_set(
old_path: str, new_path: str
) -> check.CompareCheckSet:
return check.build_check_set_for_files(
f"Disappeared sections check",
old_path,
new_path,
ext=".mwm",
do=lambda path: SectionNames(read_sections(path)),
op=lambda previous, current: previous - current,
diff_format=lambda s: ", ".join(f"{k}:{v.size}" for k, v in s.sections.items()),
format=lambda s: f"number of sections: {len(s.sections)}",
)
def get_sections_existence_check_set(
old_path: str, new_path: str
) -> check.CompareCheckSet:
"""
Returns a sections existence check set, that checks appeared and
disappeared sections between old mwms and new mwms.
"""
cs = check.CompareCheckSet("Sections existence check")
cs.add_check(get_appeared_sections_check_set(old_path, new_path))
cs.add_check(get_disappeared_sections_check_set(old_path, new_path))
return cs
def _get_sections_set(path):
sections = set()
for file in os.listdir(path):
p = os.path.join(path, file)
if os.path.isfile(p) and file.endswith(".mwm"):
sections.update(read_sections(p).keys())
return sections
def get_sections_size_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
"""
Returns a sections size check set, that checks a difference in a size
of each sections of mwm between old mwms and new mwms.
"""
sections_set = _get_sections_set(old_path)
sections_set.update(_get_sections_set(new_path))
cs = check.CompareCheckSet("Sections size check")
def make_do(section):
def do(path):
sections = read_sections(path)
if section not in sections:
return None
return sections[section].size
return do
for section in sections_set:
cs.add_check(
check.build_check_set_for_files(
f"Size of {section} check",
old_path,
new_path,
ext=".mwm",
do=make_do(section),
)
)
return cs

View file

@ -0,0 +1,17 @@
import os
from maps_generator.checks import check
def get_size_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
"""
Returns a size check set, that checks a difference in a size of mwm between
old mwms and new mwms.
"""
return check.build_check_set_for_files(
"Size check",
old_path,
new_path,
ext=".mwm",
do=lambda path: os.path.getsize(path),
)

View file

@ -0,0 +1,167 @@
import sys
from collections import namedtuple
from enum import Enum
from typing import Callable
from typing import Mapping
from typing import Optional
from typing import Set
from typing import Tuple
from maps_generator.checks import check
from maps_generator.checks.check_addresses import get_addresses_check_set
from maps_generator.checks.check_categories import get_categories_check_set
from maps_generator.checks.check_log_levels import get_log_levels_check_set
from maps_generator.checks.check_mwm_types import get_mwm_type_check_set
from maps_generator.checks.check_mwm_types import get_mwm_types_check_set
from maps_generator.checks.check_sections import get_sections_existence_check_set
from maps_generator.checks.check_sections import get_sections_size_check_set
from maps_generator.checks.check_size import get_size_check_set
class CheckType(Enum):
low = 1
medium = 2
hard = 3
strict = 4
Threshold = namedtuple("Threshold", ["abs", "rel"])
_default_thresholds = {
CheckType.low: Threshold(abs=20, rel=20),
CheckType.medium: Threshold(abs=15, rel=15),
CheckType.hard: Threshold(abs=10, rel=10),
CheckType.strict: Threshold(abs=0, rel=0),
}
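# A result line is reported only when both its absolute diff and its relative diff (in percent)
# exceed the thresholds of the chosen level; see make_default_filter below.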
def set_thresholds(check_type_map: Mapping[CheckType, Threshold]):
global _default_thresholds
_default_thresholds = check_type_map
def make_tmap(
low: Optional[Tuple[float, float]] = None,
medium: Optional[Tuple[float, float]] = None,
hard: Optional[Tuple[float, float]] = None,
strict: Optional[Tuple[float, float]] = None,
):
thresholds = _default_thresholds.copy()
if low is not None:
thresholds[CheckType.low] = Threshold(*low)
if medium is not None:
thresholds[CheckType.medium] = Threshold(*medium)
if hard is not None:
thresholds[CheckType.hard] = Threshold(*hard)
if strict is not None:
thresholds[CheckType.strict] = Threshold(*strict)
return thresholds
def make_default_filter(check_type_map: Mapping[CheckType, Threshold] = None):
if check_type_map is None:
check_type_map = _default_thresholds
def maker(check_type: CheckType):
threshold = check_type_map[check_type]
def default_filter(r: check.ResLine):
return (
check.norm(r.diff) > threshold.abs and check.get_rel(r) > threshold.rel
)
return default_filter
return maker
class MwmsChecks(Enum):
sections_existence = 1
sections_size = 2
mwm_size = 3
types = 4
booking = 5
categories = 6
def get_mwm_check_sets_and_filters(
old_path: str, new_path: str, checks: Set[MwmsChecks] = None, **kwargs
) -> Mapping[check.Check, Callable]:
def need_add(t: MwmsChecks):
return checks is None or t in checks
m = {get_sections_existence_check_set(old_path, new_path): None}
if need_add(MwmsChecks.sections_size):
c = get_sections_size_check_set(old_path, new_path)
thresholds = make_tmap(low=(0, 20), medium=(0, 10), hard=(0, 5))
m[c] = make_default_filter(thresholds)
mb = 1 << 20
if need_add(MwmsChecks.mwm_size):
c = get_size_check_set(old_path, new_path)
thresholds = make_tmap(low=(2 * mb, 10), medium=(mb, 5), hard=(0.5 * mb, 2))
m[c] = make_default_filter(thresholds)
if need_add(MwmsChecks.types):
c = get_mwm_types_check_set(old_path, new_path)
thresholds = make_tmap(low=(500, 30), medium=(100, 20), hard=(100, 10))
m[c] = make_default_filter(thresholds)
if need_add(MwmsChecks.booking):
c = get_mwm_type_check_set(old_path, new_path, "sponsored-booking")
thresholds = make_tmap(low=(500, 20), medium=(50, 10), hard=(50, 5))
m[c] = make_default_filter(thresholds)
if need_add(MwmsChecks.categories):
c = get_categories_check_set(old_path, new_path, kwargs["categories_path"])
thresholds = make_tmap(low=(200, 20), medium=(50, 10), hard=(50, 5))
m[c] = make_default_filter(thresholds)
return m
class LogsChecks(Enum):
log_levels = 1
addresses = 2
def get_logs_check_sets_and_filters(
old_path: str, new_path: str, checks: Set[LogsChecks] = None
) -> Mapping[check.Check, Callable]:
def need_add(t: LogsChecks):
return checks is None or t in checks
m = {get_log_levels_check_set(old_path, new_path): None}
if need_add(LogsChecks.addresses):
c = get_addresses_check_set(old_path, new_path)
thresholds = make_tmap(low=(50, 20), medium=(20, 10), hard=(10, 5))
m[c] = make_default_filter(thresholds)
return m
def _print_header(file, header, width=100, s="="):
stars = s * ((width - len(header)) // 2)
rstars = stars
if 2 * len(stars) + len(header) < width:
rstars += s
print(stars, header, rstars, file=file)
def run_checks_and_print_results(
checks: Mapping[check.Check, Callable],
check_type: CheckType,
silent_if_no_results: bool = True,
file=sys.stdout,
):
for check, make_filt in checks.items():
check.check()
_print_header(file, check.name)
check.print(
silent_if_no_results=silent_if_no_results,
filt=None if make_filt is None else make_filt(check_type),
file=file,
)

View file

@ -0,0 +1,241 @@
import datetime
import logging
import os
import re
from collections import Counter
from collections import namedtuple
from enum import Enum
from pathlib import Path
from typing import List
from typing import Tuple
from typing import Union
import maps_generator.generator.env as env
from maps_generator.generator.stages import get_stage_type
from maps_generator.utils.algo import parse_timedelta
logger = logging.getLogger(__name__)
FLAGS = re.MULTILINE | re.DOTALL
GEN_LINE_PATTERN = re.compile(
r"^LOG\s+TID\((?P<tid>\d+)\)\s+(?P<level>[A-Z]+)\s+"
r"(?P<timestamp>[-.e0-9]+)\s+(?P<message>.+)$",
FLAGS,
)
GEN_LINE_CHECK_PATTERN = re.compile(
r"^TID\((?P<tid>\d+)\)\s+" r"ASSERT FAILED\s+(?P<message>.+)$", FLAGS
)
MAPS_GEN_LINE_PATTERN = re.compile(
r"^\[(?P<time_string>[0-9-:, ]+)\]\s+(?P<level>\w+)\s+"
r"(?P<module>\w+)\s+(?P<message>.+)$",
FLAGS,
)
STAGE_START_MSG_PATTERN = re.compile(r"^Stage (?P<name>\w+): start ...$")
STAGE_FINISH_MSG_PATTERN = re.compile(
r"^Stage (?P<name>\w+): finished in (?P<duration_string>.+)$"
)
LogLine = namedtuple("LogLine", ["timestamp", "level", "tid", "message", "type"])
LogStage = namedtuple("LogStage", ["name", "duration", "lines"])
class LogType(Enum):
gen = 1
maps_gen = 2
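# LogType.gen lines come from the C++ generator_tool ("LOG TID(...) ..." format);
# LogType.maps_gen lines come from the Python maps_generator logger ("[timestamp] LEVEL module message" format).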
class Log:
def __init__(self, path: str):
self.path = Path(path)
self.name = self.path.stem
self.is_stage_log = False
self.is_mwm_log = False
try:
get_stage_type(self.name)
self.is_stage_log = True
except AttributeError:
if self.name in env.COUNTRIES_NAMES or self.name in env.WORLDS_NAMES:
self.is_mwm_log = True
self.lines = self._parse_lines()
def _parse_lines(self) -> List[LogLine]:
logline = ""
state = None
lines = []
base_timestamp = 0.0
def try_parse_and_insert():
nonlocal logline
logline = logline.strip()
if not logline:
return
nonlocal base_timestamp
line = None
if state == LogType.gen:
line = Log._parse_gen_line(logline, base_timestamp)
elif state == LogType.maps_gen:
line = Log._parse_maps_gen_line(logline)
base_timestamp = line.timestamp
if line is not None:
lines.append(line)
else:
logger.warn(f"{self.name}: line was not parsed: {logline}")
logline = ""
with self.path.open() as logfile:
for line in logfile:
if line.startswith("LOG") or line.startswith("TID"):
try_parse_and_insert()
state = LogType.gen
elif line.startswith("["):
try_parse_and_insert()
state = LogType.maps_gen
logline += line
try_parse_and_insert()
return lines
@staticmethod
def _parse_gen_line(line: str, base_time: float = 0.0) -> LogLine:
m = GEN_LINE_PATTERN.match(line)
if m:
return LogLine(
timestamp=base_time + float(m["timestamp"]),
level=logging.getLevelName(m["level"]),
tid=int(m["tid"]),
message=m["message"],
type=LogType.gen,
)
m = GEN_LINE_CHECK_PATTERN.match(line)
if m:
return LogLine(
timestamp=None,
level=logging.getLevelName("CRITICAL"),
tid=None,
message=m["message"],
type=LogType.gen,
)
assert False, line
@staticmethod
def _parse_maps_gen_line(line: str) -> LogLine:
m = MAPS_GEN_LINE_PATTERN.match(line)
if m:
# Parse the timestamp only after a successful match to avoid a TypeError on non-matching lines.
time_string = m["time_string"].split(",")[0]
timestamp = datetime.datetime.strptime(
time_string, logging.Formatter.default_time_format
).timestamp()
return LogLine(
timestamp=float(timestamp),
level=logging.getLevelName(m["level"]),
tid=None,
message=m["message"],
type=LogType.maps_gen,
)
assert False, line
class LogsReader:
def __init__(self, path: str):
self.path = os.path.abspath(os.path.expanduser(path))
def __iter__(self):
for filename in os.listdir(self.path):
if filename.endswith(".log"):
yield Log(os.path.join(self.path, filename))
def split_into_stages(log: Log) -> List[LogStage]:
log_stages = []
name = None
lines = []
for line in log.lines:
if line.message.startswith("Stage"):
m = STAGE_START_MSG_PATTERN.match(line.message)
if m:
if name is not None:
logger.warn(f"{log.name}: stage {name} has not finish line.")
log_stages.append(LogStage(name=name, duration=None, lines=lines))
name = m["name"]
m = STAGE_FINISH_MSG_PATTERN.match(line.message)
if m:
# assert name == m["name"], line
duration = parse_timedelta(m["duration_string"])
log_stages.append(LogStage(name=name, duration=duration, lines=lines))
name = None
lines = []
else:
lines.append(line)
if name is not None:
logger.warn(f"{log.name}: stage {name} has not finish line.")
log_stages.append(LogStage(name=name, duration=None, lines=lines))
return log_stages
def _is_worse(lhs: LogStage, rhs: LogStage) -> bool:
if (lhs.duration is None) ^ (rhs.duration is None):
return lhs.duration is None
if len(rhs.lines) > len(lhs.lines):
return True
return rhs.duration > lhs.duration
def normalize_logs(llogs: List[LogStage]) -> List[LogStage]:
normalized_logs = []
buckets = {}
for log in llogs:
if log.name in buckets:
if _is_worse(normalized_logs[buckets[log.name]], log):
normalized_logs[buckets[log.name]] = log
else:
normalized_logs.append(log)
buckets[log.name] = len(normalized_logs) - 1
return normalized_logs
def count_levels(logs: Union[List[LogLine], LogStage]) -> Counter:
if isinstance(logs, list):
return Counter(log.level for log in logs)
if isinstance(logs, LogStage):
return count_levels(logs.lines)
assert False, f"Type {type(logs)} is unsupported."
def find_and_parse(
logs: Union[List[LogLine], LogStage], pattern: Union[str, type(re.compile(""))],
) -> List[Tuple[dict, str]]:
if isinstance(pattern, str):
pattern = re.compile(pattern, FLAGS)
if isinstance(logs, list):
found = []
for log in logs:
m = pattern.match(log.message)
if m:
found.append((m.groupdict(), log))
return found
if isinstance(logs, LogStage):
return find_and_parse(logs.lines, pattern)
assert False, f"Type {type(logs)} is unsupported."

View file

@ -0,0 +1,37 @@
import argparse
from maps_generator.generator.statistics import diff
from maps_generator.generator.statistics import read_types
def get_args():
parser = argparse.ArgumentParser(
description="This script prints the difference between old_stats.json and new_stats.json."
)
parser.add_argument(
"--old",
default="",
type=str,
required=True,
help="Path to old file with map generation statistics.",
)
parser.add_argument(
"--new",
default="",
type=str,
required=True,
help="Path to new file with map generation statistics.",
)
return parser.parse_args()
def main():
args = get_args()
old = read_types(args.old)
new = read_types(args.new)
for line in diff(new, old):
print(";".join(str(x) for x in line))
if __name__ == "__main__":
main()

View file

@ -0,0 +1,68 @@
"""
This file contains api for osmfilter and generator_tool to generate coastline.
"""
import os
import subprocess
from maps_generator.generator import settings
from maps_generator.generator.env import Env
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.osmtools import osmfilter
def filter_coastline(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
):
osmfilter(
name_executable,
in_file,
out_file,
output=output,
error=error,
keep="",
keep_ways="natural=coastline",
keep_nodes="capital=yes place=town =city",
)
def make_coastline(env: Env):
coastline_o5m = os.path.join(env.paths.coastline_path, "coastline.o5m")
filter_coastline(
env[settings.OSM_TOOL_FILTER],
env.paths.planet_o5m,
coastline_o5m,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
)
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.paths.data_path,
intermediate_data_path=env.paths.coastline_path,
osm_file_type="o5m",
osm_file_name=coastline_o5m,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
preprocess=True,
)
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.paths.data_path,
intermediate_data_path=env.paths.coastline_path,
osm_file_type="o5m",
osm_file_name=coastline_o5m,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
make_coasts=True,
fail_on_coasts=True,
threads_count=settings.THREADS_COUNT,
)
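# Illustrative usage sketch (not part of the original module): building the coastline
# for a freshly created environment. The Env arguments are assumptions for the
# example only; a prepared planet .o5m file is expected at env.paths.planet_o5m.
def _example_build_coastline():
    env = Env(countries=["WorldCoasts"])
    make_coastline(env)
    env.finish()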

View file

@ -0,0 +1,100 @@
from pathlib import Path
import subprocess
import warnings
class Status:
NO_NEW_VERSION = "Failed: new version doesn't exist: {new}"
INTERNAL_ERROR = "Failed: internal error (C++ module) while calculating"
NO_OLD_VERSION = "Skipped: old version doesn't exist: {old}"
NOTHING_TO_DO = "Skipped: output already exists: {out}"
OK = "Succeeded: calculated {out}: {diff_size} out of {new_size} bytes"
TOO_LARGE = "Cancelled: {out}: diff {diff_size} > new version {new_size}"
@classmethod
def is_error(cls, status):
return status == cls.NO_NEW_VERSION or status == cls.INTERNAL_ERROR
def calculate_diff(params):
diff_tool, new, old, out = params["tool"], params["new"], params["old"], params["out"]
if not new.exists():
return Status.NO_NEW_VERSION, params
if not old.exists():
return Status.NO_OLD_VERSION, params
status = Status.OK
if out.exists():
status = Status.NOTHING_TO_DO
else:
res = subprocess.run([diff_tool.as_posix(), "make", old, new, out])
if res.returncode != 0:
return Status.INTERNAL_ERROR, params
diff_size = out.stat().st_size
new_size = new.stat().st_size
if diff_size > new_size:
status = Status.TOO_LARGE
params.update({
"diff_size": diff_size,
"new_size": new_size
})
return status, params
def mwm_diff_calculation(data_dir, logger, depth):
data = list(data_dir.get_mwms())[:depth]
results = map(calculate_diff, data)
for status, params in results:
if Status.is_error(status):
raise Exception(status.format(**params))
logger.info(status.format(**params))
class DataDir(object):
def __init__(self, diff_tool, mwm_name, new_version_dir, old_version_root_dir):
self.diff_tool_path = Path(diff_tool)
self.mwm_name = mwm_name
self.diff_name = self.mwm_name + ".mwmdiff"
self.new_version_dir = Path(new_version_dir)
self.new_version_path = Path(new_version_dir, mwm_name)
self.old_version_root_dir = Path(old_version_root_dir)
def get_mwms(self):
old_versions = sorted(
self.old_version_root_dir.glob("[0-9]*"),
reverse=True
)
for old_version_dir in old_versions:
if (old_version_dir != self.new_version_dir and
old_version_dir.is_dir()):
diff_dir = Path(self.new_version_dir, old_version_dir.name)
diff_dir.mkdir(exist_ok=True)
yield {
"tool": self.diff_tool_path,
"new": self.new_version_path,
"old": Path(old_version_dir, self.mwm_name),
"out": Path(diff_dir, self.diff_name)
}
if __name__ == "__main__":
import logging
import sys
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
logger.setLevel(logging.DEBUG)
# The path to the mwm_diff_tool binary is passed as the 4th argument.
data_dir = DataDir(
mwm_name=sys.argv[1], new_version_dir=sys.argv[2],
old_version_root_dir=sys.argv[3], diff_tool=sys.argv[4],
)
mwm_diff_calculation(data_dir, logger, depth=1)

View file

@ -0,0 +1,582 @@
import collections
import datetime
import logging
import logging.config
import os
import shutil
import sys
from functools import wraps
from typing import Any
from typing import AnyStr
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Set
from typing import Type
from typing import Union
from maps_generator.generator import settings
from maps_generator.generator import status
from maps_generator.generator.osmtools import build_osmtools
from maps_generator.generator.stages import Stage
from maps_generator.utils.file import find_executable
from maps_generator.utils.file import is_executable
from maps_generator.utils.file import make_symlink
logger = logging.getLogger("maps_generator")
WORLD_NAME = "World"
WORLD_COASTS_NAME = "WorldCoasts"
WORLDS_NAMES = {WORLD_NAME, WORLD_COASTS_NAME}
def get_all_countries_list(borders_path: AnyStr) -> List[AnyStr]:
"""Returns all countries including World and WorldCoasts."""
return [
f.replace(".poly", "")
for f in os.listdir(borders_path)
if os.path.isfile(os.path.join(borders_path, f))
] + list(WORLDS_NAMES)
def create_if_not_exist_path(path: AnyStr) -> bool:
"""Creates directory if it doesn't exist."""
try:
os.makedirs(path)
logger.info(f"Create {path} ...")
return True
except FileExistsError:
return False
def create_if_not_exist(func: Callable[..., AnyStr]) -> Callable[..., AnyStr]:
"""
A decorator that calls create_if_not_exist_path on the path
returned by func.
"""
@wraps(func)
def wrapper(*args, **kwargs):
path = func(*args, **kwargs)
create_if_not_exist_path(path)
return path
return wrapper
class Version:
"""It's used for writing and reading a generation version."""
@staticmethod
def write(out_path: AnyStr, version: AnyStr):
with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f:
f.write(str(version))
@staticmethod
def read(version_path: AnyStr) -> int:
with open(version_path) as f:
line = f.readline().strip()
try:
return int(line)
except ValueError:
logger.exception(f"Cast '{line}' to int error.")
return 0
def find_last_build_dir(hint: Optional[AnyStr] = None) -> Optional[AnyStr]:
"""
Tries to find the latest generation directory. If it is found, returns
the name of that directory. Otherwise returns None.
"""
if hint is not None:
p = os.path.join(settings.MAIN_OUT_PATH, hint)
return hint if os.path.exists(p) else None
try:
paths = [
os.path.join(settings.MAIN_OUT_PATH, f)
for f in os.listdir(settings.MAIN_OUT_PATH)
]
except FileNotFoundError:
logger.exception(f"{settings.MAIN_OUT_PATH} not found.")
return None
versions = []
for path in paths:
version_path = os.path.join(path, settings.VERSION_FILE_NAME)
if not os.path.isfile(version_path):
versions.append(0)
else:
versions.append(Version.read(version_path))
pairs = sorted(zip(paths, versions), key=lambda p: p[1], reverse=True)
return None if not pairs or pairs[0][1] == 0 else pairs[0][0].split(os.sep)[-1]
class PathProvider:
"""
PathProvider is used for building paths for map generation.
"""
def __init__(self, build_path: AnyStr, build_name: AnyStr, mwm_version: AnyStr):
self.build_path = build_path
self.build_name = build_name
self.mwm_version = mwm_version
create_if_not_exist_path(self.build_path)
@property
@create_if_not_exist
def intermediate_data_path(self) -> AnyStr:
"""
intermediate_data_path contains intermediate files,
for example downloaded external files, that are needed for generation,
*.mwm.tmp files, etc.
"""
return os.path.join(self.build_path, "intermediate_data")
@property
@create_if_not_exist
def cache_path(self) -> AnyStr:
"""cache_path contains caches for nodes, ways, relations."""
if not settings.CACHE_PATH:
return self.intermediate_data_path
return os.path.join(settings.CACHE_PATH, self.build_name)
@property
@create_if_not_exist
def data_path(self) -> AnyStr:
"""It's a synonym for intermediate_data_path."""
return self.intermediate_data_path
@property
@create_if_not_exist
def intermediate_tmp_path(self) -> AnyStr:
"""intermediate_tmp_path contains *.mwm.tmp files."""
return os.path.join(self.intermediate_data_path, "tmp")
@property
@create_if_not_exist
def mwm_path(self) -> AnyStr:
"""mwm_path contains *.mwm files."""
return os.path.join(self.build_path, self.mwm_version)
@property
@create_if_not_exist
def log_path(self) -> AnyStr:
"""mwm_path log files."""
return os.path.join(self.build_path, "logs")
@property
@create_if_not_exist
def generation_borders_path(self) -> AnyStr:
"""
generation_borders_path contains *.poly files, that define
which .mwm files are generated.
"""
return os.path.join(self.intermediate_data_path, "borders")
@property
@create_if_not_exist
def draft_path(self) -> AnyStr:
"""draft_path is used for saving temporary intermediate files."""
return os.path.join(self.build_path, "draft")
@property
@create_if_not_exist
def osm2ft_path(self) -> AnyStr:
"""osm2ft_path contains osmId<->ftId mappings."""
return os.path.join(self.build_path, "osm2ft")
@property
@create_if_not_exist
def coastline_path(self) -> AnyStr:
"""coastline_path is used for a coastline generation."""
return os.path.join(self.intermediate_data_path, "coasts")
@property
@create_if_not_exist
def coastline_tmp_path(self) -> AnyStr:
"""coastline_tmp_path is used for a coastline generation."""
return os.path.join(self.coastline_path, "tmp")
@property
@create_if_not_exist
def status_path(self) -> AnyStr:
"""status_path contains status files."""
return os.path.join(self.build_path, "status")
@property
@create_if_not_exist
def descriptions_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "descriptions")
@property
@create_if_not_exist
def stats_path(self) -> AnyStr:
return os.path.join(self.build_path, "stats")
@property
@create_if_not_exist
def transit_path(self) -> AnyStr:
return self.intermediate_data_path
@property
def transit_path_experimental(self) -> AnyStr:
return (
os.path.join(self.intermediate_data_path, "transit_from_gtfs")
if settings.TRANSIT_URL
else ""
)
@property
def world_roads_path(self) -> AnyStr:
return (
os.path.join(self.intermediate_data_path, "world_roads.txt")
if settings.NEED_BUILD_WORLD_ROADS
else ""
)
@property
def planet_osm_pbf(self) -> AnyStr:
return os.path.join(self.build_path, f"{settings.PLANET}.osm.pbf")
@property
def planet_o5m(self) -> AnyStr:
return os.path.join(self.build_path, f"{settings.PLANET}.o5m")
@property
def world_roads_o5m(self) -> AnyStr:
return os.path.join(self.build_path, "world_roads.o5m")
@property
def main_status_path(self) -> AnyStr:
return os.path.join(self.status_path, status.with_stat_ext("stages"))
@property
def packed_polygons_path(self) -> AnyStr:
return os.path.join(self.mwm_path, "packed_polygons.bin")
@property
def localads_path(self) -> AnyStr:
return os.path.join(self.build_path, f"localads_{self.mwm_version}")
@property
def types_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "types.txt")
@property
def external_resources_path(self) -> AnyStr:
return os.path.join(self.mwm_path, "external_resources.txt")
@property
def id_to_wikidata_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "id_to_wikidata.csv")
@property
def wiki_url_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "wiki_urls.txt")
@property
def ugc_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "ugc_db.sqlite3")
@property
def hotels_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "hotels.csv")
@property
def promo_catalog_cities_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "promo_catalog_cities.json")
@property
def promo_catalog_countries_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "promo_catalog_countries.json")
@property
def popularity_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "popular_places.csv")
@property
def subway_path(self) -> AnyStr:
return os.path.join(
self.intermediate_data_path, "mapsme_osm_subways.transit.json"
)
@property
def food_paths(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "ids_food.json")
@property
def food_translations_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "translations_food.json")
@property
def cities_boundaries_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "cities_boundaries.bin")
@property
def hierarchy_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "hierarchy.txt")
@property
def old_to_new_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "old_vs_new.csv")
@property
def borders_to_osm_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "borders_vs_osm.csv")
@property
def countries_synonyms_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "countries_synonyms.csv")
@property
def counties_txt_path(self) -> AnyStr:
return os.path.join(self.mwm_path, "countries.txt")
@property
def user_resource_path(self) -> AnyStr:
return settings.USER_RESOURCE_PATH
@staticmethod
def srtm_path() -> AnyStr:
return settings.SRTM_PATH
@staticmethod
def isolines_path() -> AnyStr:
return settings.ISOLINES_PATH
@staticmethod
def addresses_path() -> AnyStr:
return settings.ADDRESSES_PATH
@staticmethod
def borders_path() -> AnyStr:
return os.path.join(settings.USER_RESOURCE_PATH, "borders")
@staticmethod
@create_if_not_exist
def tmp_dir():
return settings.TMPDIR
COUNTRIES_NAMES = set(get_all_countries_list(PathProvider.borders_path()))
class Env:
"""
Env provides a generation environment. It sets up the tools and paths
used for map generation and stores the state of the generation.
"""
def __init__(
self,
countries: Optional[List[AnyStr]] = None,
production: bool = False,
build_name: Optional[AnyStr] = None,
build_suffix: AnyStr = "",
skipped_stages: Optional[Set[Type[Stage]]] = None,
force_download_files: bool = False,
):
self.setup_logging()
logger.info("Start setup ...")
os.environ["TMPDIR"] = PathProvider.tmp_dir()
for k, v in self.setup_osm_tools().items():
setattr(self, k, v)
self.production = production
self.force_download_files = force_download_files
self.countries = countries
self.skipped_stages = set() if skipped_stages is None else skipped_stages
if self.countries is None:
self.countries = get_all_countries_list(PathProvider.borders_path())
self.node_storage = settings.NODE_STORAGE
version_format = "%Y_%m_%d__%H_%M_%S"
suffix_div = "-"
self.dt = None
if build_name is None:
self.dt = datetime.datetime.now()
build_name = self.dt.strftime(version_format)
if build_suffix:
build_name = f"{build_name}{suffix_div}{build_suffix}"
else:
s = build_name.split(suffix_div, maxsplit=1)
if len(s) == 1:
s.append("")
date_str, build_suffix = s
self.dt = datetime.datetime.strptime(date_str, version_format)
self.build_suffix = build_suffix
self.mwm_version = self.dt.strftime("%y%m%d")
self.planet_version = self.dt.strftime("%s")
self.build_path = os.path.join(settings.MAIN_OUT_PATH, build_name)
self.build_name = build_name
self.gen_tool = self.setup_generator_tool()
if WORLD_NAME in self.countries:
self.world_roads_builder_tool = self.setup_world_roads_builder_tool()
self.diff_tool = self.setup_mwm_diff_tool()
logger.info(f"Build name is {self.build_name}.")
logger.info(f"Build path is {self.build_path}.")
self.paths = PathProvider(self.build_path, self.build_name, self.mwm_version)
Version.write(self.build_path, self.planet_version)
self.setup_borders()
self.setup_osm2ft()
if self.force_download_files:
for item in os.listdir(self.paths.status_path):
if item.endswith(".download"):
os.remove(os.path.join(self.paths.status_path, item))
self.main_status = status.Status()
# self.countries_meta stores log files and statuses for each country.
self.countries_meta = collections.defaultdict(dict)
self.subprocess_out = None
self.subprocess_countries_out = {}
printed_countries = ", ".join(self.countries)
if len(self.countries) > 50:
printed_countries = (
f"{', '.join(self.countries[:25])}, ..., "
f"{', '.join(self.countries[-25:])}"
)
logger.info(
f"The following {len(self.countries)} maps will build: "
f"{printed_countries}."
)
logger.info("Finish setup")
def __getitem__(self, item):
return self.__dict__[item]
def get_tmp_mwm_names(self) -> List[AnyStr]:
tmp_ext = ".mwm.tmp"
existing_names = set()
for f in os.listdir(self.paths.intermediate_tmp_path):
path = os.path.join(self.paths.intermediate_tmp_path, f)
if f.endswith(tmp_ext) and os.path.isfile(path):
name = f.replace(tmp_ext, "")
if name in self.countries:
existing_names.add(name)
return [c for c in self.countries if c in existing_names]
def add_skipped_stage(self, stage: Union[Type[Stage], Stage]):
if isinstance(stage, Stage):
stage = stage.__class__
self.skipped_stages.add(stage)
def is_accepted_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
if isinstance(stage, Stage):
stage = stage.__class__
return stage not in self.skipped_stages
def finish(self):
self.main_status.finish()
def finish_mwm(self, mwm_name: AnyStr):
self.countries_meta[mwm_name]["status"].finish()
def set_subprocess_out(self, subprocess_out: Any, country: Optional[AnyStr] = None):
if country is None:
self.subprocess_out = subprocess_out
else:
self.subprocess_countries_out[country] = subprocess_out
def get_subprocess_out(self, country: Optional[AnyStr] = None):
if country is None:
return self.subprocess_out
else:
return self.subprocess_countries_out[country]
@staticmethod
def setup_logging():
def exception_handler(type, value, tb):
logger.exception(
f"Uncaught exception: {str(value)}", exc_info=(type, value, tb)
)
logging.config.dictConfig(settings.LOGGING)
sys.excepthook = exception_handler
@staticmethod
def setup_generator_tool() -> AnyStr:
logger.info("Check generator tool ...")
exceptions = []
for gen_tool in settings.POSSIBLE_GEN_TOOL_NAMES:
gen_tool_path = shutil.which(gen_tool)
if gen_tool_path is None:
logger.info(f"Looking for generator tool in {settings.BUILD_PATH} ...")
try:
gen_tool_path = find_executable(settings.BUILD_PATH, gen_tool)
except FileNotFoundError as e:
exceptions.append(e)
continue
logger.info(f"Generator tool found - {gen_tool_path}")
return gen_tool_path
raise Exception(exceptions)
@staticmethod
def setup_world_roads_builder_tool() -> AnyStr:
logger.info(f"Check world_roads_builder_tool. Looking for it in {settings.BUILD_PATH} ...")
world_roads_builder_tool_path = find_executable(settings.BUILD_PATH, "world_roads_builder_tool")
logger.info(f"world_roads_builder_tool found - {world_roads_builder_tool_path}")
return world_roads_builder_tool_path
@staticmethod
def setup_mwm_diff_tool() -> AnyStr:
logger.info(f"Check mwm_diff_tool. Looking for it in {settings.BUILD_PATH} ...")
mwm_diff_tool_path = find_executable(settings.BUILD_PATH, "mwm_diff_tool")
logger.info(f"mwm_diff_tool found - {mwm_diff_tool_path}")
return mwm_diff_tool_path
@staticmethod
def setup_osm_tools() -> Dict[AnyStr, AnyStr]:
path = settings.OSM_TOOLS_PATH
osm_tool_names = [
settings.OSM_TOOL_CONVERT,
settings.OSM_TOOL_UPDATE,
settings.OSM_TOOL_FILTER,
]
logger.info("Check for the osmctools binaries...")
# Check in the configured path first.
tmp_paths = [os.path.join(path, t) for t in osm_tool_names]
if not all([is_executable(t) for t in tmp_paths]):
# Or use a system-wide installation.
tmp_paths = [shutil.which(t) for t in osm_tool_names]
if all([is_executable(t) for t in tmp_paths]):
osm_tool_paths = dict(zip(osm_tool_names, tmp_paths))
logger.info(f"Found osmctools at {', '.join(osm_tool_paths.values())}")
return osm_tool_paths
logger.info(f"osmctools are not found, building from the sources into {path}...")
os.makedirs(path, exist_ok=True)
return build_osmtools(settings.OSM_TOOLS_SRC_PATH)
def setup_borders(self):
temp_borders = self.paths.generation_borders_path
borders = PathProvider.borders_path()
for x in self.countries:
if x in WORLDS_NAMES:
continue
poly = f"{x}.poly"
make_symlink(os.path.join(borders, poly), os.path.join(temp_borders, poly))
make_symlink(temp_borders, os.path.join(self.paths.draft_path, "borders"))
def setup_osm2ft(self):
for x in os.listdir(self.paths.osm2ft_path):
p = os.path.join(self.paths.osm2ft_path, x)
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
shutil.move(p, os.path.join(self.paths.mwm_path, x))
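# Illustrative usage sketch (not part of the original module): creating a generation
# environment that builds two maps and skips the Coastline stage. The country names
# are assumptions for the example only.
def _example_make_env():
    from maps_generator.generator import stages_declaration as sd

    return Env(
        countries=["Greenland", "World"],
        skipped_stages={sd.StageCoastline},
    )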

View file

@ -0,0 +1,58 @@
import os
import subprocess
class MapsGeneratorError(Exception):
pass
class OptionNotFound(MapsGeneratorError):
pass
class ValidationError(MapsGeneratorError):
pass
class ContinueError(MapsGeneratorError):
pass
class SkipError(MapsGeneratorError):
pass
class BadExitStatusError(MapsGeneratorError):
pass
class ParseError(MapsGeneratorError):
pass
class FailedTest(MapsGeneratorError):
pass
def wait_and_raise_if_fail(p):
if p.wait() != os.EX_OK:
if type(p) is subprocess.Popen:
args = p.args
stdout = p.stdout
stderr = p.stderr
logs = None
errors = None
if stdout is not None:
logs = stdout.read(256).decode()
if stderr is not None:
errors = stderr.read(256).decode()
if errors is not None and errors != logs:
logs = f"{logs} and {errors}" if logs else errors
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
raise BadExitStatusError(msg)
else:
args = p.args
logs = p.output.name
if p.error.name != logs:
logs += " and " + p.error.name
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
raise BadExitStatusError(msg)

View file

@ -0,0 +1,162 @@
import copy
import logging
import os
import subprocess
from maps_generator.generator.exceptions import OptionNotFound
from maps_generator.generator.exceptions import ValidationError
from maps_generator.generator.exceptions import wait_and_raise_if_fail
logger = logging.getLogger("maps_generator")
class GenTool:
OPTIONS = {
"dump_cities_boundaries": bool,
"emit_coasts": bool,
"fail_on_coasts": bool,
"generate_cameras": bool,
"generate_cities_boundaries": bool,
"generate_cities_ids": bool,
"generate_features": bool,
"generate_geo_objects_features": bool,
"generate_geo_objects_index": bool,
"generate_geometry": bool,
"generate_index": bool,
"generate_isolines_info": bool,
"generate_maxspeed": bool,
"generate_packed_borders": bool,
"generate_popular_places": bool,
"generate_region_features": bool,
"generate_regions": bool,
"generate_regions_kv": bool,
"generate_search_index": bool,
"generate_traffic_keys": bool,
"generate_world": bool,
"have_borders_for_whole_world": bool,
"make_city_roads": bool,
"make_coasts": bool,
"make_cross_mwm": bool,
"make_routing_index": bool,
"make_transit_cross_mwm": bool,
"make_transit_cross_mwm_experimental": bool,
"preprocess": bool,
"split_by_polygons": bool,
"stats_types": bool,
"version": bool,
"threads_count": int,
"booking_data": str,
"promo_catalog_cities": str,
"brands_data": str,
"brands_translations_data": str,
"cache_path": str,
"cities_boundaries_data": str,
"data_path": str,
"dump_wikipedia_urls": str,
"geo_objects_features": str,
"geo_objects_key_value": str,
"ids_without_addresses": str,
"idToWikidata": str,
"intermediate_data_path": str,
"isolines_path": str,
"addresses_path": str,
"nodes_list_path": str,
"node_storage": str,
"osm_file_name": str,
"osm_file_type": str,
"output": str,
"planet_version": str,
"popular_places_data": str,
"regions_features": str,
"regions_index": str,
"regions_key_value": str,
"srtm_path": str,
"transit_path": str,
"transit_path_experimental": str,
"world_roads_path": str,
"ugc_data": str,
"uk_postcodes_dataset": str,
"us_postcodes_dataset": str,
"user_resource_path": str,
"wikipedia_pages": str,
}
def __init__(
self, name_executable, out=subprocess.DEVNULL, err=subprocess.DEVNULL, **options
):
self.name_executable = name_executable
self.subprocess = None
self.output = out
self.error = err
self.options = {"threads_count": 1}
self.logger = logger
self.add_options(**options)
@property
def args(self):
return self._collect_cmd()
def add_options(self, **options):
if "logger" in options:
self.logger = options["logger"]
for k, v in options.items():
if k == "logger":
continue
if k not in GenTool.OPTIONS:
raise OptionNotFound(f"{k} is unavailable option")
if type(v) is not GenTool.OPTIONS[k]:
raise ValidationError(
f"{k} required {str(GenTool.OPTIONS[k])},"
f" but not {str(type(v))}"
)
self.options[k] = str(v).lower() if type(v) is bool else v
return self
def run_async(self):
assert self.subprocess is None, "You forgot to call wait()"
cmd = self._collect_cmd()
self.subprocess = subprocess.Popen(
cmd, stdout=self.output, stderr=self.error, env=os.environ
)
self.logger.info(
f"Run generator tool [{self.get_build_version()}]:" f" {' '.join(cmd)} "
)
return self
def wait(self):
code = self.subprocess.wait()
self.subprocess = None
return code
def run(self):
self.run_async()
wait_and_raise_if_fail(self)
def branch(self):
c = GenTool(self.name_executable, out=self.output, err=self.error)
c.options = copy.deepcopy(self.options)
return c
def get_build_version(self):
p = subprocess.Popen(
[self.name_executable, "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=os.environ,
)
wait_and_raise_if_fail(p)
out, err = p.communicate()
return out.decode("utf-8").replace("\n", " ").strip()
def _collect_cmd(self):
options = ["".join(["--", k, "=", str(v)]) for k, v in self.options.items()]
return [self.name_executable, *options]
def run_gen_tool(*args, **kwargs):
GenTool(*args, **kwargs).run()
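# Illustrative usage sketch (not part of the original module): running the
# preprocessing step through GenTool directly; it mirrors the run_gen_tool call used
# in coastline.py. The env argument is a prepared Env instance.
def _example_preprocess(env):
    GenTool(
        env.gen_tool,
        out=env.get_subprocess_out(),
        err=env.get_subprocess_out(),
        data_path=env.paths.data_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        osm_file_type="o5m",
        osm_file_name=env.paths.planet_o5m,
        node_storage=env.node_storage,
        user_resource_path=env.paths.user_resource_path,
        preprocess=True,
    ).run()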

View file

@ -0,0 +1,151 @@
import os
from typing import AnyStr
from typing import List
from typing import Optional
from typing import Type
from typing import Union
import filelock
from maps_generator.generator.env import Env
from maps_generator.generator.exceptions import ContinueError
from maps_generator.generator.stages import Stage
from maps_generator.generator.stages import get_stage_name
from maps_generator.generator.stages import stages
from maps_generator.generator.status import Status
from maps_generator.generator.status import without_stat_ext
class Generation:
"""
Generation describes the process of map generation. It contains stages.
For example:
generation = Generation(env)
generation.add_stage(s1)
generation.add_stage(s2)
generation.run()
"""
def __init__(self, env: Env, build_lock: bool = True):
self.env: Env = env
self.stages: List[Stage] = []
self.runnable_stages: Optional[List[Stage]] = None
self.build_lock: bool = build_lock
for country_stage in stages.countries_stages:
if self.is_skipped_stage(country_stage):
self.env.add_skipped_stage(country_stage)
for stage in stages.stages:
if self.is_skipped_stage(stage):
self.env.add_skipped_stage(stage)
def is_skipped_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
return (
stage.is_production_only and not self.env.production
) or not self.env.is_accepted_stage(stage)
def add_stage(self, stage: Stage):
self.stages.append(stage)
if self.is_skipped_stage(stage):
self.env.add_skipped_stage(stage)
def pre_run(self):
skipped = set()
def traverse(current: Type[Stage]):
deps = stages.dependencies.get(current, [])
for d in deps:
skipped.add(d)
traverse(d)
for skipped_stage in self.env.skipped_stages:
traverse(skipped_stage)
for s in skipped:
self.env.add_skipped_stage(s)
self.runnable_stages = [s for s in self.stages if self.env.is_accepted_stage(s)]
def run(self, from_stage: Optional[AnyStr] = None):
self.pre_run()
if from_stage is not None:
self.reset_to_stage(from_stage)
if self.build_lock:
lock_filename = f"{os.path.join(self.env.paths.build_path, 'lock')}.lock"
with filelock.FileLock(lock_filename, timeout=1):
self.run_stages()
else:
self.run_stages()
def run_stages(self):
for stage in self.runnable_stages:
stage(self.env)
def reset_to_stage(self, stage_name: AnyStr):
"""
Resets generation state to stage_name.
Status files are overwritten with new statuses according to stage_name.
It assumes that the stages have the following structure:
stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
"""
high_level_stages = [get_stage_name(s) for s in self.runnable_stages]
if not (
stage_name in high_level_stages
or any(stage_name == get_stage_name(s) for s in stages.countries_stages)
):
raise ContinueError(f"{stage_name} not in {', '.join(high_level_stages)}.")
if not os.path.exists(self.env.paths.status_path):
raise ContinueError(f"Status path {self.env.paths.status_path} not found.")
if not os.path.exists(self.env.paths.main_status_path):
raise ContinueError(
f"Status file {self.env.paths.main_status_path} not found."
)
countries_statuses_paths = []
countries = set(self.env.countries)
for f in os.listdir(self.env.paths.status_path):
full_name = os.path.join(self.env.paths.status_path, f)
if (
os.path.isfile(full_name)
and full_name != self.env.paths.main_status_path
and without_stat_ext(f) in countries
):
countries_statuses_paths.append(full_name)
def set_countries_stage(st):
for path in countries_statuses_paths:
Status(path, st).update_status()
def finish_countries_stage():
for path in countries_statuses_paths:
Status(path).finish()
def index(l: List, val):
try:
return l.index(val)
except ValueError:
return -1
mwm_stage_name = get_stage_name(stages.mwm_stage)
stage_mwm_index = index(high_level_stages, mwm_stage_name)
main_status = None
if (
stage_mwm_index == -1
or stage_name in high_level_stages[: stage_mwm_index + 1]
):
main_status = stage_name
set_countries_stage("")
elif stage_name in high_level_stages[stage_mwm_index + 1 :]:
main_status = stage_name
finish_countries_stage()
else:
main_status = get_stage_name(stages.mwm_stage)
set_countries_stage(stage_name)
Status(self.env.paths.main_status_path, main_status).update_status()
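# Illustrative usage sketch (not part of the original module): assembling a pipeline
# from concrete stages and resuming it from the Routing country stage. The chosen
# stages are assumptions for the example only.
def _example_run_generation(env):
    from maps_generator.generator import stages_declaration as sd

    generation = Generation(env)
    generation.add_stage(sd.StageDownloadAndConvertPlanet())
    generation.add_stage(sd.StageCoastline())
    generation.add_stage(sd.StagePreprocess())
    generation.add_stage(sd.StageFeatures())
    generation.add_stage(sd.StageMwm())
    # Requires status files from a previous run of the same build directory.
    generation.run(from_stage="Routing")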

View file

@ -0,0 +1,121 @@
import os
import subprocess
from maps_generator.generator import settings
from maps_generator.generator.exceptions import BadExitStatusError
from maps_generator.generator.exceptions import wait_and_raise_if_fail
def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL):
src = {
settings.OSM_TOOL_UPDATE: "osmupdate.c",
settings.OSM_TOOL_FILTER: "osmfilter.c",
settings.OSM_TOOL_CONVERT: "osmconvert.c",
}
ld_flags = ("-lz",)
cc = []
result = {}
for executable, src in src.items():
out = os.path.join(settings.OSM_TOOLS_PATH, executable)
op = [
settings.OSM_TOOLS_CC,
*settings.OSM_TOOLS_CC_FLAGS,
"-o",
out,
os.path.join(path, src),
*ld_flags,
]
s = subprocess.Popen(op, stdout=output, stderr=error)
cc.append(s)
result[executable] = out
messages = []
for c in cc:
if c.wait() != os.EX_OK:
messages.append(f"The launch of {' '.join(c.args)} failed.")
if messages:
raise BadExitStatusError("\n".join(messages))
return result
def osmconvert(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
run_async=False,
**kwargs,
):
env = os.environ.copy()
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
p = subprocess.Popen(
[
name_executable,
in_file,
"--drop-author",
"--drop-version",
"--out-o5m",
f"-o={out_file}",
],
env=env,
stdout=output,
stderr=error,
)
if run_async:
return p
else:
wait_and_raise_if_fail(p)
def osmupdate(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
run_async=False,
**kwargs,
):
env = os.environ.copy()
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
p = subprocess.Popen(
[
name_executable,
"--drop-author",
"--drop-version",
"--out-o5m",
"-v",
in_file,
out_file,
],
env=env,
stdout=output,
stderr=error,
)
if run_async:
return p
else:
wait_and_raise_if_fail(p)
def osmfilter(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
run_async=False,
**kwargs,
):
env = os.environ.copy()
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
args = [name_executable, in_file, f"-o={out_file}"] + [
f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items()
]
p = subprocess.Popen(args, env=env, stdout=output, stderr=error)
if run_async:
return p
else:
wait_and_raise_if_fail(p)
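# Illustrative usage sketch (not part of the original module): converting the
# downloaded planet .osm.pbf into .o5m with the osmconvert wrapper above; the env
# argument is a prepared Env instance, as in coastline.py.
def _example_convert_planet(env):
    osmconvert(
        env[settings.OSM_TOOL_CONVERT],
        env.paths.planet_osm_pbf,
        env.paths.planet_o5m,
        output=env.get_subprocess_out(),
        error=env.get_subprocess_out(),
    )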

View file

@ -0,0 +1,333 @@
import argparse
import multiprocessing
import os
import site
import sys
from configparser import ConfigParser
from configparser import ExtendedInterpolation
from pathlib import Path
from typing import Any
from typing import AnyStr
from maps_generator.utils.md5 import md5_ext
from maps_generator.utils.system import total_virtual_memory
ETC_DIR = os.path.join(os.path.dirname(__file__), "..", "var", "etc")
parser = argparse.ArgumentParser(add_help=False)
opt_config = "--config"
parser.add_argument(opt_config, type=str, default="", help="Path to config")
def get_config_path(config_path: AnyStr):
"""
Tries to get the --config value from the command line or the environment.
If the value is not found, returns config_path.
"""
argv = sys.argv
indexes = (-1, -1)
for i, opt in enumerate(argv):
if opt.startswith(f"{opt_config}="):
indexes = (i, i + 1)
if opt == opt_config:
indexes = (i, i + 2)
config_args = argv[indexes[0] : indexes[1]]
if config_args:
return parser.parse_args(config_args).config
config_var = os.environ.get("MM_GEN__CONFIG")
return config_path if config_var is None else config_var
class CfgReader:
"""
Config reader.
There are 3 ways of getting an option, in priority order:
1. From the system environment.
2. From the config file.
3. From the default values.
To set an option via the system environment, build the variable name as
MM_GEN__ + [SECTION_NAME] + _ + [VALUE_NAME].
"""
def __init__(self, default_settings_path: AnyStr):
self.config = ConfigParser(interpolation=ExtendedInterpolation())
self.config.read([get_config_path(default_settings_path)])
def get_opt(self, s: AnyStr, v: AnyStr, default: Any = None):
val = CfgReader._get_env_val(s, v)
if val is not None:
return val
return self.config.get(s, v) if self.config.has_option(s, v) else default
def get_opt_path(self, s: AnyStr, v: AnyStr, default: AnyStr = ""):
return os.path.expanduser(self.get_opt(s, v, default))
@staticmethod
def _get_env_val(s: AnyStr, v: AnyStr):
return os.environ.get(f"MM_GEN__{s.upper()}_{v.upper()}")
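# Illustrative usage sketch (not part of the original module): resolving one option
# with the priority described in CfgReader (environment, then config, then default).
# The ini file name is an assumption for the example only.
def _example_read_tmpdir():
    cfg = CfgReader(os.path.join(ETC_DIR, "map_generator.ini"))
    # Exporting MM_GEN__MAIN_TMPDIR overrides both the config value and the default.
    return cfg.get_opt_path("Main", "TMPDIR", "/tmp")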
DEFAULT_PLANET_URL = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf"
# Main section:
# If DEBUG is True, a little special planet is downloaded.
DEBUG = True
_HOME_PATH = str(Path.home())
_WORK_PATH = _HOME_PATH
TMPDIR = os.path.join(_HOME_PATH, "tmp")
MAIN_OUT_PATH = os.path.join(_WORK_PATH, "generation")
CACHE_PATH = ""
# Developer section:
BUILD_PATH = os.path.join(_WORK_PATH, "omim-build-relwithdebinfo")
OMIM_PATH = os.path.join(_WORK_PATH, "omim")
# Osm tools section:
OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools")
OSM_TOOLS_PATH = os.path.join(_WORK_PATH, "osmctools")
# Generator tool section:
USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data")
NODE_STORAGE = "map"
# Stages section:
NEED_PLANET_UPDATE = False
THREADS_COUNT_FEATURES_STAGE = multiprocessing.cpu_count()
DATA_ARCHIVE_DIR = ""
DIFF_VERSION_DEPTH = 2
# Logging section:
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
# External resources section:
PLANET_URL = DEFAULT_PLANET_URL
PLANET_COASTS_URL = ""
UGC_URL = ""
HOTELS_URL = ""
PROMO_CATALOG_CITIES_URL = ""
PROMO_CATALOG_COUNTRIES_URL = ""
POPULARITY_URL = ""
SUBWAY_URL = ""
TRANSIT_URL = ""
NEED_BUILD_WORLD_ROADS = True
FOOD_URL = ""
FOOD_TRANSLATIONS_URL = ""
UK_POSTCODES_URL = ""
US_POSTCODES_URL = ""
SRTM_PATH = ""
ISOLINES_PATH = ""
ADDRESSES_PATH = ""
# Stats section:
STATS_TYPES_CONFIG = os.path.join(ETC_DIR, "stats_types_config.txt")
# Other variables:
PLANET = "planet"
POSSIBLE_GEN_TOOL_NAMES = ("generator_tool", "omim-generator_tool")
VERSION_FILE_NAME = "version.txt"
# Osm tools:
OSM_TOOL_CONVERT = "osmconvert"
OSM_TOOL_FILTER = "osmfilter"
OSM_TOOL_UPDATE = "osmupdate"
OSM_TOOLS_CC = "cc"
OSM_TOOLS_CC_FLAGS = [
"-O3",
]
# Planet and coasts:
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")
# Common:
THREADS_COUNT = multiprocessing.cpu_count()
# for lib logging
LOGGING = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"standard": {"format": "[%(asctime)s] %(levelname)s %(module)s %(message)s"},
},
"handlers": {
"stdout": {
"level": "INFO",
"class": "logging.StreamHandler",
"formatter": "standard",
},
"file": {
"level": "DEBUG",
"class": "logging.handlers.WatchedFileHandler",
"formatter": "standard",
"filename": LOG_FILE_PATH,
},
},
"loggers": {
"maps_generator": {
"handlers": ["stdout", "file"],
"level": "DEBUG",
"propagate": True,
}
},
}
def init(default_settings_path: AnyStr):
# Try to read a config and to overload default settings
cfg = CfgReader(default_settings_path)
# Main section:
global DEBUG
global TMPDIR
global MAIN_OUT_PATH
global CACHE_PATH
_DEBUG = cfg.get_opt("Main", "DEBUG")
DEBUG = DEBUG if _DEBUG is None else int(_DEBUG)
TMPDIR = cfg.get_opt_path("Main", "TMPDIR", TMPDIR)
MAIN_OUT_PATH = cfg.get_opt_path("Main", "MAIN_OUT_PATH", MAIN_OUT_PATH)
CACHE_PATH = cfg.get_opt_path("Main", "CACHE_PATH", CACHE_PATH)
# Developer section:
global BUILD_PATH
global OMIM_PATH
BUILD_PATH = cfg.get_opt_path("Developer", "BUILD_PATH", BUILD_PATH)
OMIM_PATH = cfg.get_opt_path("Developer", "OMIM_PATH", OMIM_PATH)
# Osm tools section:
global OSM_TOOLS_SRC_PATH
global OSM_TOOLS_PATH
OSM_TOOLS_SRC_PATH = cfg.get_opt_path(
"Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH
)
OSM_TOOLS_PATH = cfg.get_opt_path("Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH)
# Generator tool section:
global USER_RESOURCE_PATH
global NODE_STORAGE
USER_RESOURCE_PATH = cfg.get_opt_path(
"Generator tool", "USER_RESOURCE_PATH", USER_RESOURCE_PATH
)
NODE_STORAGE = cfg.get_opt("Generator tool", "NODE_STORAGE", NODE_STORAGE)
assert os.path.exists(OMIM_PATH), f"Can't find OMIM_PATH (set to {OMIM_PATH})"
if not os.path.exists(USER_RESOURCE_PATH):
from data_files import find_data_files
USER_RESOURCE_PATH = find_data_files("omim-data")
assert USER_RESOURCE_PATH is not None
import borders
# Issue: if maps_generator is installed as a system package and
# borders.init() is called for the first time, borders.init() might
# return False because root permissions are needed.
assert borders.init()
# Stages section:
global NEED_PLANET_UPDATE
global DATA_ARCHIVE_DIR
global DIFF_VERSION_DEPTH
global THREADS_COUNT_FEATURES_STAGE
NEED_PLANET_UPDATE = cfg.get_opt("Stages", "NEED_PLANET_UPDATE", NEED_PLANET_UPDATE)
DATA_ARCHIVE_DIR = cfg.get_opt_path(
"Stages", "DATA_ARCHIVE_DIR", DATA_ARCHIVE_DIR
)
DIFF_VERSION_DEPTH = int(cfg.get_opt(
"Stages", "DIFF_VERSION_DEPTH", DIFF_VERSION_DEPTH
))
threads_count = int(
cfg.get_opt(
"Generator tool",
"THREADS_COUNT_FEATURES_STAGE",
THREADS_COUNT_FEATURES_STAGE,
)
)
if threads_count > 0:
THREADS_COUNT_FEATURES_STAGE = threads_count
# Logging section:
global LOG_FILE_PATH
global LOGGING
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
LOG_FILE_PATH = cfg.get_opt_path("Logging", "MAIN_LOG", LOG_FILE_PATH)
os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True)
LOGGING["handlers"]["file"]["filename"] = LOG_FILE_PATH
# External section:
global PLANET_URL
global PLANET_MD5_URL
global PLANET_COASTS_URL
global UGC_URL
global HOTELS_URL
global PROMO_CATALOG_CITIES_URL
global PROMO_CATALOG_COUNTRIES_URL
global POPULARITY_URL
global SUBWAY_URL
global TRANSIT_URL
global NEED_BUILD_WORLD_ROADS
global FOOD_URL
global UK_POSTCODES_URL
global US_POSTCODES_URL
global FOOD_TRANSLATIONS_URL
global SRTM_PATH
global ISOLINES_PATH
global ADDRESSES_PATH
PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL)
PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", md5_ext(PLANET_URL))
PLANET_COASTS_URL = cfg.get_opt_path(
"External", "PLANET_COASTS_URL", PLANET_COASTS_URL
)
UGC_URL = cfg.get_opt_path("External", "UGC_URL", UGC_URL)
HOTELS_URL = cfg.get_opt_path("External", "HOTELS_URL", HOTELS_URL)
PROMO_CATALOG_CITIES_URL = cfg.get_opt_path(
"External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL
)
PROMO_CATALOG_COUNTRIES_URL = cfg.get_opt_path(
"External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL
)
POPULARITY_URL = cfg.get_opt_path("External", "POPULARITY_URL", POPULARITY_URL)
SUBWAY_URL = cfg.get_opt("External", "SUBWAY_URL", SUBWAY_URL)
TRANSIT_URL = cfg.get_opt("External", "TRANSIT_URL", TRANSIT_URL)
NEED_BUILD_WORLD_ROADS = cfg.get_opt("External", "NEED_BUILD_WORLD_ROADS", NEED_BUILD_WORLD_ROADS)
FOOD_URL = cfg.get_opt("External", "FOOD_URL", FOOD_URL)
UK_POSTCODES_URL = cfg.get_opt("External", "UK_POSTCODES_URL", UK_POSTCODES_URL)
US_POSTCODES_URL = cfg.get_opt("External", "US_POSTCODES_URL", US_POSTCODES_URL)
FOOD_TRANSLATIONS_URL = cfg.get_opt(
"External", "FOOD_TRANSLATIONS_URL", FOOD_TRANSLATIONS_URL
)
SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH)
ISOLINES_PATH = cfg.get_opt_path("External", "ISOLINES_PATH", ISOLINES_PATH)
ADDRESSES_PATH = cfg.get_opt_path("External", "ADDRESSES_PATH", ADDRESSES_PATH)
# Stats section:
global STATS_TYPES_CONFIG
STATS_TYPES_CONFIG = cfg.get_opt_path(
"Stats", "STATS_TYPES_CONFIG", STATS_TYPES_CONFIG
)
# Common:
global THREADS_COUNT
threads_count = int(cfg.get_opt("Common", "THREADS_COUNT", THREADS_COUNT))
if threads_count > 0:
THREADS_COUNT = threads_count
# Planet and coasts:
global PLANET_COASTS_GEOM_URL
global PLANET_COASTS_RAWGEOM_URL
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")
if DEBUG:
PLANET_URL = "https://www.dropbox.com/s/m3ru5tnj8g9u4cz/planet-latest.o5m?raw=1"
PLANET_MD5_URL = (
"https://www.dropbox.com/s/8wdl2hy22jgisk5/planet-latest.o5m.md5?raw=1"
)
NEED_PLANET_UPDATE = False

View file

@ -0,0 +1,380 @@
""""
This file contains some decorators that define stages.
There are two main types of stages:
1. outer_stage - a high level stage
2. country_stage - a stage that applies to countries files(*.mwm).
country_stage might be inside stage. There are country stages inside mwm_stage.
mwm_stage is only one stage that contains country_stages.
"""
import datetime
import logging
import os
import time
from abc import ABC
from abc import abstractmethod
from collections import defaultdict
from typing import AnyStr
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import Union
import filelock
from maps_generator.generator import status
from maps_generator.generator.exceptions import FailedTest
from maps_generator.utils.file import download_files
from maps_generator.utils.file import normalize_url_to_path_dict
from maps_generator.utils.log import DummyObject
from maps_generator.utils.log import create_file_handler
from maps_generator.utils.log import create_file_logger
logger = logging.getLogger("maps_generator")
class InternalDependency:
def __init__(self, url, path_method, mode=""):
self.url = url
self.path_method = path_method
self.mode = mode
class Test:
def __init__(self, test, need_run=None, is_pretest=False):
self._test = test
self._need_run = need_run
self.is_pretest = is_pretest
@property
def name(self):
return self._test.__name__
def need_run(self, env, _logger):
if self._need_run is None:
return True
if callable(self._need_run):
return self._need_run(env, _logger)
return self._need_run
def test(self, env, _logger, *args, **kwargs):
try:
res = self._test(env, _logger, *args, **kwargs)
except Exception as e:
raise FailedTest(f"Test {self.name} is failed.") from e
if not res:
raise FailedTest(f"Test {self.name} is failed.")
_logger.info(f"Test {self.name} is successfully completed.")
class Stage(ABC):
need_planet_lock = False
need_build_lock = False
is_helper = False
is_mwm_stage = False
is_production_only = False
def __init__(self, **args):
self.args = args
def __call__(self, env: "Env"):
return self.apply(env, **self.args)
@abstractmethod
def apply(self, *args, **kwargs):
pass
def get_stage_name(stage: Union[Type[Stage], Stage]) -> AnyStr:
n = stage.__class__.__name__ if isinstance(stage, Stage) else stage.__name__
return n.replace("Stage", "")
def get_stage_type(stage: Union[Type[Stage], AnyStr]):
from . import stages_declaration as sd
if isinstance(stage, str):
if not stage.startswith("Stage"):
stage = f"Stage{stage}"
return getattr(sd, stage)
return stage
class Stages:
"""Stages class is used for storing all stages."""
def __init__(self):
self.mwm_stage: Optional[Type[Stage]] = None
self.countries_stages: List[Type[Stage]] = []
self.stages: List[Type[Stage]] = []
self.helper_stages: List[Type[Stage]] = []
self.dependencies = defaultdict(set)
def init(self):
# We normalize self.dependencies to Dict[Type[Stage], Set[Type[Stage]]].
dependencies = defaultdict(set)
for k, v in self.dependencies.items():
dependencies[get_stage_type(k)] = set(get_stage_type(x) for x in v)
self.dependencies = dependencies
def set_mwm_stage(self, stage: Type[Stage]):
assert self.mwm_stage is None
self.mwm_stage = stage
def add_helper_stage(self, stage: Type[Stage]):
self.helper_stages.append(stage)
def add_country_stage(self, stage: Type[Stage]):
self.countries_stages.append(stage)
def add_stage(self, stage: Type[Stage]):
self.stages.append(stage)
def add_dependency_for(self, stage: Type[Stage], *deps):
for dep in deps:
self.dependencies[stage].add(dep)
def get_invisible_stages_names(self) -> List[AnyStr]:
return [get_stage_name(st) for st in self.helper_stages]
def get_visible_stages_names(self) -> List[AnyStr]:
"""Returns all stages names except helper stages names."""
stages = []
for s in self.stages:
stages.append(get_stage_name(s))
if s == self.mwm_stage:
stages += [get_stage_name(st) for st in self.countries_stages]
return stages
def is_valid_stage_name(self, stage_name) -> bool:
return get_stage_name(self.mwm_stage) == stage_name or any(
any(stage_name == get_stage_name(x) for x in c)
for c in [self.countries_stages, self.stages, self.helper_stages]
)
# A global variable stage contains all possible stages.
stages = Stages()
def outer_stage(stage: Type[Stage]) -> Type[Stage]:
"""It's decorator that defines high level stage."""
if stage.is_helper:
stages.add_helper_stage(stage)
else:
stages.add_stage(stage)
if stage.is_mwm_stage:
stages.set_mwm_stage(stage)
def new_apply(method):
def apply(obj: Stage, env: "Env", *args, **kwargs):
name = get_stage_name(obj)
logfile = os.path.join(env.paths.log_path, f"{name}.log")
log_handler = create_file_handler(logfile)
logger.addHandler(log_handler)
# This message is used as an anchor for parsing logs.
# See maps_generator/checks/logs/logs_reader.py STAGE_START_MSG_PATTERN
logger.info(f"Stage {name}: start ...")
t = time.time()
try:
if not env.is_accepted_stage(stage):
logger.info(f"Stage {name} was not accepted.")
return
main_status = env.main_status
main_status.init(env.paths.main_status_path, name)
if main_status.need_skip():
logger.warning(f"Stage {name} was skipped.")
return
main_status.update_status()
env.set_subprocess_out(log_handler.stream)
method(obj, env, *args, **kwargs)
finally:
d = time.time() - t
# This message is used as an anchor for parsing logs.
# See maps_generator/checks/logs/logs_reader.py STAGE_FINISH_MSG_PATTERN
logger.info(
f"Stage {name}: finished in {str(datetime.timedelta(seconds=d))}"
)
logger.removeHandler(log_handler)
return apply
stage.apply = new_apply(stage.apply)
return stage
def country_stage_status(stage: Type[Stage]) -> Type[Stage]:
"""It's helper decorator that works with status file."""
def new_apply(method):
def apply(obj: Stage, env: "Env", country: AnyStr, *args, **kwargs):
name = get_stage_name(obj)
_logger = DummyObject()
countries_meta = env.countries_meta
if "logger" in countries_meta[country]:
_logger, _ = countries_meta[country]["logger"]
if not env.is_accepted_stage(stage):
_logger.info(f"Stage {name} was not accepted.")
return
if "status" not in countries_meta[country]:
countries_meta[country]["status"] = status.Status()
country_status = countries_meta[country]["status"]
status_file = os.path.join(
env.paths.status_path, status.with_stat_ext(country)
)
country_status.init(status_file, name)
if country_status.need_skip():
_logger.warning(f"Stage {name} was skipped.")
return
country_status.update_status()
method(obj, env, country, *args, **kwargs)
return apply
stage.apply = new_apply(stage.apply)
return stage
def country_stage_log(stage: Type[Stage]) -> Type[Stage]:
"""It's helper decorator that works with log file."""
def new_apply(method):
def apply(obj: Stage, env: "Env", country: AnyStr, *args, **kwargs):
name = get_stage_name(obj)
log_file = os.path.join(env.paths.log_path, f"{country}.log")
countries_meta = env.countries_meta
if "logger" not in countries_meta[country]:
countries_meta[country]["logger"] = create_file_logger(log_file)
_logger, log_handler = countries_meta[country]["logger"]
# This message is used as an anchor for parsing logs.
# See maps_generator/checks/logs/logs_reader.py STAGE_START_MSG_PATTERN
_logger.info(f"Stage {name}: start ...")
t = time.time()
env.set_subprocess_out(log_handler.stream, country)
method(obj, env, country, *args, logger=_logger, **kwargs)
d = time.time() - t
# This message is used as an anchor for parsing logs.
# See maps_generator/checks/logs/logs_reader.py STAGE_FINISH_MSG_PATTERN
_logger.info(
f"Stage {name}: finished in {str(datetime.timedelta(seconds=d))}"
)
return apply
stage.apply = new_apply(stage.apply)
return stage
def test_stage(*tests: Test) -> Callable[[Type[Stage],], Type[Stage]]:
def new_apply(method):
def apply(obj: Stage, env: "Env", *args, **kwargs):
_logger = kwargs["logger"] if "logger" in kwargs else logger
def run_tests(tests):
for test in tests:
if test.need_run(env, _logger):
test.test(env, _logger, *args, **kwargs)
else:
_logger.info(f"Test {test.name} was skipped.")
run_tests(filter(lambda t: t.is_pretest, tests))
method(obj, env, *args, **kwargs)
run_tests(filter(lambda t: not t.is_pretest, tests))
return apply
def wrapper(stage: Type[Stage]) -> Type[Stage]:
stage.apply = new_apply(stage.apply)
return stage
return wrapper
def country_stage(stage: Type[Stage]) -> Type[Stage]:
"""It's decorator that defines country stage."""
if stage.is_helper:
stages.add_helper_stage(stage)
else:
stages.add_country_stage(stage)
return country_stage_log(country_stage_status(stage))
def mwm_stage(stage: Type[Stage]) -> Type[Stage]:
stage.is_mwm_stage = True
return stage
def production_only(stage: Type[Stage]) -> Type[Stage]:
stage.is_production_only = True
return stage
def helper_stage_for(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
def wrapper(stage: Type[Stage]) -> Type[Stage]:
stages.add_dependency_for(stage, *deps)
stage.is_helper = True
return stage
return wrapper
def depends_from_internal(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
def get_urls(
env: "Env", internal_dependencies: List[InternalDependency]
) -> Dict[AnyStr, AnyStr]:
deps = {}
for d in internal_dependencies:
if "p" in d.mode and not env.production or not d.url:
continue
path = None
if type(d.path_method) is property:
path = d.path_method.__get__(env.paths)
assert path is not None, type(d.path_method)
deps[d.url] = path
return deps
def download_under_lock(env: "Env", urls: Dict[AnyStr, AnyStr], stage_name: AnyStr):
lock_name = f"{os.path.join(env.paths.status_path, stage_name)}.lock"
status_name = f"{os.path.join(env.paths.status_path, stage_name)}.download"
with filelock.FileLock(lock_name):
s = status.Status(status_name)
if not s.is_finished():
urls = normalize_url_to_path_dict(urls)
download_files(urls, env.force_download_files)
s.finish()
def new_apply(method):
def apply(obj: Stage, env: "Env", *args, **kwargs):
if hasattr(obj, "internal_dependencies") and obj.internal_dependencies:
urls = get_urls(env, obj.internal_dependencies)
if urls:
download_under_lock(env, urls, get_stage_name(obj))
method(obj, env, *args, **kwargs)
return apply
def wrapper(stage: Type[Stage]) -> Type[Stage]:
stage.internal_dependencies = deps
stage.apply = new_apply(stage.apply)
return stage
return wrapper
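# Illustrative usage sketch (not part of the original module): declaring a new
# high-level stage. Applying @outer_stage registers the class in the global `stages`
# registry and wraps apply() with logging and status handling. The class name and
# body are assumptions for the example only.
@outer_stage
class StageExample(Stage):
    def apply(self, env: "Env"):
        logger.info(f"Example stage, build path: {env.paths.build_path}")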

View file

@ -0,0 +1,446 @@
""""
This file contains possible stages that maps_generator can run.
Some algorithms suppose a maps genration processes looks like:
stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
Only stage_mwm can contain country_
"""
import datetime
import json
import logging
import multiprocessing
import os
import shutil
import tarfile
import errno
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import AnyStr
from typing import Type
import maps_generator.generator.diffs as diffs
import maps_generator.generator.stages_tests as st
# from descriptions.descriptions_downloader import check_and_get_checker
# from descriptions.descriptions_downloader import download_from_wikidata_tags
# from descriptions.descriptions_downloader import download_from_wikipedia_tags
from maps_generator.generator import coastline
from maps_generator.generator import settings
from maps_generator.generator import steps
from maps_generator.generator.env import Env
from maps_generator.generator.env import PathProvider
from maps_generator.generator.env import WORLD_COASTS_NAME
from maps_generator.generator.env import WORLD_NAME
from maps_generator.generator.exceptions import BadExitStatusError
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.stages import InternalDependency as D
from maps_generator.generator.stages import Stage
from maps_generator.generator.stages import Test
from maps_generator.generator.stages import country_stage
from maps_generator.generator.stages import depends_from_internal
from maps_generator.generator.stages import helper_stage_for
from maps_generator.generator.stages import mwm_stage
from maps_generator.generator.stages import outer_stage
from maps_generator.generator.stages import production_only
from maps_generator.generator.stages import test_stage
from maps_generator.generator.statistics import get_stages_info
from maps_generator.utils.file import download_files
from maps_generator.utils.file import is_verified
from post_generation.hierarchy_to_countries import hierarchy_to_countries
from post_generation.inject_promo_ids import inject_promo_ids
logger = logging.getLogger("maps_generator")
def is_accepted(env: Env, stage: Type[Stage]) -> bool:
return env.is_accepted_stage(stage)
@outer_stage
class StageDownloadAndConvertPlanet(Stage):
def apply(self, env: Env, force_download: bool = True, **kwargs):
if force_download or not is_verified(env.paths.planet_o5m):
steps.step_download_and_convert_planet(
env, force_download=force_download, **kwargs
)
@outer_stage
class StageUpdatePlanet(Stage):
def apply(self, env: Env, **kwargs):
steps.step_update_planet(env, **kwargs)
@outer_stage
class StageCoastline(Stage):
def apply(self, env: Env, use_old_if_fail=True):
coasts_geom = "WorldCoasts.geom"
coasts_rawgeom = "WorldCoasts.rawgeom"
try:
coastline.make_coastline(env)
except BadExitStatusError as e:
if not use_old_if_fail:
raise e
logger.warning("Build coasts failed. Try to download the coasts...")
download_files(
{
settings.PLANET_COASTS_GEOM_URL: os.path.join(
env.paths.coastline_path, coasts_geom
),
settings.PLANET_COASTS_RAWGEOM_URL: os.path.join(
env.paths.coastline_path, coasts_rawgeom
),
}
)
for f in [coasts_geom, coasts_rawgeom]:
path = os.path.join(env.paths.coastline_path, f)
shutil.copy2(path, env.paths.intermediate_data_path)
@outer_stage
class StagePreprocess(Stage):
def apply(self, env: Env, **kwargs):
steps.step_preprocess(env, **kwargs)
@outer_stage
@depends_from_internal(
D(settings.HOTELS_URL, PathProvider.hotels_path, "p"),
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"),
D(settings.FOOD_URL, PathProvider.food_paths, "p"),
D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"),
)
@test_stage(
Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
)
class StageFeatures(Stage):
def apply(self, env: Env):
extra = {}
if is_accepted(env, StageDescriptions):
extra.update({"idToWikidata": env.paths.id_to_wikidata_path})
if env.production:
extra.update(
{
"booking_data": env.paths.hotels_path,
"promo_catalog_cities": env.paths.promo_catalog_cities_path,
"popular_places_data": env.paths.popularity_path,
"brands_data": env.paths.food_paths,
"brands_translations_data": env.paths.food_translations_path,
}
)
if is_accepted(env, StageCoastline):
extra.update({"emit_coasts": True})
if is_accepted(env, StageIsolinesInfo):
extra.update({"isolines_path": PathProvider.isolines_path()})
extra.update({"addresses_path": PathProvider.addresses_path()})
steps.step_features(env, **extra)
if os.path.exists(env.paths.packed_polygons_path):
shutil.copy2(env.paths.packed_polygons_path, env.paths.mwm_path)
@outer_stage
@helper_stage_for("StageDescriptions")
class StageDownloadDescriptions(Stage):
def apply(self, env: Env):
"""
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.paths.data_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
dump_wikipedia_urls=env.paths.wiki_url_path,
idToWikidata=env.paths.id_to_wikidata_path,
threads_count=settings.THREADS_COUNT,
)
# https://en.wikipedia.org/wiki/Wikipedia:Multilingual_statistics
langs = ("en", "de", "fr", "es", "ru", "tr")
checker = check_and_get_checker(env.paths.popularity_path)
download_from_wikipedia_tags(
env.paths.wiki_url_path, env.paths.descriptions_path, langs, checker
)
download_from_wikidata_tags(
env.paths.id_to_wikidata_path, env.paths.descriptions_path, langs, checker
)
"""
        # The src folder is hardcoded here and must already exist on the map-building machine.
        src = "/home/planet/wikipedia/descriptions"
        # The dest folder will generally become build/*/intermediate_data/descriptions.
        dest = env.paths.descriptions_path
        # A missing source folder is a big problem, so fail the stage early
        # instead of silently swallowing the error.
        if os.path.isdir(src):
            print("Found %s" % src)
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), src)
        # An empty "descriptions" folder (or a stale file) may already exist; remove it before symlinking.
try:
if os.path.isdir(dest):
shutil.rmtree(dest)
else:
os.remove(dest)
except OSError as e:
print("rmtree error: %s - %s" % (e.filename, e.strerror))
os.symlink(src, dest)
@outer_stage
@mwm_stage
class StageMwm(Stage):
def apply(self, env: Env):
tmp_mwm_names = env.get_tmp_mwm_names()
if len(tmp_mwm_names):
logger.info(f'Number of feature data .mwm.tmp country files to process: {len(tmp_mwm_names)}')
with ThreadPoolExecutor(settings.THREADS_COUNT) as pool:
pool.map(
lambda c: StageMwm.make_mwm(c, env),
tmp_mwm_names
)
else:
# TODO: list all countries that were not found?
logger.warning(f'There are no feature data .mwm.tmp country files to process in {env.paths.intermediate_tmp_path}!')
            logger.warning('Perhaps the countries requested for generation are not present in the supplied planet file?')
@staticmethod
def make_mwm(country: AnyStr, env: Env):
logger.info(f'Starting mwm generation for {country}')
world_stages = {
WORLD_NAME: [
StageIndex,
StageCitiesIdsWorld,
StagePopularityWorld,
StagePrepareRoutingWorld,
StageRoutingWorld,
StageMwmStatistics,
],
WORLD_COASTS_NAME: [StageIndex, StageMwmStatistics],
}
mwm_stages = [
StageIndex,
StageUgc,
StageSrtm,
StageIsolinesInfo,
StageDescriptions,
# call after descriptions
StagePopularity,
StageRouting,
StageRoutingTransit,
StageMwmDiffs,
StageMwmStatistics,
]
for stage in world_stages.get(country, mwm_stages):
logger.info(f'{country} mwm stage {stage.__name__}: start...')
stage(country=country)(env)
env.finish_mwm(country)
logger.info(f'Finished mwm generation for {country}')
@country_stage
class StageIndex(Stage):
def apply(self, env: Env, country, **kwargs):
if country == WORLD_NAME:
steps.step_index_world(env, country, **kwargs)
elif country == WORLD_COASTS_NAME:
steps.step_coastline_index(env, country, **kwargs)
else:
kwargs.update(
{
"uk_postcodes_dataset": settings.UK_POSTCODES_URL,
"us_postcodes_dataset": settings.US_POSTCODES_URL,
}
)
steps.step_index(env, country, **kwargs)
@country_stage
@production_only
class StageCitiesIdsWorld(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_cities_ids_world(env, country, **kwargs)
@country_stage
@helper_stage_for("StageRoutingWorld")
# ToDo: Are we sure that this stage will be skipped if StageRoutingWorld is skipped?
class StagePrepareRoutingWorld(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_prepare_routing_world(env, country, **kwargs)
@country_stage
class StageRoutingWorld(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_routing_world(env, country, **kwargs)
@country_stage
@depends_from_internal(D(settings.UGC_URL, PathProvider.ugc_path),)
@production_only
class StageUgc(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_ugc(env, country, **kwargs)
@country_stage
class StagePopularity(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_popularity(env, country, **kwargs)
@country_stage
class StagePopularityWorld(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_popularity_world(env, country, **kwargs)
@country_stage
class StageSrtm(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_srtm(env, country, **kwargs)
@country_stage
class StageIsolinesInfo(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_isolines_info(env, country, **kwargs)
@country_stage
class StageDescriptions(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_description(env, country, **kwargs)
@country_stage
class StageRouting(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_routing(env, country, **kwargs)
@country_stage
@depends_from_internal(
D(settings.SUBWAY_URL, PathProvider.subway_path),
D(settings.TRANSIT_URL, PathProvider.transit_path_experimental),
)
class StageRoutingTransit(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_routing_transit(env, country, **kwargs)
@country_stage
class StageMwmDiffs(Stage):
def apply(self, env: Env, country, logger, **kwargs):
data_dir = diffs.DataDir(
            diff_tool=env.diff_tool,
            mwm_name=f"{country}.mwm",
            new_version_dir=env.paths.mwm_path,
            old_version_root_dir=settings.DATA_ARCHIVE_DIR,
)
diffs.mwm_diff_calculation(data_dir, logger, depth=settings.DIFF_VERSION_DEPTH)
@country_stage
@helper_stage_for("StageStatistics")
class StageMwmStatistics(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_statistics(env, country, **kwargs)
@outer_stage
@depends_from_internal(
D(
settings.PROMO_CATALOG_COUNTRIES_URL,
PathProvider.promo_catalog_countries_path,
"p",
),
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
)
class StageCountriesTxt(Stage):
def apply(self, env: Env):
countries = hierarchy_to_countries(
env.paths.old_to_new_path,
env.paths.borders_to_osm_path,
env.paths.countries_synonyms_path,
env.paths.hierarchy_path,
env.paths.mwm_path,
env.paths.mwm_version,
)
if env.production:
inject_promo_ids(
countries,
env.paths.promo_catalog_cities_path,
env.paths.promo_catalog_countries_path,
env.paths.mwm_path,
env.paths.types_path,
env.paths.mwm_path,
)
with open(env.paths.counties_txt_path, "w") as f:
json.dump(countries, f, ensure_ascii=False, indent=1)
@outer_stage
@production_only
class StageLocalAds(Stage):
def apply(self, env: Env):
create_csv(
env.paths.localads_path,
env.paths.mwm_path,
env.paths.mwm_path,
env.mwm_version,
multiprocessing.cpu_count(),
)
with tarfile.open(f"{env.paths.localads_path}.tar.gz", "w:gz") as tar:
for filename in os.listdir(env.paths.localads_path):
tar.add(os.path.join(env.paths.localads_path, filename), arcname=filename)
@outer_stage
class StageStatistics(Stage):
def apply(self, env: Env):
steps_info = get_stages_info(env.paths.log_path, {"statistics"})
stats = defaultdict(lambda: defaultdict(dict))
stats["steps"] = steps_info["steps"]
for country in env.get_tmp_mwm_names():
with open(os.path.join(env.paths.stats_path, f"{country}.json")) as f:
stats["countries"][country] = {
"types": json.load(f),
"steps": steps_info["countries"][country],
}
def default(o):
if isinstance(o, datetime.timedelta):
return str(o)
with open(os.path.join(env.paths.stats_path, "stats.json"), "w") as f:
json.dump(
stats, f, ensure_ascii=False, sort_keys=True, indent=2, default=default
)
@outer_stage
class StageCleanup(Stage):
def apply(self, env: Env):
logger.info(
f"osm2ft files will be moved from {env.paths.mwm_path} "
f"to {env.paths.osm2ft_path}."
)
for x in os.listdir(env.paths.mwm_path):
p = os.path.join(env.paths.mwm_path, x)
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
shutil.move(p, os.path.join(env.paths.osm2ft_path, x))
logger.info(f"{env.paths.draft_path} will be removed.")
shutil.rmtree(env.paths.draft_path)

View file

@ -0,0 +1,27 @@
import os
from datetime import datetime
import json
from maps_generator.generator import settings
from maps_generator.generator.env import Env
from maps_generator.utils.file import download_file
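# Builds the predicate used by @test_stage on StageFeatures (with max_days=7): it fetches
# meta.json from the hotels dump directory and checks that the "latest" timestamp is at
# most max_days older than the build date (env.dt). If HOTELS_URL is not configured,
# the predicate returns None.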
def make_test_booking_data(max_days):
def test_booking_data(env: Env, logger, *args, **kwargs):
if not settings.HOTELS_URL:
return None
base_url, _ = settings.HOTELS_URL.rsplit("/", maxsplit=1)
url = f"{base_url}/meta.json"
meta_path = os.path.join(env.paths.tmp_dir(), "hotels-meta.json")
download_file(url, meta_path)
with open(meta_path) as f:
meta = json.load(f)
raw_date = meta["latest"].strip()
logger.info(f"Booking date is from {raw_date}.")
dt = datetime.strptime(raw_date, "%Y_%m_%d-%H_%M_%S")
return (env.dt - dt).days < max_days
return test_booking_data

View file

@ -0,0 +1,185 @@
import datetime
import json
import logging
import os
import re
from collections import defaultdict
from typing import AnyStr
from typing import Dict
from typing import List
from maps_generator.generator.env import WORLDS_NAMES
from maps_generator.generator.exceptions import ParseError
logger = logging.getLogger("maps_generator")
# Parse entries, written by ./generator/statistics.cpp PrintTypeStats.
RE_STAT = re.compile(
r"([\w:-]+): "
r"size = +\d+; "
r"features = +(\d+); "
r"length = +([0-9.e+-]+) m; "
r"area = +([0-9.e+-]+) m²; "
r"w\/names = +(\d+)"
)
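# Matches durations in str(datetime.timedelta) form, e.g. "1:10:42.287364" or
# "1 day, 0:00:05.824967", as printed in the "Stage <name>: finished in <time>" log lines.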
RE_TIME_DELTA = re.compile(
r"^(?:(?P<days>-?\d+) (days?, )?)?"
r"((?:(?P<hours>-?\d+):)(?=\d+:\d+))?"
r"(?:(?P<minutes>-?\d+):)?"
r"(?P<seconds>-?\d+)"
r"(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$"
)
RE_FINISH_STAGE = re.compile(r"(.*)Stage (.+): finished in (.+)$")
def read_stat(f):
stats = []
for line in f:
m = RE_STAT.match(line)
# Skip explanation header strings.
if m is None:
continue
stats.append(
{
"name": m.group(1),
"cnt": int(m.group(2)),
"len": float(m.group(3)),
"area": float(m.group(4)),
"names": int(m.group(5)),
}
)
return stats
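# Each non-comment line of the stats config holds up to three ";"-separated columns:
# a type-name regex, an aggregation kind ("len", "area", "cnt_names"; anything else
# counts features), and a human-readable label for the report. A hypothetical example
# line: "highway-footway.* ; len ; Footpaths".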
def read_config(f):
config = []
for line in f:
l = line.strip()
if l.startswith("#") or not l:
continue
columns = [c.strip() for c in l.split(";", 2)]
columns[0] = re.compile(columns[0])
columns[1] = columns[1].lower()
config.append(columns)
return config
def process_stat(config, stats):
result = {}
for param in config:
res = 0
for t in stats:
if param[0].match(t["name"]):
if param[1] == "len":
res += t["len"]
elif param[1] == "area":
res += t["area"]
elif param[1] == "cnt_names":
res += t["names"]
else:
res += t["cnt"]
result[str(param[0]) + param[1]] = res
return result
def format_res(res, t):
unit = None
if t == "len":
unit = "m"
elif t == "area":
unit = ""
elif t == "cnt" or t == "cnt_names":
unit = "pc"
else:
raise ParseError(f"Unknown type {t}.")
return res, unit
def make_stats(config_path, stats_path):
with open(config_path) as f:
config = read_config(f)
with open(stats_path) as f:
stats = process_stat(config, read_stat(f))
lines = []
for param in config:
k = str(param[0]) + param[1]
st = format_res(stats[k], param[1])
lines.append({"type": param[2], "quantity": st[0], "unit": st[1]})
return lines
def parse_time(time_str):
parts = RE_TIME_DELTA.match(time_str)
if not parts:
return
parts = parts.groupdict()
time_params = {}
for name, param in parts.items():
if param:
time_params[name] = int(param)
return datetime.timedelta(**time_params)
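# Collects per-stage durations from the generation logs: matches found in "stage_*"
# log files (when the stage is not ignored) go into result["stages"]; all other
# matches go into result["countries"], keyed by the log file name without extension.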
def get_stages_info(log_path, ignored_stages=frozenset()):
result = defaultdict(lambda: defaultdict(dict))
for file in os.listdir(log_path):
path = os.path.join(log_path, file)
with open(path) as f:
for line in f:
m = RE_FINISH_STAGE.match(line)
if not m:
continue
stage_name = m.group(2)
dt = parse_time(m.group(3))
if file.startswith("stage_") and stage_name not in ignored_stages:
result["stages"][stage_name] = dt
else:
country = file.split(".")[0]
result["countries"][country][stage_name] = dt
return result
def read_types(path: AnyStr) -> Dict[AnyStr, Dict]:
    """
    Reads and summarizes statistics for all countries, excluding World and
    WorldCoasts.
    """
with open(path) as f:
json_data = json.load(f)
all_types = {}
countries = json_data["countries"]
for country, json_value in countries.items():
if country in WORLDS_NAMES:
continue
try:
json_types = json_value["types"]
except KeyError:
logger.exception(f"Cannot parse {json_value}")
continue
for t in json_types:
curr = all_types.get(t["type"], {})
curr["quantity"] = curr.get("quantity", 0.0) + t["quantity"]
curr["unit"] = t["unit"]
all_types[t["type"]] = curr
return all_types
def diff(new: Dict[AnyStr, Dict], old: Dict[AnyStr, Dict]) -> List:
assert len(new) == len(old)
lines = []
for key in new:
o = old[key]["quantity"]
n = new[key]["quantity"]
rel = 0
if o != 0.0:
rel = int(((n - o) / o) * 100)
else:
if n != 0.0:
rel = 100
lines.append((key, o, n, rel, n - o, new[key]["unit"],))
return lines

View file

@ -0,0 +1,53 @@
import os
from typing import AnyStr
from typing import Optional
def with_stat_ext(country: AnyStr):
return f"{country}.status"
def without_stat_ext(status: AnyStr):
return status.replace(".status", "")
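# A minimal usage sketch (the file name and stage name below are hypothetical):
#
#   status = Status()
#   status.init("Germany.status", "StageRouting")
#   if not status.need_skip():
#       ...  # run the stage
#       status.update_status()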
class Status:
    """Status is used for recovering and continuing maps generation."""
def __init__(
self, stat_path: Optional[AnyStr] = None, stat_next: Optional[AnyStr] = None
):
self.stat_path = stat_path
self.stat_next = stat_next
self.stat_saved = None
self.find = False
def init(self, stat_path: AnyStr, stat_next: AnyStr):
self.stat_path = stat_path
self.stat_next = stat_next
self.stat_saved = self.status()
if not self.find:
self.find = not self.stat_saved or not self.need_skip()
def need_skip(self) -> bool:
if self.find:
return False
return self.stat_saved and self.stat_next != self.stat_saved
def update_status(self):
with open(self.stat_path, "w") as status:
status.write(self.stat_next)
def finish(self):
with open(self.stat_path, "w") as status:
status.write("finish")
def is_finished(self):
return self.status() == "finish"
def status(self):
try:
with open(self.stat_path) as status:
return status.read()
except IOError:
return None

View file

@ -0,0 +1,453 @@
"""
This file contains basic api for generator_tool and osm tools to generate maps.
"""
import functools
import json
import logging
import os
import shutil
import subprocess
from typing import AnyStr
from maps_generator.generator import settings
from maps_generator.generator.env import Env
from maps_generator.generator.env import PathProvider
from maps_generator.generator.env import WORLDS_NAMES
from maps_generator.generator.env import WORLD_NAME
from maps_generator.generator.env import get_all_countries_list
from maps_generator.generator.exceptions import ValidationError
from maps_generator.generator.exceptions import wait_and_raise_if_fail
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.osmtools import osmconvert
from maps_generator.generator.osmtools import osmfilter
from maps_generator.generator.osmtools import osmupdate
from maps_generator.generator.statistics import make_stats
from maps_generator.utils.file import download_files
from maps_generator.utils.file import is_verified
from maps_generator.utils.file import make_symlink
from maps_generator.utils.md5 import md5_ext
from maps_generator.utils.md5 import write_md5sum
logger = logging.getLogger("maps_generator")
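# When only one country is being generated, give it the whole THREADS_COUNT budget;
# otherwise each country keeps the generator tool's default thread count, with a manual
# exception for the Taiwan_* mwms (see the comment below) so their index stage does not
# keep running long after all other mwms have finished.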
def multithread_run_if_one_country(func):
@functools.wraps(func)
def wrap(env, country, **kwargs):
if len(env.countries) == 1:
kwargs.update({"threads_count": settings.THREADS_COUNT})
# Otherwise index stage of Taiwan_* mwms continues to run after all other mwms have finished:
elif country == 'Taiwan_North':
kwargs.update({"threads_count": 6})
elif country == 'Taiwan_South':
kwargs.update({"threads_count": 2})
func(env, country, **kwargs)
return wrap
def convert_planet(
tool: AnyStr,
in_planet: AnyStr,
out_planet: AnyStr,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
):
osmconvert(tool, in_planet, out_planet, output=output, error=error)
write_md5sum(out_planet, md5_ext(out_planet))
def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs):
    # Do not copy, convert, or verify a local .o5m planet dump; just symlink it instead.
src = settings.PLANET_URL
if src.startswith("file://") and src.endswith(".o5m"):
os.symlink(src[7:], env.paths.planet_o5m)
return
if force_download or not is_verified(env.paths.planet_osm_pbf):
download_files(
{
settings.PLANET_URL: env.paths.planet_osm_pbf,
settings.PLANET_MD5_URL: md5_ext(env.paths.planet_osm_pbf),
},
env.force_download_files,
)
if not is_verified(env.paths.planet_osm_pbf):
raise ValidationError(f"Wrong md5 sum for {env.paths.planet_osm_pbf}.")
convert_planet(
env[settings.OSM_TOOL_CONVERT],
env.paths.planet_osm_pbf,
env.paths.planet_o5m,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
)
os.remove(env.paths.planet_osm_pbf)
os.remove(md5_ext(env.paths.planet_osm_pbf))
def step_update_planet(env: Env, **kwargs):
tmp = f"{env.paths.planet_o5m}.tmp"
osmupdate(
env[settings.OSM_TOOL_UPDATE],
env.paths.planet_o5m,
tmp,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
**kwargs,
)
os.remove(env.paths.planet_o5m)
os.rename(tmp, env.paths.planet_o5m)
write_md5sum(env.paths.planet_o5m, md5_ext(env.paths.planet_o5m))
def step_preprocess(env: Env, **kwargs):
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.paths.data_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
osm_file_type="o5m",
osm_file_name=env.paths.planet_o5m,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
preprocess=True,
**kwargs,
)
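# The optional flags passed to the features step depend on the requested set of countries:
# packed borders are generated whenever at least one real (non-World) mwm is requested,
# the World mwm only when it was requested explicitly, and "have_borders_for_whole_world"
# only when every known country is being built.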
def step_features(env: Env, **kwargs):
if any(x not in WORLDS_NAMES for x in env.countries):
kwargs.update({"generate_packed_borders": True})
if any(x == WORLD_NAME for x in env.countries):
kwargs.update({"generate_world": True})
if len(env.countries) == len(get_all_countries_list(PathProvider.borders_path())):
kwargs.update({"have_borders_for_whole_world": True})
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.paths.data_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
osm_file_type="o5m",
osm_file_name=env.paths.planet_o5m,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
cities_boundaries_data=env.paths.cities_boundaries_path,
generate_features=True,
threads_count=settings.THREADS_COUNT_FEATURES_STAGE,
**kwargs,
)
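# "Recovery" means the tool operates on a copy of <country>.mwm placed in the draft
# directory (with its .osm2ft file symlinked next to it), so a failed run cannot corrupt
# the mwm produced by the previous steps; on success the updated mwm is moved back into
# the original data path.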
def run_gen_tool_with_recovery_country(env: Env, *args, **kwargs):
if "data_path" not in kwargs or "output" not in kwargs:
        logger.warning("run_gen_tool() will be called without recovery.")
        run_gen_tool(*args, **kwargs)
        return
prev_data_path = kwargs["data_path"]
mwm = f"{kwargs['output']}.mwm"
osm2ft = f"{mwm}.osm2ft"
kwargs["data_path"] = env.paths.draft_path
make_symlink(
os.path.join(prev_data_path, osm2ft), os.path.join(env.paths.draft_path, osm2ft)
)
shutil.copy(
os.path.join(prev_data_path, mwm), os.path.join(env.paths.draft_path, mwm)
)
run_gen_tool(*args, **kwargs)
shutil.move(
os.path.join(env.paths.draft_path, mwm), os.path.join(prev_data_path, mwm)
)
kwargs["data_path"] = prev_data_path
@multithread_run_if_one_country
def _generate_common_index(env: Env, country: AnyStr, **kwargs):
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
node_storage=env.node_storage,
planet_version=env.planet_version,
generate_geometry=True,
generate_index=True,
output=country,
**kwargs,
)
def step_index_world(env: Env, country: AnyStr, **kwargs):
_generate_common_index(
env,
country,
generate_search_index=True,
cities_boundaries_data=env.paths.cities_boundaries_path,
generate_cities_boundaries=True,
**kwargs,
)
def step_cities_ids_world(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
output=country,
generate_cities_ids=True,
**kwargs,
)
def filter_roads(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
):
osmfilter(
name_executable,
in_file,
out_file,
output=output,
error=error,
keep="",
keep_ways="highway=motorway =trunk =primary =secondary =tertiary",
)
def make_world_road_graph(
name_executable,
path_roads_file,
path_resources,
path_res_file,
logger,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
):
world_roads_builder_tool_cmd = [
name_executable,
f"--path_roads_file={path_roads_file}",
f"--path_resources={path_resources}",
f"--path_res_file={path_res_file}",
]
logger.info(f"Starting {' '.join(world_roads_builder_tool_cmd)}")
world_roads_builder_tool = subprocess.Popen(
world_roads_builder_tool_cmd, stdout=output, stderr=error, env=os.environ
)
wait_and_raise_if_fail(world_roads_builder_tool)
def step_prepare_routing_world(env: Env, country: AnyStr, logger, **kwargs):
filter_roads(
env[settings.OSM_TOOL_FILTER],
env.paths.planet_o5m,
env.paths.world_roads_o5m,
env.get_subprocess_out(country),
env.get_subprocess_out(country),
)
make_world_road_graph(
env.world_roads_builder_tool,
env.paths.world_roads_o5m,
env.paths.user_resource_path,
env.paths.world_roads_path,
logger,
env.get_subprocess_out(country),
env.get_subprocess_out(country)
)
def step_routing_world(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
output=country,
world_roads_path=env.paths.world_roads_path,
**kwargs,
)
def step_index(env: Env, country: AnyStr, **kwargs):
_generate_common_index(env, country, generate_search_index=True, **kwargs)
def step_coastline_index(env: Env, country: AnyStr, **kwargs):
_generate_common_index(env, country, **kwargs)
def step_ugc(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
ugc_data=env.paths.ugc_path,
output=country,
**kwargs,
)
def step_popularity(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
generate_popular_places=True,
output=country,
**kwargs,
)
def step_popularity_world(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
wikipedia_pages=env.paths.descriptions_path,
idToWikidata=env.paths.id_to_wikidata_path,
generate_popular_places=True,
output=country,
**kwargs,
)
def step_srtm(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
srtm_path=env.paths.srtm_path(),
output=country,
**kwargs,
)
def step_isolines_info(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
generate_isolines_info=True,
isolines_path=PathProvider.isolines_path(),
output=country,
**kwargs,
)
def step_description(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
wikipedia_pages=env.paths.descriptions_path,
idToWikidata=env.paths.id_to_wikidata_path,
output=country,
**kwargs,
)
def step_routing(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
cities_boundaries_data=env.paths.cities_boundaries_path,
generate_maxspeed=True,
make_city_roads=True,
make_cross_mwm=True,
generate_cameras=True,
make_routing_index=True,
generate_traffic_keys=False,
output=country,
**kwargs,
)
def step_routing_transit(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
transit_path=env.paths.transit_path,
transit_path_experimental=env.paths.transit_path_experimental,
make_transit_cross_mwm=True,
make_transit_cross_mwm_experimental=bool(env.paths.transit_path_experimental),
output=country,
**kwargs,
)
def step_statistics(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
cache_path=env.paths.cache_path,
user_resource_path=env.paths.user_resource_path,
stats_types=True,
output=country,
**kwargs,
)
with open(os.path.join(env.paths.stats_path, f"{country}.json"), "w") as f:
json.dump(
make_stats(
settings.STATS_TYPES_CONFIG,
os.path.join(env.paths.intermediate_data_path, f"{country}.stats"),
),
f,
)

View file

@ -0,0 +1,55 @@
import logging
from typing import AnyStr
from typing import Iterable
from typing import Optional
from maps_generator.generator import stages_declaration as sd
from maps_generator.generator.env import Env
from maps_generator.generator.generation import Generation
from .generator.stages import Stage
logger = logging.getLogger("maps_generator")
def run_generation(
env: Env,
stages: Iterable[Stage],
from_stage: Optional[AnyStr] = None,
build_lock: bool = True,
):
generation = Generation(env, build_lock)
for s in stages:
generation.add_stage(s)
generation.run(from_stage)
def generate_maps(env: Env, from_stage: Optional[AnyStr] = None):
    """Runs maps generation."""
stages = (
sd.StageDownloadAndConvertPlanet(),
sd.StageUpdatePlanet(),
sd.StageCoastline(),
sd.StagePreprocess(),
sd.StageFeatures(),
sd.StageDownloadDescriptions(),
sd.StageMwm(),
sd.StageCountriesTxt(),
sd.StageLocalAds(),
sd.StageStatistics(),
sd.StageCleanup(),
)
run_generation(env, stages, from_stage)
def generate_coasts(env: Env, from_stage: Optional[AnyStr] = None):
"""Runs coasts generation."""
stages = (
sd.StageDownloadAndConvertPlanet(),
sd.StageUpdatePlanet(),
sd.StageCoastline(use_old_if_fail=False),
sd.StageCleanup(),
)
run_generation(env, stages, from_stage)

View file

@ -0,0 +1,8 @@
omim-data-all
omim-data-files
omim-descriptions
omim-post_generation
filelock==3.0.10
beautifulsoup4==4.9.1
requests>=2.31.0
requests_file==1.5.1

View file

@ -0,0 +1,6 @@
-r ../post_generation/requirements_dev.txt
-r ../descriptions/requirements_dev.txt
filelock==3.0.10
beautifulsoup4==4.9.1
requests>=2.31.0
requests_file==1.5.1

View file

@ -0,0 +1,37 @@
#!/usr/bin/env python3
import os
import sys
import setuptools
module_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
from pyhelpers.setup import chdir
from pyhelpers.setup import get_version
from pyhelpers.setup import get_requirements
with chdir(os.path.abspath(os.path.dirname(__file__))):
setuptools.setup(
name="omim-maps_generator",
version=str(get_version()),
author="CoMaps",
author_email="info@comaps.app",
description="This package contains tools for maps generation.",
url="https://codeberg.org/comaps",
package_dir={"maps_generator": ""},
package_data={"": ["var/**/*"]},
packages=[
"maps_generator",
"maps_generator.generator",
"maps_generator.utils",
"maps_generator.checks"
],
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
],
python_requires=">=3.6",
install_requires=get_requirements(),
)

View file

@ -0,0 +1,131 @@
import datetime
import logging
import os
import re
import tempfile
import unittest
from collections import Counter
from maps_generator.checks.logs import logs_reader
class TestLogsReader(unittest.TestCase):
def setUp(self):
self.dir = tempfile.TemporaryDirectory()
with open(
os.path.join(self.dir.name, "Czech_Jihovychod_Jihomoravsky kraj.log"), "w"
) as file:
file.write(LOG_STRING)
logs = list(logs_reader.LogsReader(self.dir.name))
self.assertEqual(len(logs), 1)
self.log = logs[0]
def tearDown(self):
self.dir.cleanup()
def test_read_logs(self):
self.assertTrue(self.log.name.startswith("Czech_Jihovychod_Jihomoravsky kraj"))
self.assertTrue(self.log.is_mwm_log)
self.assertFalse(self.log.is_stage_log)
self.assertEqual(len(self.log.lines), 46)
def test_split_into_stages(self):
st = logs_reader.split_into_stages(self.log)
self.assertEqual(len(st), 4)
names_counter = Counter(s.name for s in st)
self.assertEqual(
names_counter,
Counter({"Routing": 1, "RoutingTransit": 1, "MwmStatistics": 2}),
)
def test_split_and_normalize_logs(self):
st = logs_reader.normalize_logs(logs_reader.split_into_stages(self.log))
self.assertEqual(len(st), 3)
m = {s.name: s for s in st}
self.assertEqual(
m["MwmStatistics"].duration, datetime.timedelta(seconds=3.628742)
)
def test_count_levels(self):
st = logs_reader.normalize_logs(logs_reader.split_into_stages(self.log))
self.assertEqual(len(st), 3)
m = {s.name: s for s in st}
c = logs_reader.count_levels(m["Routing"])
self.assertEqual(c, Counter({logging.INFO: 22, logging.ERROR: 1}))
c = logs_reader.count_levels(self.log.lines)
self.assertEqual(c, Counter({logging.INFO: 45, logging.ERROR: 1}))
def test_find_and_parse(self):
st = logs_reader.normalize_logs(logs_reader.split_into_stages(self.log))
self.assertEqual(len(st), 3)
m = {s.name: s for s in st}
pattern_str = (
r".*Leaps finished, elapsed: [0-9.]+ seconds, routes found: "
r"(?P<routes_found>\d+) , not found: (?P<routes_not_found>\d+)$"
)
for found in (
logs_reader.find_and_parse(m["Routing"], pattern_str),
logs_reader.find_and_parse(self.log.lines, re.compile(pattern_str)),
):
self.assertEqual(len(found), 1)
line = found[0]
self.assertEqual(
line[0], {"routes_found": "996363", "routes_not_found": "126519"}
)
if __name__ == "__main__":
unittest.main()
LOG_STRING = """
[2020-05-24 04:19:37,032] INFO stages Stage Routing: start ...
[2020-05-24 04:19:37,137] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --cities_boundaries_data=/home/maps_build/2020_05_23__16_58_17/intermediate_data/cities_boundaries.bin --generate_maxspeed=true --make_city_roads=true --make_cross_mwm=true --generate_cameras=true --make_routing_index=true --generate_traffic_keys=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 3.29e-06 Loaded countries list for version: 200402
LOG TID(1) INFO 7.945e-05 generator/camera_info_collector.cpp:339 BuildCamerasInfo() Generating cameras info for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 0.529856 generator/routing_index_generator.cpp:546 BuildRoutingIndex() Building routing index for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 2.11074 generator/routing_index_generator.cpp:563 BuildRoutingIndex() Routing section created: 639872 bytes, 163251 roads, 193213 joints, 429334 points
LOG TID(1) INFO 2.90872 generator/restriction_generator.cpp:117 SerializeRestrictions() Routing restriction info: RestrictionHeader: { No => 430, Only => 284, NoUTurn => 123, OnlyUTurn => 0 }
LOG TID(1) INFO 3.00342 generator/road_access_generator.cpp:799 BuildRoadAccessInfo() Generating road access info for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 3.77435 generator_tool/generator_tool.cpp:621 operator()() Generating cities boundaries roads for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 3.85993 generator/city_roads_generator.cpp:51 LoadCitiesBoundariesGeometry() Read: 14225 boundaries from: /home/maps_build/2020_05_23__16_58_17/intermediate_data/routing_city_boundaries.bin
LOG TID(1) INFO 6.82577 routing/city_roads_serialization.hpp:78 Serialize() Serialized 81697 road feature ids in cities. Size: 77872 bytes.
LOG TID(1) INFO 6.82611 generator_tool/generator_tool.cpp:621 operator()() Generating maxspeeds section for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 6.82616 generator/maxspeeds_builder.cpp:186 BuildMaxspeedsSection() BuildMaxspeedsSection( /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm , /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm.osm2ft , /home/maps_build/2020_05_23__16_58_17/intermediate_data/maxspeeds.csv )
LOG TID(1) INFO 7.58621 routing/maxspeeds_serialization.hpp:144 Serialize() Serialized 11413 forward maxspeeds and 302 bidirectional maxspeeds. Section size: 17492 bytes.
LOG TID(1) INFO 7.58623 generator/maxspeeds_builder.cpp:172 SerializeMaxspeeds() SerializeMaxspeeds( /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm , ...) serialized: 11715 maxspeed tags.
LOG TID(1) INFO 7.64526 generator/routing_index_generator.cpp:596 BuildRoutingCrossMwmSection() Building cross mwm section for Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 8.43521 generator/routing_index_generator.cpp:393 CalcCrossMwmConnectors() Transitions finished, transitions: 1246 , elapsed: 0.789908 seconds
LOG TID(1) INFO 8.48956 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Pedestrian model. Number of enters: 1233 Number of exits: 1233
LOG TID(1) INFO 8.48964 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Bicycle model. Number of enters: 1231 Number of exits: 1230
LOG TID(1) INFO 8.48964 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Car model. Number of enters: 1089 Number of exits: 1089
LOG TID(1) INFO 8.48965 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Transit model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 4241.68 generator/routing_index_generator.cpp:537 FillWeights() Leaps finished, elapsed: 4233.19 seconds, routes found: 996363 , not found: 126519
LOG TID(1) INFO 4241.8 generator/routing_index_generator.cpp:588 SerializeCrossMwm() Cross mwm section generated, size: 1784214 bytes
LOG TID(1) ERROR 4243.2 generator/routing_index_generator.cpp:588 SerializeCrossMwm() Fake error.
[2020-05-24 05:30:19,319] INFO stages Stage Routing: finished in 1:10:42.287364
[2020-05-24 05:30:19,319] INFO stages Stage RoutingTransit: start ...
[2020-05-24 05:30:19,485] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --transit_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --make_transit_cross_mwm=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 3.107e-06 Loaded countries list for version: 200402
LOG TID(1) INFO 6.0315e-05 generator/transit_generator.cpp:205 BuildTransit() Building transit section for Czech_Jihovychod_Jihomoravsky kraj mwmDir: /home/maps_build/2020_05_23__16_58_17/draft/
LOG TID(1) INFO 5.40151 generator/routing_index_generator.cpp:617 BuildTransitCrossMwmSection() Building transit cross mwm section for Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 5.47317 generator/routing_index_generator.cpp:320 CalcCrossMwmTransitions() Transit cross mwm section is not generated because no transit section in mwm: /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 5.4732 generator/routing_index_generator.cpp:393 CalcCrossMwmConnectors() Transitions finished, transitions: 0 , elapsed: 0.0716537 seconds
LOG TID(1) INFO 5.47321 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Pedestrian model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47321 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Bicycle model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47322 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Car model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47322 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Transit model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47325 generator/routing_index_generator.cpp:588 SerializeCrossMwm() Cross mwm section generated, size: 31 bytes
[2020-05-24 05:30:25,144] INFO stages Stage RoutingTransit: finished in 0:00:05.824967
[2020-05-24 05:30:25,144] INFO stages Stage MwmStatistics: start ...
[2020-05-24 05:30:25,212] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --stats_types=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 1.5806e-05 generator_tool/generator_tool.cpp:621 operator()() Calculating type statistics for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
[2020-05-24 05:30:28,773] INFO stages Stage MwmStatistics: finished in 0:00:03.628742
[2020-05-24 06:30:25,144] INFO stages Stage MwmStatistics: start ...
[2020-05-24 06:30:25,212] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --stats_types=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 1.5806e-05 generator_tool/generator_tool.cpp:621 operator()() Calculating type statistics for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
[2020-05-24 06:30:28,773] INFO stages Stage MwmStatistics: finished in 0:00:01.628742
"""

View file

@ -0,0 +1,44 @@
import argparse
from multiprocessing.pool import ThreadPool
from typing import Tuple
from maps_generator.checks.logs import logs_reader
def get_args():
parser = argparse.ArgumentParser(
        description="This script generates a file with countries ordered "
        "by the time needed to generate them."
)
parser.add_argument(
"--output", type=str, required=True, help="Path to output file.",
)
parser.add_argument(
"--logs", type=str, required=True, help="Path to logs directory.",
)
return parser.parse_args()
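# Sums the durations of all normalized stages found in a single mwm log, giving the total
# generation time for that country; main() then sorts countries by this value, descending.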
def process_log(log: logs_reader.Log) -> Tuple[str, float]:
stage_logs = logs_reader.split_into_stages(log)
stage_logs = logs_reader.normalize_logs(stage_logs)
d = sum(s.duration.total_seconds() for s in stage_logs if s.duration is not None)
return log.name, d
def main():
args = get_args()
with ThreadPool() as pool:
order = pool.map(
process_log,
(log for log in logs_reader.LogsReader(args.logs) if log.is_mwm_log),
)
order.sort(key=lambda v: v[1], reverse=True)
with open(args.output, "w") as out:
out.write("# Mwm name\tGeneration time\n")
out.writelines("{}\t{}\n".format(*line) for line in order)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,19 @@
import re
from datetime import timedelta
DURATION_PATTERN = re.compile(
r"((?P<days>[-\d]+) day[s]*, )?(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)"
)
def unique(s):
seen = set()
seen_add = seen.add
return [x for x in s if not (x in seen or seen_add(x))]
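# Parses durations in str(datetime.timedelta) form, e.g. "1:10:42.287364" or
# "2 days, 0:00:05.8", into a timedelta.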
def parse_timedelta(s):
m = DURATION_PATTERN.match(s)
d = m.groupdict()
return timedelta(**{k: float(d[k]) for k in d if d[k] is not None})

View file

@ -0,0 +1,201 @@
import errno
import functools
import glob
import logging
import os
import shutil
from functools import partial
from multiprocessing.pool import ThreadPool
from typing import AnyStr
from typing import Dict
from typing import List
from typing import Optional
from urllib.parse import unquote
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import url2pathname
import requests
from bs4 import BeautifulSoup
from requests_file import FileAdapter
from maps_generator.utils.md5 import check_md5
from maps_generator.utils.md5 import md5_ext
logger = logging.getLogger("maps_generator")
def is_file_uri(url: AnyStr) -> bool:
return urlparse(url).scheme == "file"
def file_uri_to_path(url: AnyStr) -> AnyStr:
file_uri = urlparse(url)
file_path = file_uri.path
# URI is something like "file://~/..."
if file_uri.netloc == '~':
file_path = f'~{file_uri.path}'
return os.path.expanduser(file_path)
return file_path
def is_executable(fpath: AnyStr) -> bool:
return fpath is not None and os.path.isfile(fpath) and os.access(fpath, os.X_OK)
@functools.lru_cache()
def find_executable(path: AnyStr, exe: Optional[AnyStr] = None) -> AnyStr:
if exe is None:
if is_executable(path):
return path
else:
raise FileNotFoundError(path)
find_pattern = f"{path}/**/{exe}"
for name in glob.iglob(find_pattern, recursive=True):
if is_executable(name):
return name
raise FileNotFoundError(f"{exe} not found in {path}")
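# Files are downloaded to a temporary "<name>__" path and moved into place only when
# complete. If the server reported a Content-Length and the connection drops early,
# the download is resumed with an HTTP "Range: bytes=<offset>-" header, giving up after
# 32 failed attempts.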
def download_file(url: AnyStr, name: AnyStr, download_if_exists: bool = True):
logger.info(f"Trying to download {name} from {url}.")
if not download_if_exists and os.path.exists(name):
logger.info(f"File {name} already exists.")
return
if is_file_uri(url):
# url uses 'file://' scheme
shutil.copy2(file_uri_to_path(url), name)
logger.info(f"File {name} was copied from {url}.")
return
tmp_name = f"{name}__"
os.makedirs(os.path.dirname(tmp_name), exist_ok=True)
with requests.Session() as session:
session.mount("file://", FileAdapter())
with open(tmp_name, "wb") as handle:
response = session.get(url, stream=True)
file_length = None
try:
file_length = int(response.headers["Content-Length"])
except KeyError:
logger.warning(
f"There is no attribute Content-Length in headers [{url}]: {response.headers}"
)
current = 0
max_attempts = 32
attempts = max_attempts
while attempts:
for data in response.iter_content(chunk_size=4096):
current += len(data)
handle.write(data)
if file_length is None or file_length == current:
break
logger.warning(
f"Download interrupted. Resuming download from {url}: {current}/{file_length}."
)
headers = {"Range": f"bytes={current}-"}
response = session.get(url, headers=headers, stream=True)
attempts -= 1
assert (
attempts > 0
), f"Maximum failed resuming download attempts of {max_attempts} is exceeded."
shutil.move(tmp_name, name)
logger.info(f"File {name} was downloaded from {url}.")
def is_dir(url) -> bool:
return url.endswith("/")
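# Recursively lists all files under a directory URL and returns their paths relative to
# it: file:// URLs are walked on disk, while http(s):// URLs are scraped from the
# server's HTML index pages (every <a href> except "./" and "../").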
def find_files(url) -> List[AnyStr]:
def files_list_file_scheme(path, results=None):
if results is None:
results = []
for p in os.listdir(path):
new_path = os.path.join(path, p)
if os.path.isdir(new_path):
files_list_file_scheme(new_path, results)
else:
results.append(new_path)
return results
def files_list_http_scheme(url, results=None):
if results is None:
results = []
page = requests.get(url).content
bs = BeautifulSoup(page, "html.parser")
links = bs.findAll("a", href=True)
for link in links:
href = link["href"]
if href == "./" or href == "../":
continue
new_url = urljoin(url, href)
if is_dir(new_url):
files_list_http_scheme(new_url, results)
else:
results.append(new_url)
return results
parse_result = urlparse(url)
if parse_result.scheme == "file":
return [
f.replace(parse_result.path, "")
for f in files_list_file_scheme(parse_result.path)
]
if parse_result.scheme == "http" or parse_result.scheme == "https":
return [f.replace(url, "") for f in files_list_http_scheme(url)]
assert False, parse_result
def normalize_url_to_path_dict(
url_to_path: Dict[AnyStr, AnyStr]
) -> Dict[AnyStr, AnyStr]:
for url in list(url_to_path.keys()):
if is_dir(url):
path = url_to_path[url]
del url_to_path[url]
for rel_path in find_files(url):
abs_url = urljoin(url, rel_path)
url_to_path[abs_url] = unquote(os.path.join(path, rel_path))
return url_to_path
def download_files(url_to_path: Dict[AnyStr, AnyStr], download_if_exists: bool = True):
with ThreadPool() as pool:
pool.starmap(
partial(download_file, download_if_exists=download_if_exists),
url_to_path.items(),
)
def is_exists_file_and_md5(name: AnyStr) -> bool:
return os.path.isfile(name) and os.path.isfile(md5_ext(name))
def is_verified(name: AnyStr) -> bool:
return is_exists_file_and_md5(name) and check_md5(name, md5_ext(name))
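# Creates a symlink, tolerating the case where an identical link already exists;
# any other pre-existing file, or a link pointing elsewhere, re-raises the error.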
def make_symlink(target: AnyStr, link_name: AnyStr):
try:
os.symlink(target, link_name)
except OSError as e:
if e.errno == errno.EEXIST:
if os.path.islink(link_name):
link = os.readlink(link_name)
if os.path.abspath(target) != os.path.abspath(link):
raise e
else:
raise e
else:
raise e

Some files were not shown because too many files have changed in this diff.