Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 13:58:55 +01:00
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions

View file

@ -0,0 +1,309 @@
import os
import sys
from abc import ABC
from abc import abstractmethod
from collections import namedtuple
from enum import Enum
from functools import lru_cache
from typing import Any
from typing import Callable
from typing import List
# Result of a single comparison: the previous and current values, their
# diff, and the direction of the change.
ResLine = namedtuple("ResLine", ["previous", "current", "diff", "arrow"])


# Direction of a change between an old and a new value.
class Arrow(Enum):
    zero = 0  # no change
    down = 1  # value decreased
    up = 2  # value increased


# Human-readable marker for each Arrow value.
# NOTE(review): the strings for `down` and `up` appear empty here — possibly
# unicode glyphs (e.g. arrows) lost in transit; confirm against the original.
ROW_TO_STR = {
    Arrow.zero: "◄►",
    Arrow.down: "",
    Arrow.up: "",
}
def norm(value):
    """Return a non-negative magnitude for *value*.

    Numbers are reduced to their absolute value; sized containers to their
    length; otherwise the object must provide its own ``norm()`` method.

    Raises:
        TypeError: if *value* supports none of the above.
    """
    if isinstance(value, (int, float)):
        return abs(value)
    if hasattr(value, "__len__"):
        return len(value)
    if hasattr(value, "norm"):
        return value.norm()
    # Raise instead of `assert False`: asserts are stripped under `python -O`.
    raise TypeError(f"Unsupported type: {type(value)}")
def get_rel(r: ResLine) -> float:
    """Return the relative change of *r*, in percent.

    Returns 0.0 for an unchanged result; 100.0 when the previous value has
    zero norm (avoids division by zero).
    """
    # Fixed return annotation: the function returns a float, not a bool.
    rel = 0.0
    if r.arrow != Arrow.zero:
        prev = norm(r.previous)
        if prev == 0:
            rel = 100.0
        else:
            rel = norm(r.diff) * 100.0 / prev
    return rel
class Check(ABC):
    """
    Common interface for all checks.

    Typical flow:
      # Create a check object.
      check = AnyCheck("ExampleCheck")
      # Do work.
      check.check()
      # Get results and process them
      raw_result = check.get_result()
      process_result(raw_result)
      # or print the formatted result
      check.print()
    """

    def __init__(self, name: str):
        self.name = name

    def print(self, silent_if_no_results=False, filt=None, file=sys.stdout):
        # Print the formatted result unless it is empty.
        formatted = self.formatted_string(silent_if_no_results, filt)
        if formatted:
            print(formatted, file=file)

    @abstractmethod
    def check(self):
        """Run the check's logic."""

    @abstractmethod
    def get_result(self) -> Any:
        """Return the raw result of the check."""

    @abstractmethod
    def formatted_string(self, silent_if_no_results=False, *args, **kwargs) -> str:
        """Return the raw result rendered as a string."""
class CompareCheckBase(Check, ABC):
    """Base for comparison checks: holds the pluggable comparison pipeline
    (value extraction, diff operator, zero element, formatting, filtering).
    """

    def __init__(self, name: str):
        super().__init__(name)
        # Diff operator: combines the previous and current values.
        self.op: Callable[[Any, Any], Any] = lambda previous, current: current - previous
        # Value extraction: maps a raw input to a comparable value.
        self.do: Callable[[Any], Any] = lambda x: x
        # Neutral element the diff is compared against.
        self.zero: Any = 0
        # Renderers for the diff and for the previous/current values.
        self.diff_format: Callable[[Any], str] = lambda x: str(x)
        self.format: Callable[[Any], str] = lambda x: str(x)
        # Result filter: True keeps the result.
        self.filt: Callable[[Any], bool] = lambda x: True

    def set_op(self, op: Callable[[Any, Any], Any]):
        self.op = op

    def set_do(self, do: Callable[[Any], Any]):
        self.do = do

    def set_zero(self, zero: Any):
        self.zero = zero

    def set_diff_format(self, diff_format: Callable[[Any], str]):
        self.diff_format = diff_format

    def set_format(self, format: Callable[[Any], str]):
        self.format = format

    def set_filt(self, filt: Callable[[Any], bool]):
        self.filt = filt
class CompareCheck(CompareCheckBase):
    """Compares one old value against one new value.

    check() extracts comparable values via self.do, diffs them via self.op
    and stores a ResLine; formatted_string() renders the stored result.
    """

    def __init__(
        self, name: str, old: Any, new: Any,
    ):
        super().__init__(name)
        self.old = old
        self.new = new
        # Set by check(); stays None until check() succeeds.
        self.result = None

    # NOTE: set_op/set_do/set_zero overrides that were byte-identical to the
    # base-class implementations have been removed; the inherited ones apply.

    def get_result(self) -> ResLine:
        return self.result

    def check(self):
        """Compute the diff and store a ResLine.

        Returns True on success, False if extraction or diffing yields None.
        """
        previous = self.do(self.old)
        if previous is None:
            return False
        current = self.do(self.new)
        if current is None:
            return False
        diff = self.op(previous, current)
        if diff is None:
            return False
        arrow = Arrow.zero
        if diff > self.zero:
            arrow = Arrow.up
        elif diff < self.zero:
            arrow = Arrow.down
        self.result = ResLine(
            previous=previous, current=current, diff=diff, arrow=arrow
        )
        return True

    def formatted_string(self, silent_if_no_results=False, *args, **kwargs) -> str:
        # check() must have been called (and succeeded) before formatting.
        assert self.result
        if silent_if_no_results and self.result.arrow == Arrow.zero:
            return ""
        rel = get_rel(self.result)
        # NOTE(review): there is no visible separator between the previous
        # and current values below — possibly a lost glyph (e.g. an arrow);
        # confirm against the original file.
        return (
            f"{self.name}: {ROW_TO_STR[self.result.arrow]} {rel:.2f}% "
            f"[{self.format(self.result.previous)}"
            f"{self.format(self.result.current)}: "
            f"{self.diff_format(self.result.diff)}]"
        )
class CompareCheckSet(CompareCheckBase):
    """A composite check: runs a collection of sub-checks and renders their
    results as an indented tree.

    The pipeline setters (set_op/set_do/set_zero/set_diff_format/set_format)
    are forwarded to every sub-check.
    NOTE(review): set_filt is inherited from CompareCheckBase and is NOT
    forwarded to children — it only affects this set's own filtering;
    confirm this asymmetry is intentional.
    """

    def __init__(self, name: str):
        super().__init__(name)
        self.checks = []

    def add_check(self, check: Check):
        self.checks.append(check)

    def set_op(self, op: Callable[[Any, Any], Any]):
        for c in self.checks:
            c.set_op(op)

    def set_do(self, do: Callable[[Any], Any]):
        for c in self.checks:
            c.set_do(do)

    def set_zero(self, zero: Any):
        for c in self.checks:
            c.set_zero(zero)

    def set_diff_format(self, diff_format: Callable[[Any], str]):
        for c in self.checks:
            c.set_diff_format(diff_format)

    def set_format(self, format: Callable[[Any], str]):
        for c in self.checks:
            c.set_format(format)

    def check(self):
        # Run every sub-check; results are gathered later via get_result().
        for c in self.checks:
            c.check()

    def get_result(self,) -> List[ResLine]:
        # Only sub-checks that produced a result are included.
        return [c.get_result() for c in self._with_result()]

    def formatted_string(self, silent_if_no_results=False, filt=None, _offset=0) -> str:
        """Render this set (and nested sets, recursively) as an indented
        report; *_offset* is the current indentation width in spaces."""
        # Nested sets and plain checks are rendered separately; plain checks
        # are sorted by diff magnitude, descending, and filtered by *filt*
        # (falling back to this set's own filter).
        sets = filter(lambda c: isinstance(c, CompareCheckSet), self._with_result())
        checks = filter(lambda c: isinstance(c, CompareCheck), self._with_result())
        checks = sorted(checks, key=lambda c: norm(c.get_result().diff), reverse=True)
        if filt is None:
            filt = self.filt
        checks = filter(lambda c: filt(c.get_result()), checks)
        sets = list(sets)
        checks = list(checks)
        no_results = not checks and not sets
        if silent_if_no_results and no_results:
            return ""
        head = [
            f"{' ' * _offset}Check set[{self.name}]:",
        ]
        lines = []
        if no_results:
            lines.append(f"{' ' * (_offset + 2)}No results.")
        for c in checks:
            s = c.formatted_string(silent_if_no_results, filt, _offset + 2)
            if s:
                lines.append(f"{' ' * (_offset + 2)}{s}")
        for s in sets:
            # Nested sets indent themselves via the _offset argument.
            s = s.formatted_string(silent_if_no_results, filt, _offset + 2)
            if s:
                lines.append(s)
        if not lines:
            return ""
        head += lines
        return "\n".join(head) + "\n"

    def _with_result(self):
        # Generator over sub-checks whose get_result() is not None.
        return (c for c in self.checks if c.get_result() is not None)
@lru_cache(maxsize=None)
def _get_and_check_files(old_path, new_path, ext):
    """Return files with extension *ext* from *old_path*, asserting that
    *new_path* contains exactly the same set of such files (cached)."""
    old_files = [f for f in os.listdir(old_path) if f.endswith(ext)]
    new_files = {f for f in os.listdir(new_path) if f.endswith(ext)}
    mismatch = set(old_files) ^ new_files
    assert len(mismatch) == 0, mismatch
    return old_files
def build_check_set_for_files(
    name: str,
    old_path: str,
    new_path: str,
    *,
    ext: str = "",
    recursive: bool = False,
    op: Callable[[Any, Any], Any] = lambda previous, current: current - previous,
    do: Callable[[Any], Any] = lambda x: x,
    zero: Any = 0,
    diff_format: Callable[[Any], str] = lambda x: str(x),
    format: Callable[[Any], str] = lambda x: str(x),
):
    """Build a CompareCheckSet with one CompareCheck per file with extension
    *ext* found in both directories.

    Args:
        name: name of the resulting check set.
        old_path, new_path: directories with the old and new files; both
            must contain the same set of files with extension *ext*.
        ext: file-name suffix filter ("" accepts every file).
        recursive: not supported yet.
        op, do, zero, diff_format, format: comparison pipeline applied to
            every sub-check (see CompareCheckBase).

    Raises:
        NotImplementedError: if *recursive* is True.
    """
    if recursive:
        # Dropped the needless f-string prefix (no placeholders).
        raise NotImplementedError(
            "CheckSetBuilderForFiles is not implemented for recursive."
        )
    cs = CompareCheckSet(name)
    for file in _get_and_check_files(old_path, new_path, ext):
        cs.add_check(
            CompareCheck(
                file, os.path.join(old_path, file), os.path.join(new_path, file)
            )
        )
    cs.set_do(do)
    cs.set_op(op)
    cs.set_zero(zero)
    cs.set_diff_format(diff_format)
    cs.set_format(format)
    return cs

View file

@ -0,0 +1,34 @@
import re
from maps_generator.checks import check
from maps_generator.checks.logs import logs_reader
# Matches the generator's BuildAddressTable summary line, capturing the
# matched percentage plus the total/missing address counts.
ADDR_PATTERN = re.compile(
    r".*BuildAddressTable\(\) Address: "
    r"Matched percent (?P<matched_percent>[0-9.]+) "
    r"Total: (?P<total>\d+) "
    r"Missing: (?P<missing>\d+)"
)
def get_addresses_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
    """
    Returns an addresses check set, that checks a difference in 'matched_percent'
    addresses of BuildAddressTable between old logs and new logs.
    """

    def do(path: str):
        # Only per-mwm logs carry the BuildAddressTable summary line.
        log = logs_reader.Log(path)
        if not log.is_mwm_log:
            return None
        matches = logs_reader.find_and_parse(log.lines, ADDR_PATTERN)
        if not matches:
            return None
        groups = matches[0][0]
        return float(groups["matched_percent"])

    return check.build_check_set_for_files(
        "Addresses check", old_path, new_path, ext=".log", do=do
    )

View file

@ -0,0 +1,58 @@
from collections import defaultdict
from maps_generator.checks import check
from maps_generator.checks.check_mwm_types import count_all_types
from mwm import NAME_TO_INDEX_TYPE_MAPPING
def parse_groups(path):
    """Parse a categories.txt-style file into {category: set of type indexes}.

    Lines starting with '#' or '@' are skipped; a line of the form
    "type1|type2@cat1|cat2" maps each listed category name to the integer
    indexes of the listed types.
    NOTE(review): the exact categories.txt grammar is not visible here —
    confirm the '@'-handling against the file-format documentation.
    """
    groups = defaultdict(set)
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line.startswith("#"):
                continue
            if line.startswith("@"):
                continue
            # Split once: everything before the first '@' is the type list,
            # everything after it is the category list.
            array = line.split("@", maxsplit=1)
            if len(array) == 2:
                types_str, categories = array
                types_int = {
                    NAME_TO_INDEX_TYPE_MAPPING[t]
                    for t in types_str.strip("|").split("|")
                }
                for category in categories.split("|"):
                    # Removes the first '@' in the name — presumably a
                    # leading marker; confirm.
                    category = category.replace("@", "", 1)
                    groups[category].update(types_int)
    return groups
def get_categories_check_set(
    old_path: str, new_path: str, categories_path: str
) -> check.CompareCheckSet:
    """
    Returns a categories check set, that checks a difference in a number of
    objects of categories(from categories.txt) between old mwms and new mwms.
    """
    cs = check.CompareCheckSet("Categories check")

    def make_do(indexes):
        # Sum the feature counts over all types that belong to the category.
        def do(path):
            counts = count_all_types(path)
            return sum(counts[index] for index in indexes)

        return do

    for category, types in parse_groups(categories_path).items():
        cs.add_check(
            check.build_check_set_for_files(
                f"Category {category} check",
                old_path,
                new_path,
                ext=".mwm",
                do=make_do(types),
            )
        )
    return cs

View file

@ -0,0 +1,49 @@
import logging
from functools import lru_cache
from maps_generator.checks import check
from maps_generator.checks.logs import logs_reader
from maps_generator.generator.stages_declaration import stages
@lru_cache(maxsize=None)
def _get_log_stages(path):
    """Parse the log at *path* and return its normalized list of stages
    (cached per path)."""
    stage_list = logs_reader.split_into_stages(logs_reader.Log(path))
    return logs_reader.normalize_logs(stage_list)
def get_log_levels_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
    """
    Returns a log levels check set, that checks a difference in a number of
    message levels from warning and higher for each stage between old mwms
    and new mwms.
    """
    cs = check.CompareCheckSet("Log levels check")

    # Shared cache of per-(path, stage) level counters: counting is done once
    # per stage even though one check exists per (stage, level) pair.
    # Replaces the old mutable-default-argument cache (`cache={}`), a known
    # Python pitfall; the cache is now scoped to this call instead of being
    # shared module-wide, which is sufficient since all (stage, level) checks
    # are created here.
    levels_cache = {}

    def make_do(level, stage_name):
        def do(path):
            for s in _get_log_stages(path):
                if s.name != stage_name:
                    continue
                k = (path, stage_name)
                if k not in levels_cache:
                    levels_cache[k] = logs_reader.count_levels(s)
                return levels_cache[k][level]
            # Stage not present in this log.
            return None

        return do

    for stage_name in (
        stages.get_visible_stages_names() + stages.get_invisible_stages_names()
    ):
        for level in (logging.CRITICAL, logging.ERROR, logging.WARNING):
            cs.add_check(
                check.build_check_set_for_files(
                    f"Stage {stage_name} - {logging.getLevelName(level)} check",
                    old_path,
                    new_path,
                    ext=".log",
                    do=make_do(level, stage_name),
                )
            )
    return cs

View file

@ -0,0 +1,61 @@
from collections import defaultdict
from functools import lru_cache
from typing import Union
from maps_generator.checks import check
from mwm import Mwm
from mwm import NAME_TO_INDEX_TYPE_MAPPING
from mwm import readable_type
from mwm import type_index
@lru_cache(maxsize=None)
def count_all_types(path: str):
    """Return a mapping of type index -> number of features carrying that
    type in the mwm at *path* (cached per path)."""
    counts = defaultdict(int)
    for feature in Mwm(path, parse=False):
        for type_ in feature.types():
            counts[type_] += 1
    return counts
def get_mwm_type_check_set(
    old_path: str, new_path: str, type_: Union[str, int]
) -> check.CompareCheckSet:
    """
    Returns a mwm type check set, that checks a difference in a number of
    type [type_] between old mwms and new mwms.

    Raises:
        ValueError: if *type_* does not resolve to a valid type index.
    """
    if isinstance(type_, str):
        type_ = type_index(type_)
    # Raise instead of `assert`: asserts are stripped under `python -O`.
    if type_ < 0:
        raise ValueError(f"Invalid type index: {type_}")
    return check.build_check_set_for_files(
        f"Types check [{readable_type(type_)}]",
        old_path,
        new_path,
        ext=".mwm",
        do=lambda path: count_all_types(path)[type_],
    )
def get_mwm_types_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
    """
    Returns a mwm types check set, that checks a difference in a number of
    each type between old mwms and new mwms.
    """
    cs = check.CompareCheckSet("Mwm types check")

    def make_do(index):
        # Bind the index now to avoid the late-binding-closure pitfall.
        return lambda path: count_all_types(path)[index]

    for type_name, type_idx in NAME_TO_INDEX_TYPE_MAPPING.items():
        cs.add_check(
            check.build_check_set_for_files(
                f"Type {type_name} check",
                old_path,
                new_path,
                ext=".mwm",
                do=make_do(type_idx),
            )
        )
    return cs

View file

@ -0,0 +1,124 @@
import os
from functools import lru_cache
from maps_generator.checks import check
from mwm import Mwm
class SectionNames:
    """A comparable wrapper around a mapping of mwm section name -> info.

    Supports subtraction (sections present in self but absent from other)
    and ordering against ints or other SectionNames, both by section count.
    """

    def __init__(self, sections):
        self.sections = sections

    def __sub__(self, other):
        return SectionNames(
            {k: self.sections[k] for k in set(self.sections) - set(other.sections)}
        )

    def __lt__(self, other):
        if isinstance(other, int):
            return len(self.sections) < other
        elif isinstance(other, SectionNames):
            # Compare by section count: plain dicts are not orderable in
            # Python 3 (the old `self.sections < other.sections` raised
            # TypeError), and the int branch already compares lengths.
            return len(self.sections) < len(other.sections)
        # Raise instead of `assert False`: asserts vanish under `python -O`.
        raise TypeError(f"Unsupported type: {type(other)}")

    def __gt__(self, other):
        if isinstance(other, int):
            return len(self.sections) > other
        elif isinstance(other, SectionNames):
            # See __lt__: length-based ordering replaces dict ordering.
            return len(self.sections) > len(other.sections)
        raise TypeError(f"Unsupported type: {type(other)}")

    def __len__(self):
        return len(self.sections)

    def __str__(self):
        return str(self.sections)
@lru_cache(maxsize=None)
def read_sections(path: str):
    # Section name -> section info for the mwm at *path* (cached per path).
    return Mwm(path, parse=False).sections_info()
def get_appeared_sections_check_set(
    old_path: str, new_path: str
) -> check.CompareCheckSet:
    """Check set reporting sections present in new mwms but not in old ones."""
    return check.build_check_set_for_files(
        # Dropped the needless f-string prefix (no placeholders).
        "Appeared sections check",
        old_path,
        new_path,
        ext=".mwm",
        do=lambda path: SectionNames(read_sections(path)),
        diff_format=lambda s: ", ".join(f"{k}:{v.size}" for k, v in s.sections.items()),
        format=lambda s: f"number of sections: {len(s.sections)}",
    )
def get_disappeared_sections_check_set(
    old_path: str, new_path: str
) -> check.CompareCheckSet:
    """Check set reporting sections present in old mwms but not in new ones."""
    return check.build_check_set_for_files(
        # Dropped the needless f-string prefix (no placeholders).
        "Disappeared sections check",
        old_path,
        new_path,
        ext=".mwm",
        do=lambda path: SectionNames(read_sections(path)),
        # Reversed operator: previous - current = sections that went away.
        op=lambda previous, current: previous - current,
        diff_format=lambda s: ", ".join(f"{k}:{v.size}" for k, v in s.sections.items()),
        format=lambda s: f"number of sections: {len(s.sections)}",
    )
def get_sections_existence_check_set(
    old_path: str, new_path: str
) -> check.CompareCheckSet:
    """
    Returns a sections existence check set, that checks appeared and
    disappeared sections between old mwms and new mwms.
    """
    existence = check.CompareCheckSet("Sections existence check")
    existence.add_check(get_appeared_sections_check_set(old_path, new_path))
    existence.add_check(get_disappeared_sections_check_set(old_path, new_path))
    return existence
def _get_sections_set(path):
    """Union of section names over all .mwm files directly inside *path*."""
    sections = set()
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if entry.endswith(".mwm") and os.path.isfile(full):
            sections.update(read_sections(full).keys())
    return sections
def get_sections_size_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
    """
    Returns a sections size check set, that checks a difference in a size
    of each sections of mwm between old mwms and new mwms.
    """
    # Consider every section name seen in either directory.
    all_sections = _get_sections_set(old_path) | _get_sections_set(new_path)
    cs = check.CompareCheckSet("Sections size check")

    def make_do(section):
        # Returns the section's size, or None when the mwm lacks it.
        def do(path):
            info = read_sections(path)
            return info[section].size if section in info else None

        return do

    for section in all_sections:
        cs.add_check(
            check.build_check_set_for_files(
                f"Size of {section} check",
                old_path,
                new_path,
                ext=".mwm",
                do=make_do(section),
            )
        )
    return cs

View file

@ -0,0 +1,17 @@
import os
from maps_generator.checks import check
def get_size_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
    """
    Returns a size check set, that checks a difference in a size of mwm between
    old mwms and new mwms.
    """
    return check.build_check_set_for_files(
        "Size check",
        old_path,
        new_path,
        ext=".mwm",
        # Pass the function directly instead of wrapping it in a lambda.
        do=os.path.getsize,
    )

View file

@ -0,0 +1,167 @@
import sys
from collections import namedtuple
from enum import Enum
from typing import Callable
from typing import Mapping
from typing import Optional
from typing import Set
from typing import Tuple
from maps_generator.checks import check
from maps_generator.checks.check_addresses import get_addresses_check_set
from maps_generator.checks.check_categories import get_categories_check_set
from maps_generator.checks.check_log_levels import get_log_levels_check_set
from maps_generator.checks.check_mwm_types import get_mwm_type_check_set
from maps_generator.checks.check_mwm_types import get_mwm_types_check_set
from maps_generator.checks.check_sections import get_sections_existence_check_set
from maps_generator.checks.check_sections import get_sections_size_check_set
from maps_generator.checks.check_size import get_size_check_set
# Strictness levels for result filtering; higher levels use tighter
# thresholds.
class CheckType(Enum):
    low = 1
    medium = 2
    hard = 3
    strict = 4


# Threshold pair: `abs` is the minimal absolute diff, `rel` the minimal
# relative (percent) diff for a result to pass a filter.
Threshold = namedtuple("Threshold", ["abs", "rel"])

# Default thresholds per strictness level; replaceable wholesale via
# set_thresholds().
_default_thresholds = {
    CheckType.low: Threshold(abs=20, rel=20),
    CheckType.medium: Threshold(abs=15, rel=15),
    CheckType.hard: Threshold(abs=10, rel=10),
    CheckType.strict: Threshold(abs=0, rel=0),
}
def set_thresholds(check_type_map: Mapping[CheckType, Threshold]):
    """Replace the module-wide default thresholds used by make_tmap() and
    make_default_filter()."""
    global _default_thresholds
    _default_thresholds = check_type_map
def make_tmap(
    low: Optional[Tuple[float, float]] = None,
    medium: Optional[Tuple[float, float]] = None,
    hard: Optional[Tuple[float, float]] = None,
    strict: Optional[Tuple[float, float]] = None,
):
    """Return a copy of the default thresholds with the supplied
    (abs, rel) overrides applied per check type."""
    thresholds = _default_thresholds.copy()
    overrides = {
        CheckType.low: low,
        CheckType.medium: medium,
        CheckType.hard: hard,
        CheckType.strict: strict,
    }
    for check_type, pair in overrides.items():
        if pair is not None:
            thresholds[check_type] = Threshold(*pair)
    return thresholds
def make_default_filter(check_type_map: Mapping[CheckType, Threshold] = None):
    """Return a factory that, given a CheckType, builds a filter passing
    only results whose diff exceeds both the absolute and the relative
    threshold for that type."""
    if check_type_map is None:
        check_type_map = _default_thresholds

    def maker(check_type: CheckType):
        threshold = check_type_map[check_type]

        def default_filter(r: check.ResLine):
            big_abs = check.norm(r.diff) > threshold.abs
            big_rel = check.get_rel(r) > threshold.rel
            return big_abs and big_rel

        return default_filter

    return maker
# Selectable mwm-level checks for get_mwm_check_sets_and_filters().
class MwmsChecks(Enum):
    sections_existence = 1
    sections_size = 2
    mwm_size = 3
    types = 4
    booking = 5
    categories = 6
def get_mwm_check_sets_and_filters(
    old_path: str, new_path: str, checks: Set[MwmsChecks] = None, **kwargs
) -> Mapping[check.Check, Callable]:
    """Build {check set: filter factory} for mwm-based checks.

    checks=None enables every check. kwargs carries extra per-check inputs
    (currently "categories_path" for the categories check).
    NOTE(review): the sections-existence check is added unconditionally,
    not gated by need_add — confirm this is intentional.
    """

    def need_add(t: MwmsChecks):
        return checks is None or t in checks

    # No filter factory (None) means the check's results are not filtered.
    m = {get_sections_existence_check_set(old_path, new_path): None}
    if need_add(MwmsChecks.sections_size):
        c = get_sections_size_check_set(old_path, new_path)
        thresholds = make_tmap(low=(0, 20), medium=(0, 10), hard=(0, 5))
        m[c] = make_default_filter(thresholds)
    # One megabyte, used by the size thresholds below.
    mb = 1 << 20
    if need_add(MwmsChecks.mwm_size):
        c = get_size_check_set(old_path, new_path)
        thresholds = make_tmap(low=(2 * mb, 10), medium=(mb, 5), hard=(0.5 * mb, 2))
        m[c] = make_default_filter(thresholds)
    if need_add(MwmsChecks.types):
        c = get_mwm_types_check_set(old_path, new_path)
        thresholds = make_tmap(low=(500, 30), medium=(100, 20), hard=(100, 10))
        m[c] = make_default_filter(thresholds)
    if need_add(MwmsChecks.booking):
        c = get_mwm_type_check_set(old_path, new_path, "sponsored-booking")
        thresholds = make_tmap(low=(500, 20), medium=(50, 10), hard=(50, 5))
        m[c] = make_default_filter(thresholds)
    if need_add(MwmsChecks.categories):
        c = get_categories_check_set(old_path, new_path, kwargs["categories_path"])
        thresholds = make_tmap(low=(200, 20), medium=(50, 10), hard=(50, 5))
        m[c] = make_default_filter(thresholds)
    return m
# Selectable log-level checks for get_logs_check_sets_and_filters().
class LogsChecks(Enum):
    log_levels = 1
    addresses = 2
def get_logs_check_sets_and_filters(
    old_path: str, new_path: str, checks: Set[LogsChecks] = None
) -> Mapping[check.Check, Callable]:
    """Build {check set: filter factory} for log-based checks.

    checks=None enables everything; the log-levels check is always included
    and has no filter.
    """

    def need_add(t: LogsChecks):
        return checks is None or t in checks

    result = {get_log_levels_check_set(old_path, new_path): None}
    if need_add(LogsChecks.addresses):
        addresses = get_addresses_check_set(old_path, new_path)
        result[addresses] = make_default_filter(
            make_tmap(low=(50, 20), medium=(20, 10), hard=(10, 5))
        )
    return result
def _print_header(file, header, width=100, s="="):
    """Print *header* centered in a *width*-wide rule made of *s* chars."""
    pad = s * ((width - len(header)) // 2)
    right = pad
    # Pad the right side by one extra char when the total falls short.
    if 2 * len(pad) + len(header) < width:
        right += s
    print(pad, header, right, file=file)
def run_checks_and_print_results(
    checks: Mapping[check.Check, Callable],
    check_type: CheckType,
    silent_if_no_results: bool = True,
    file=sys.stdout,
):
    """Run every check and print its formatted results to *file*.

    *checks* maps a check object to an optional filter factory; a factory
    is called with *check_type* to obtain the result filter, None means
    no filtering.
    """
    # Loop variable renamed from `check` to avoid shadowing the imported
    # `check` module.
    for chk, make_filt in checks.items():
        chk.check()
        _print_header(file, chk.name)
        chk.print(
            silent_if_no_results=silent_if_no_results,
            filt=None if make_filt is None else make_filt(check_type),
            file=file,
        )

View file

@ -0,0 +1,241 @@
import datetime
import logging
import os
import re
from collections import Counter
from collections import namedtuple
from enum import Enum
from pathlib import Path
from typing import List
from typing import Tuple
from typing import Union
import maps_generator.generator.env as env
from maps_generator.generator.stages import get_stage_type
from maps_generator.utils.algo import parse_timedelta
logger = logging.getLogger(__name__)

# Regex flags shared by every log-line pattern below.
FLAGS = re.MULTILINE | re.DOTALL

# generator_tool line: "LOG TID(1) INFO 1.23 message".
GEN_LINE_PATTERN = re.compile(
    r"^LOG\s+TID\((?P<tid>\d+)\)\s+(?P<level>[A-Z]+)\s+"
    r"(?P<timestamp>[-.e0-9]+)\s+(?P<message>.+)$",
    FLAGS,
)
# Assertion-failure line: "TID(1) ASSERT FAILED message".
GEN_LINE_CHECK_PATTERN = re.compile(
    r"^TID\((?P<tid>\d+)\)\s+" r"ASSERT FAILED\s+(?P<message>.+)$", FLAGS
)
# Python maps_generator line: "[timestamp] LEVEL module message".
MAPS_GEN_LINE_PATTERN = re.compile(
    r"^\[(?P<time_string>[0-9-:, ]+)\]\s+(?P<level>\w+)\s+"
    r"(?P<module>\w+)\s+(?P<message>.+)$",
    FLAGS,
)
# Stage boundary messages emitted by the pipeline.
# NOTE(review): the "..." in the start pattern is unescaped and matches any
# three characters — probably benign, but confirm against real log lines.
STAGE_START_MSG_PATTERN = re.compile(r"^Stage (?P<name>\w+): start ...$")
STAGE_FINISH_MSG_PATTERN = re.compile(
    r"^Stage (?P<name>\w+): finished in (?P<duration_string>.+)$"
)

# One parsed log record; `type` says which format it came from.
LogLine = namedtuple("LogLine", ["timestamp", "level", "tid", "message", "type"])
# Lines belonging to one pipeline stage; duration is None if unfinished.
LogStage = namedtuple("LogStage", ["name", "duration", "lines"])


# The two log-line formats that may appear within one file.
class LogType(Enum):
    gen = 1
    maps_gen = 2
class Log:
    """A parsed generator log file.

    Classifies the log as a stage log or a per-mwm log from its file name
    and parses its content into LogLine records.
    """

    def __init__(self, path: str):
        self.path = Path(path)
        self.name = self.path.stem
        self.is_stage_log = False
        self.is_mwm_log = False
        try:
            get_stage_type(self.name)
            self.is_stage_log = True
        except AttributeError:
            # Not a stage name: a log named after a country/world is a
            # per-mwm log.
            if self.name in env.COUNTRIES_NAMES or self.name in env.WORLDS_NAMES:
                self.is_mwm_log = True
        self.lines = self._parse_lines()

    def _parse_lines(self) -> List[LogLine]:
        """Split the file into logical records (a record may span several
        physical lines) and parse each one."""
        logline = ""
        state = None
        lines = []
        base_timestamp = 0.0

        def try_parse_and_insert():
            nonlocal logline
            logline = logline.strip()
            if not logline:
                return
            nonlocal base_timestamp
            line = None
            if state == LogType.gen:
                # gen records carry a timestamp relative to the last
                # absolute maps_gen timestamp seen.
                line = Log._parse_gen_line(logline, base_timestamp)
            elif state == LogType.maps_gen:
                line = Log._parse_maps_gen_line(logline)
                base_timestamp = line.timestamp
            if line is not None:
                lines.append(line)
            else:
                # `Logger.warn` is deprecated — use `warning`.
                logger.warning(f"{self.name}: line was not parsed: {logline}")
            logline = ""

        with self.path.open() as logfile:
            for line in logfile:
                # A new record starts with "LOG"/"TID" (gen format) or "["
                # (maps_gen format); any other line continues the current
                # record.
                if line.startswith("LOG") or line.startswith("TID"):
                    try_parse_and_insert()
                    state = LogType.gen
                elif line.startswith("["):
                    try_parse_and_insert()
                    state = LogType.maps_gen
                logline += line
        # Flush the last accumulated record.
        try_parse_and_insert()
        return lines

    @staticmethod
    def _parse_gen_line(line: str, base_time: float = 0.0) -> LogLine:
        """Parse a generator_tool record; *base_time* is added to the
        record's relative timestamp."""
        m = GEN_LINE_PATTERN.match(line)
        if m:
            return LogLine(
                timestamp=base_time + float(m["timestamp"]),
                level=logging.getLevelName(m["level"]),
                tid=int(m["tid"]),
                message=m["message"],
                type=LogType.gen,
            )
        m = GEN_LINE_CHECK_PATTERN.match(line)
        if m:
            # Assertion failures carry no timestamp; treat them as CRITICAL.
            return LogLine(
                timestamp=None,
                level=logging.getLevelName("CRITICAL"),
                tid=None,
                message=m["message"],
                type=LogType.gen,
            )
        assert False, line

    @staticmethod
    def _parse_maps_gen_line(line: str) -> LogLine:
        """Parse a maps_generator record with an absolute timestamp."""
        m = MAPS_GEN_LINE_PATTERN.match(line)
        # Check the match before dereferencing it: the old code accessed
        # `m["time_string"]` before the None check, raising TypeError
        # instead of the intended assertion on unparsable lines.
        assert m, line
        # Drop the milliseconds suffix (",123"): the default time format
        # used by strptime below has no %f field.
        time_string = m["time_string"].split(",")[0]
        timestamp = datetime.datetime.strptime(
            time_string, logging.Formatter.default_time_format
        ).timestamp()
        return LogLine(
            timestamp=float(timestamp),
            level=logging.getLevelName(m["level"]),
            tid=None,
            message=m["message"],
            type=LogType.maps_gen,
        )
class LogsReader:
    """Iterates over a Log object for every .log file in a directory."""

    def __init__(self, path: str):
        # Resolve "~" and relative components up front.
        self.path = os.path.abspath(os.path.expanduser(path))

    def __iter__(self):
        for entry in os.listdir(self.path):
            if entry.endswith(".log"):
                yield Log(os.path.join(self.path, entry))
def split_into_stages(log: Log) -> List[LogStage]:
    """Group *log*'s lines into per-stage LogStage records.

    Stages are delimited by "Stage X: start ..." / "Stage X: finished in D"
    messages; other lines are attributed to the currently open stage.
    An unfinished stage is recorded with duration=None.
    """
    log_stages = []
    name = None
    lines = []
    for line in log.lines:
        if line.message.startswith("Stage"):
            m = STAGE_START_MSG_PATTERN.match(line.message)
            if m:
                if name is not None:
                    # Previous stage never finished: flush it as-is.
                    # (`Logger.warn` is deprecated — use `warning`.)
                    logger.warning(f"{log.name}: stage {name} has not finish line.")
                    log_stages.append(LogStage(name=name, duration=None, lines=lines))
                    # Start the new stage with a fresh buffer: the old code
                    # kept appending to (and aliasing) the flushed stage's
                    # line list.
                    lines = []
                name = m["name"]
            m = STAGE_FINISH_MSG_PATTERN.match(line.message)
            if m:
                # assert name == m["name"], line
                duration = parse_timedelta(m["duration_string"])
                log_stages.append(LogStage(name=name, duration=duration, lines=lines))
                name = None
                lines = []
        else:
            lines.append(line)
    if name is not None:
        # The trailing stage never finished.
        logger.warning(f"{log.name}: stage {name} has not finish line.")
        log_stages.append(LogStage(name=name, duration=None, lines=lines))
    return log_stages
def _is_worse(lhs: "LogStage", rhs: "LogStage") -> bool:
    """Return True if *rhs* should replace *lhs* as the representative
    record for its stage.

    Order of criteria: a missing duration wins, then a larger line count,
    then a longer duration. (Annotations are quoted forward references.)
    """
    if lhs.duration is None and rhs.duration is None:
        # Both unfinished: compare by line count only. The old code fell
        # through to `rhs.duration > lhs.duration` and raised TypeError
        # when both durations were None.
        return len(rhs.lines) > len(lhs.lines)
    if (lhs.duration is None) ^ (rhs.duration is None):
        return lhs.duration is None
    if len(rhs.lines) > len(lhs.lines):
        return True
    return rhs.duration > lhs.duration
def normalize_logs(llogs: List[LogStage]) -> List[LogStage]:
    """Deduplicate stage records by name, keeping the "worst" record per
    stage as decided by _is_worse()."""
    normalized = []
    index_by_name = {}
    for stage in llogs:
        pos = index_by_name.get(stage.name)
        if pos is None:
            # First record for this stage name.
            index_by_name[stage.name] = len(normalized)
            normalized.append(stage)
        elif _is_worse(normalized[pos], stage):
            normalized[pos] = stage
    return normalized
def count_levels(logs: "Union[List[LogLine], LogStage]") -> Counter:
    """Count log lines per logging level.

    Accepts a list of LogLine or a LogStage (its lines are counted).
    The annotation is a quoted forward reference so the module loads even
    before the named types are resolved.

    Raises:
        TypeError: for any other input type (replaces `assert False`,
        which is stripped under `python -O`).
    """
    if isinstance(logs, list):
        return Counter(log.level for log in logs)
    if isinstance(logs, LogStage):
        return count_levels(logs.lines)
    raise TypeError(f"Type {type(logs)} is unsupported.")
def find_and_parse(
    logs: "Union[List[LogLine], LogStage]",
    pattern: "Union[str, re.Pattern]",
) -> List[Tuple[dict, str]]:
    """Match *pattern* against each line's message.

    Returns the list of (groupdict, line) pairs for the lines that matched.
    Accepts a list of LogLine or a LogStage (its lines are used); a string
    pattern is compiled with the module-wide FLAGS. The quoted annotations
    are lazy forward references, replacing the `type(re.compile(""))` hack
    that was evaluated at definition time.

    Raises:
        TypeError: for any other *logs* type (replaces `assert False`,
        which is stripped under `python -O`).
    """
    if isinstance(pattern, str):
        pattern = re.compile(pattern, FLAGS)
    if isinstance(logs, list):
        found = []
        for log in logs:
            m = pattern.match(log.message)
            if m:
                found.append((m.groupdict(), log))
        return found
    if isinstance(logs, LogStage):
        return find_and_parse(logs.lines, pattern)
    raise TypeError(f"Type {type(logs)} is unsupported.")