From 5ca31c5069c75507ab459a5f7cd662914a504abb Mon Sep 17 00:00:00 2001 From: Leonard Ehrenfried Date: Wed, 20 May 2026 13:09:12 +0200 Subject: [PATCH 1/5] Add fix script for rewriting STA ScheduledStopPoint IDs --- fix/rewrite_sta_ssp_ids.py | 121 +++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 fix/rewrite_sta_ssp_ids.py diff --git a/fix/rewrite_sta_ssp_ids.py b/fix/rewrite_sta_ssp_ids.py new file mode 100644 index 0000000..1aa6893 --- /dev/null +++ b/fix/rewrite_sta_ssp_ids.py @@ -0,0 +1,121 @@ +""" +Fix ScheduledStopPoint IDs to match SIRI feed format. + +Transforms IDs from: IT:ITH1:ScheduledStopPoint:it-22021-7010-51-32073: +to: IT:ITH10:ScheduledStopPoint:7010:51:32073 + +This is needed so that NeTEx and SIRI feeds reference the same stops. + +It is a mystery to me why this cannot be fixed at the source. +""" + +import dataclasses +import logging +import re +from pathlib import Path +from typing import Any + +from domain.netex.model import ( + PassengerStopAssignment, + Route, + RoutePoint, + RoutePointRef, + ScheduledStopPoint, + ScheduledStopPointRef, + ServiceJourneyPattern, + ServiceLink, + TimingLink, +) +from storage.mdbx.core.implementation import MdbxStorage +from utils.aux_logging import log_all, prepare_logger + +_PATTERN = re.compile(r'^.*:ScheduledStopPoint:it-22021-(.+):$') + + +def _new_id(old_id: str) -> str | None: + m = _PATTERN.match(old_id) + if m: + return 'IT:ITH10:ScheduledStopPoint:' + m.group(1).replace('-', ':') + return None + + +def _update_refs(obj: Any, id_map: dict[str, str]) -> bool: + if obj is None or not dataclasses.is_dataclass(obj) or isinstance(obj, type): + return False + modified = False + for f in dataclasses.fields(obj): + val = getattr(obj, f.name) + if isinstance(val, (ScheduledStopPointRef, RoutePointRef)): + new_ref = id_map.get(val.ref) + if new_ref is not None: + val.ref = new_ref + modified = True + elif isinstance(val, list): + for item in val: + if _update_refs(item, id_map): + modified = True + elif dataclasses.is_dataclass(val): + if _update_refs(val, id_map): + modified = True + return modified + + +# Object types that may transitively contain ScheduledStopPointRef or RoutePointRef. +_REF_BEARING_TYPES = [ + ServiceJourneyPattern, + ServiceLink, + TimingLink, + PassengerStopAssignment, + Route, + RoutePoint, +] + + +def fix_ssp_ids(database: Path) -> None: + with MdbxStorage(database, readonly=False) as db: + with db.env.rw_transaction() as txn: + id_map: dict[str, str] = {} + old_ssps: list[ScheduledStopPoint] = [] + for ssp in db.iter_only_objects(txn, ScheduledStopPoint): + new_id = _new_id(ssp.id) + if new_id is not None: + id_map[ssp.id] = new_id + old_ssps.append(ssp) + + print(f"{len(id_map)} ScheduledStopPoints to rewrite") + + updated: list[Any] = [] + for cls in _REF_BEARING_TYPES: + for obj in db.iter_only_objects(txn, cls): + if _update_refs(obj, id_map): + updated.append(obj) + + new_ssps = [dataclasses.replace(ssp, id=id_map[ssp.id]) for ssp in old_ssps] + + print(f"Updating refs in {len(updated)} objects") + print(f"Inserting {len(new_ssps)} renamed ScheduledStopPoints") + # TODO: delete the old ScheduledStopPoint objects (no delete API available yet) + + db.insert_any_object_on_queue(txn, updated) + db.insert_any_object_on_queue(txn, new_ssps) + txn.commit() + + +def main(source_database_file: str) -> None: + fix_ssp_ids(Path(source_database_file)) + + +if __name__ == "__main__": + import argparse + import traceback + + parser = argparse.ArgumentParser(description="Fix ScheduledStopPoint IDs to SIRI format") + parser.add_argument("source", type=str, help="mdbx file to fix in-place") + parser.add_argument("--log_file", type=str, required=False, help="log file path") + args = parser.parse_args() + prepare_logger(logging.INFO, args.log_file) + try: + main(args.source) + except Exception as e: + log_all(logging.ERROR, f"{e}") + raise e From 81bc47a84d59ef24568a08612e8d4c795c1abf67 Mon Sep 17 00:00:00 2001 From: Leonard Ehrenfried Date: Wed, 20 May 2026 15:14:20 +0200 Subject: [PATCH 2/5] Use generators everywhere --- fix/rewrite_sta_ssp_ids.py | 52 ++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/fix/rewrite_sta_ssp_ids.py b/fix/rewrite_sta_ssp_ids.py index 1aa6893..0ac6629 100644 --- a/fix/rewrite_sta_ssp_ids.py +++ b/fix/rewrite_sta_ssp_ids.py @@ -12,6 +12,7 @@ import dataclasses import logging import re +from collections.abc import Generator from pathlib import Path from typing import Any @@ -39,23 +40,23 @@ def _new_id(old_id: str) -> str | None: return None -def _update_refs(obj: Any, id_map: dict[str, str]) -> bool: +def _update_refs(obj: Any) -> bool: if obj is None or not dataclasses.is_dataclass(obj) or isinstance(obj, type): return False modified = False for f in dataclasses.fields(obj): val = getattr(obj, f.name) if isinstance(val, (ScheduledStopPointRef, RoutePointRef)): - new_ref = id_map.get(val.ref) + new_ref = _new_id(val.ref) if new_ref is not None: val.ref = new_ref modified = True elif isinstance(val, list): for item in val: - if _update_refs(item, id_map): + if _update_refs(item): modified = True elif dataclasses.is_dataclass(val): - if _update_refs(val, id_map): + if _update_refs(val): modified = True return modified @@ -71,33 +72,30 @@ def _update_refs(obj: Any, id_map: dict[str, str]) -> bool: ] +def _iter_updated_objects( + db: MdbxStorage, txn: Any, +) -> Generator[Any, None, None]: + for cls in _REF_BEARING_TYPES: + for obj in db.iter_only_objects(txn, cls): + if _update_refs(obj): + yield obj + + +def _iter_renamed_ssps( + db: MdbxStorage, txn: Any, +) -> Generator[ScheduledStopPoint, None, None]: + for ssp in db.iter_only_objects(txn, ScheduledStopPoint): + new_id = _new_id(ssp.id) + if new_id is not None: + yield dataclasses.replace(ssp, id=new_id) + + def fix_ssp_ids(database: Path) -> None: with MdbxStorage(database, readonly=False) as db: with db.env.rw_transaction() as txn: - id_map: dict[str, str] = {} - old_ssps: list[ScheduledStopPoint] = [] - for ssp in db.iter_only_objects(txn, ScheduledStopPoint): - new_id = _new_id(ssp.id) - if new_id is not None: - id_map[ssp.id] = new_id - old_ssps.append(ssp) - - print(f"{len(id_map)} ScheduledStopPoints to rewrite") - - updated: list[Any] = [] - for cls in _REF_BEARING_TYPES: - for obj in db.iter_only_objects(txn, cls): - if _update_refs(obj, id_map): - updated.append(obj) - - new_ssps = [dataclasses.replace(ssp, id=id_map[ssp.id]) for ssp in old_ssps] - - print(f"Updating refs in {len(updated)} objects") - print(f"Inserting {len(new_ssps)} renamed ScheduledStopPoints") # TODO: delete the old ScheduledStopPoint objects (no delete API available yet) - - db.insert_any_object_on_queue(txn, updated) - db.insert_any_object_on_queue(txn, new_ssps) + db.insert_any_object_on_queue(txn, _iter_updated_objects(db, txn)) + db.insert_any_object_on_queue(txn, _iter_renamed_ssps(db, txn)) txn.commit() From fbf72e25696555855614b3a88d83273613c39aff Mon Sep 17 00:00:00 2001 From: Leonard Ehrenfried Date: Wed, 20 May 2026 15:44:35 +0200 Subject: [PATCH 3/5] Use recursive_attributes --- fix/rewrite_sta_ssp_ids.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/fix/rewrite_sta_ssp_ids.py b/fix/rewrite_sta_ssp_ids.py index 0ac6629..bc997b5 100644 --- a/fix/rewrite_sta_ssp_ids.py +++ b/fix/rewrite_sta_ssp_ids.py @@ -27,6 +27,7 @@ ServiceLink, TimingLink, ) +from domain.netex.services.recursive_attributes import recursive_attributes from storage.mdbx.core.implementation import MdbxStorage from utils.aux_logging import log_all, prepare_logger @@ -41,22 +42,12 @@ def _new_id(old_id: str) -> str | None: def _update_refs(obj: Any) -> bool: - if obj is None or not dataclasses.is_dataclass(obj) or isinstance(obj, type): - return False modified = False - for f in dataclasses.fields(obj): - val = getattr(obj, f.name) - if isinstance(val, (ScheduledStopPointRef, RoutePointRef)): - new_ref = _new_id(val.ref) + for ref, _path in recursive_attributes(obj, []): + if isinstance(ref, (ScheduledStopPointRef, RoutePointRef)): + new_ref = _new_id(ref.ref) if new_ref is not None: - val.ref = new_ref - modified = True - elif isinstance(val, list): - for item in val: - if _update_refs(item): - modified = True - elif dataclasses.is_dataclass(val): - if _update_refs(val): + ref.ref = new_ref modified = True return modified From 070c3b422a45c31025085ca200cbcfe1832f6a25 Mon Sep 17 00:00:00 2001 From: Leonard Ehrenfried Date: Wed, 20 May 2026 16:58:38 +0200 Subject: [PATCH 4/5] Inline method --- fix/rewrite_sta_ssp_ids.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fix/rewrite_sta_ssp_ids.py b/fix/rewrite_sta_ssp_ids.py index bc997b5..c3d0a5e 100644 --- a/fix/rewrite_sta_ssp_ids.py +++ b/fix/rewrite_sta_ssp_ids.py @@ -41,17 +41,6 @@ def _new_id(old_id: str) -> str | None: return None -def _update_refs(obj: Any) -> bool: - modified = False - for ref, _path in recursive_attributes(obj, []): - if isinstance(ref, (ScheduledStopPointRef, RoutePointRef)): - new_ref = _new_id(ref.ref) - if new_ref is not None: - ref.ref = new_ref - modified = True - return modified - - # Object types that may transitively contain ScheduledStopPointRef or RoutePointRef. _REF_BEARING_TYPES = [ ServiceJourneyPattern, @@ -68,7 +57,14 @@ def _iter_updated_objects( ) -> Generator[Any, None, None]: for cls in _REF_BEARING_TYPES: for obj in db.iter_only_objects(txn, cls): - if _update_refs(obj): + changed = False + for ref, _path in recursive_attributes(obj, []): + if isinstance(ref, (ScheduledStopPointRef, RoutePointRef)): + new_ref = _new_id(ref.ref) + if new_ref is not None: + ref.ref = new_ref + changed = True + if changed: yield obj From 2eda928075cb371ce89f0fd1f2cc6b27b42c5b05 Mon Sep 17 00:00:00 2001 From: Leonard Ehrenfried Date: Thu, 21 May 2026 12:21:16 +0200 Subject: [PATCH 5/5] Add test script --- test/rewrite_sta_ssp_ids.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 test/rewrite_sta_ssp_ids.py diff --git a/test/rewrite_sta_ssp_ids.py b/test/rewrite_sta_ssp_ids.py new file mode 100644 index 0000000..b482790 --- /dev/null +++ b/test/rewrite_sta_ssp_ids.py @@ -0,0 +1,10 @@ +import unittest + +from fix.rewrite_sta_ssp_ids import main + +class FixSSPTestCase(unittest.TestCase): + def test(self): + main("sta.lmdb") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file