From 606d0fd2a3017d916aa13222f94456e6205bd027 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Mon, 16 Jun 2025 07:55:31 +0200 Subject: [PATCH 01/12] debug: enable faulthandler reporting for parallelized child interpreters --- strictdoc/helpers/parallelizer.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 1b1133db1..7a4f088ac 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -19,7 +19,9 @@ def map_does_not_work(self, contents, processing_func): """ import atexit +import faulthandler import multiprocessing +import os import sys import traceback from abc import ABC, abstractmethod @@ -82,6 +84,14 @@ def __init__(self) -> None: ) process_number = fixed_process_number + # + # FIXME: Debugging + # Bug: Process parallelization has become flaky on Windows (rarely) #2121 + # https://github.com/strictdoc-project/strictdoc/issues/2121 + # Here we enable unbuffered prints to stdout and stderr + # + os.environ["PYTHONUNBUFFERED"] = "1" + self.processes = [ multiprocessing.Process( target=MultiprocessingParallelizer._run, @@ -184,6 +194,14 @@ def _run( input_queue: "multiprocessing.Queue[Tuple[int, Any, MultiprocessingLambdaType]]", output_queue: "multiprocessing.Queue[Tuple[int, Any]]", ) -> None: + # + # FIXME: Debugging: + # Bug: Process parallelization has become flaky on Windows (rarely) #2121 + # https://github.com/strictdoc-project/strictdoc/issues/2121 + # Enable the faulthandler to get stack trace on python interpreter errors + # + faulthandler.enable() + register_code_coverage_hook() def close_queues() -> None: From 1ddfb0656b6d05013680089e4f4c8f041370c837 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Mon, 16 Jun 2025 19:19:06 +0200 Subject: [PATCH 02/12] trigger build by commiting a new line --- strictdoc/helpers/parallelizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 7a4f088ac..04951095b 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -91,6 +91,7 @@ def __init__(self) -> None: # Here we enable unbuffered prints to stdout and stderr # os.environ["PYTHONUNBUFFERED"] = "1" + self.processes = [ multiprocessing.Process( From 15e12a6afe092145a8885f9530ad4be3447fa861 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Mon, 16 Jun 2025 19:26:47 +0200 Subject: [PATCH 03/12] Update parallelizer.py --- strictdoc/helpers/parallelizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 04951095b..7a4f088ac 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -91,7 +91,6 @@ def __init__(self) -> None: # Here we enable unbuffered prints to stdout and stderr # os.environ["PYTHONUNBUFFERED"] = "1" - self.processes = [ multiprocessing.Process( From 5774b95d8f7251f8821e6af7b9a7b582dea41455 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Mon, 16 Jun 2025 21:20:23 +0200 Subject: [PATCH 04/12] use buffered io, unbuffered seems to be too slow... --- strictdoc/helpers/parallelizer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 7a4f088ac..d83780818 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -21,7 +21,6 @@ def map_does_not_work(self, contents, processing_func): import atexit import faulthandler import multiprocessing -import os import sys import traceback from abc import ABC, abstractmethod @@ -90,7 +89,7 @@ def __init__(self) -> None: # https://github.com/strictdoc-project/strictdoc/issues/2121 # Here we enable unbuffered prints to stdout and stderr # - os.environ["PYTHONUNBUFFERED"] = "1" + # os - . environ["PYTHONUNBUFFERED"] = "1" self.processes = [ multiprocessing.Process( From b369b41995a69c949f923a2b1284268d92404107 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Mon, 16 Jun 2025 21:58:33 +0200 Subject: [PATCH 05/12] commit one line to trigger a build --- strictdoc/helpers/parallelizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index d83780818..3b06b745f 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -199,6 +199,7 @@ def _run( # https://github.com/strictdoc-project/strictdoc/issues/2121 # Enable the faulthandler to get stack trace on python interpreter errors # + # faulthandler.enable() register_code_coverage_hook() From ea86e168aa28b8032449630033ba3342aaf682ad Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Mon, 16 Jun 2025 22:17:33 +0200 Subject: [PATCH 06/12] remove one line to trigger a built --- strictdoc/helpers/parallelizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 3b06b745f..d83780818 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -199,7 +199,6 @@ def _run( # https://github.com/strictdoc-project/strictdoc/issues/2121 # Enable the faulthandler to get stack trace on python interpreter errors # - # faulthandler.enable() register_code_coverage_hook() From b32eecad00544512f8a8d13be47233922cda2480 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Wed, 18 Jun 2025 08:03:48 +0200 Subject: [PATCH 07/12] this dumps the stack of the hanging child using py-spy --- requirements.check.txt | 3 +++ strictdoc/helpers/parallelizer.py | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/requirements.check.txt b/requirements.check.txt index 46c722a0c..1bf5980ee 100644 --- a/requirements.check.txt +++ b/requirements.check.txt @@ -34,3 +34,6 @@ requests>=2.27.1 # Test HTML2PDF-produced artifacts in the integration tests. pypdf + +# investigate the stack of child processes when they hang +py-spy diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index d83780818..5811b54d8 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -21,11 +21,12 @@ def map_does_not_work(self, contents, processing_func): import atexit import faulthandler import multiprocessing +import subprocess import sys import traceback from abc import ABC, abstractmethod from queue import Empty -from typing import Any, Callable, Iterable, Tuple, Union +from typing import Any, Callable, Iterable, Optional, Tuple, Union from strictdoc import environment from strictdoc.helpers.coverage import register_code_coverage_hook @@ -132,6 +133,7 @@ def shutdown(self) -> None: f"failed to join within timeout.", flush=True, ) + MultiprocessingParallelizer.inspect_with_pyspy(process_.pid) process_.terminate() process_.join() @@ -235,6 +237,22 @@ def close_queues() -> None: sys.stdout.flush() sys.stderr.flush() + @staticmethod + def inspect_with_pyspy(pid: Optional[int]) -> None: + if pid is not None: + try: + result = subprocess.run( + ["py-spy", "dump", "--pid", str(pid)], + capture_output=True, + check=True, + text=True, + ) + print(result.stdout) # noqa: T201 + if result.stderr: + print("[py-spy stderr]:", result.stderr, file=sys.stderr) # noqa: T201 + except Exception as e: + print(f"[Error] Failed to run py-spy: {e}") # noqa: T201 + class NullParallelizer(Parallelizer): def run_parallel( From 2d1c81f6c3d05887cfd0b916686eca4c4aeca620 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Wed, 18 Jun 2025 08:43:07 +0200 Subject: [PATCH 08/12] actually signal the error in CI to we get a log... --- strictdoc/helpers/parallelizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 5811b54d8..dd85fa743 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -136,6 +136,7 @@ def shutdown(self) -> None: MultiprocessingParallelizer.inspect_with_pyspy(process_.pid) process_.terminate() process_.join() + bad_child_exit_code = True for process_ in self.processes: if process_.exitcode != 0: From 5b960c611b6f9fdc762f855eec29ee0ff072d2de Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Wed, 18 Jun 2025 09:14:10 +0200 Subject: [PATCH 09/12] try again... --- strictdoc/helpers/parallelizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index dd85fa743..70df85b42 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -91,6 +91,7 @@ def __init__(self) -> None: # Here we enable unbuffered prints to stdout and stderr # # os - . environ["PYTHONUNBUFFERED"] = "1" + # self.processes = [ multiprocessing.Process( From 17a4a2af45242d5d9f92e1f8f97817a1bc91dc32 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Wed, 18 Jun 2025 09:32:27 +0200 Subject: [PATCH 10/12] try again --- strictdoc/helpers/parallelizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 70df85b42..dd85fa743 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -91,7 +91,6 @@ def __init__(self) -> None: # Here we enable unbuffered prints to stdout and stderr # # os - . environ["PYTHONUNBUFFERED"] = "1" - # self.processes = [ multiprocessing.Process( From a4177056e49a90ab563fb13474178ecd66c24821 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Wed, 18 Jun 2025 11:43:08 +0200 Subject: [PATCH 11/12] seems difficult to trigger this today, interesting... --- strictdoc/helpers/parallelizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index dd85fa743..89678af8d 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -91,6 +91,7 @@ def __init__(self) -> None: # Here we enable unbuffered prints to stdout and stderr # # os - . environ["PYTHONUNBUFFERED"] = "1" + # self.processes = [ multiprocessing.Process( From bd635775bea64a35b0185f238696ce55ff13f912 Mon Sep 17 00:00:00 2001 From: Thomas Seiler Date: Wed, 18 Jun 2025 11:49:29 +0200 Subject: [PATCH 12/12] fix the format issue and try again... --- strictdoc/helpers/parallelizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strictdoc/helpers/parallelizer.py b/strictdoc/helpers/parallelizer.py index 89678af8d..70df85b42 100644 --- a/strictdoc/helpers/parallelizer.py +++ b/strictdoc/helpers/parallelizer.py @@ -91,7 +91,7 @@ def __init__(self) -> None: # Here we enable unbuffered prints to stdout and stderr # # os - . environ["PYTHONUNBUFFERED"] = "1" - # + # self.processes = [ multiprocessing.Process(