From 7f6abac89932177ba962a7f2d81c4db1099a2d35 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sat, 9 May 2026 23:47:31 +0300 Subject: [PATCH 1/4] fix(ghidra): use absolute paths for Docker volume mounts Converted filenames to absolute paths before passing them to Docker. This ensures correct bind-mounting and avoids 400 Bad Request errors on Linux hosts. Fixes: #7 Signed-off-by: Andrei Carp --- commons/ghidra/ghidra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons/ghidra/ghidra.py b/commons/ghidra/ghidra.py index 2cef9eb..ecc206b 100644 --- a/commons/ghidra/ghidra.py +++ b/commons/ghidra/ghidra.py @@ -32,7 +32,7 @@ class GhidraAnalysis: calls: typing.Set[str] def __init__(self, filename: str) -> None: - self.filename = filename + self.filename = os.path.abspath(filename) self.decompiled_code = "" self.calls = set() From 993ea798f793376d6ab550d3cc194ae2528bb3c9 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 00:13:08 +0300 Subject: [PATCH 2/4] fix(ghidra): map custom C types for pycparser compatibility Added replacements for Ghidra-specific types like undefined8, bool, and uint in the decompiled code. This prevents ParseError when pycparser processes the main function. 
Signed-off-by: Andrei Carp --- commons/ghidra/ghidra.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/commons/ghidra/ghidra.py b/commons/ghidra/ghidra.py index ecc206b..840b217 100644 --- a/commons/ghidra/ghidra.py +++ b/commons/ghidra/ghidra.py @@ -118,10 +118,22 @@ def __process_decompiled_code(self, lines: typing.List[str]) -> str: return code def __replace_undefs(self, code: str) -> None: - return code.replace("undefined4", "int").replace("undefined", "char") + return ( + code.replace("undefined8", "long long") + .replace("undefined4", "int") + .replace("undefined2", "short") + .replace("undefined1", "char") + .replace("undefined", "char") + ) def __replace_longs(self, code: str) -> None: - return code.replace("char8", "long") + return ( + code.replace("char8", "long") + .replace("ulong", "unsigned long") + .replace("uint", "unsigned int") + .replace("ushort", "unsigned short") + .replace("bool", "int") + ) def __replace_double_lines(self, code: str) -> None: return code.replace("\n\n", "\n") From 20b423dc9f96df2e2d17036f09f8130e4b9a11da Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 03:46:15 +0300 Subject: [PATCH 3/4] feat(ghidra): support stripped binaries and clean decompilation artifacts Implemented fallback to Entry Point in decompile_function.py when 'main' is missing. Updated ghidra.py with a robust regex to strip all C-style comments and a cleaner for Ghidra-specific artifacts (processEntry, PTR_FUN_, stack references) to ensure compatibility with pycparser. 
Fixes: #9 Signed-off-by: Andrei Carp --- commons/ghidra/ghidra.py | 22 ++++++---- commons/ghidra/scripts/decompile_function.py | 43 ++++++++++++++++---- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/commons/ghidra/ghidra.py b/commons/ghidra/ghidra.py index 840b217..026a79e 100644 --- a/commons/ghidra/ghidra.py +++ b/commons/ghidra/ghidra.py @@ -2,6 +2,7 @@ import subprocess import typing from enum import Enum +import re import docker @@ -112,6 +113,7 @@ def __process_decompiled_code(self, lines: typing.List[str]) -> str: code = self.__replace_undefs(code) code = self.__replace_longs(code) + code = self.__replace_ghidra_artifacts(code) code = self.__replace_double_lines(code) code = self.__replace_comments_for_pycparser(code) @@ -135,17 +137,21 @@ def __replace_longs(self, code: str) -> None: .replace("bool", "int") ) + def __replace_ghidra_artifacts(self, code: str) -> str: + # Remove Ghidra specific keywords and formatting that break pycparser + code = code.replace("processEntry", "") + code = re.sub(r'PTR_FUN_[0-9a-f]+', '0', code) + code = re.sub(r'FUN_[0-9a-f]+', '0', code) + code = re.sub(r'&stack0x[0-9a-f]+', '0', code) + return code + def __replace_double_lines(self, code: str) -> None: return code.replace("\n\n", "\n") - def __replace_comments_for_pycparser(self, code: str) -> None: - # pycparser won't be able to parse lines with comments. - no_comments_code = [] - for line in code.splitlines(): - if COMMENT_PREFIX not in line: - no_comments_code.append(line) - - return "\n".join(no_comments_code) + def __replace_comments_for_pycparser(self, code: str) -> str: + # Remove all /* ... 
*/ blocks + code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL) + return "\n".join([line for line in code.splitlines() if line.strip()]) def decompile_function(self, function_name: str) -> str: analysis_report = self.__run_headless_ghidra( diff --git a/commons/ghidra/scripts/decompile_function.py b/commons/ghidra/scripts/decompile_function.py index 1a9cb31..a887ead 100644 --- a/commons/ghidra/scripts/decompile_function.py +++ b/commons/ghidra/scripts/decompile_function.py @@ -11,20 +11,47 @@ def main(): - function_name = getScriptArgs()[0] - if not function_name: - exit(1) - + args = getScriptArgs() + if not args: + print("ERROR: No function name provided") + return + + function_name = args[0] program = getCurrentProgram() decompiler = DecompInterface() decompiler.openProgram(program) - function = getGlobalFunctions(function_name)[0] + # Try 1: Find by name (e.g., 'main') + functions = getGlobalFunctions(function_name) + function = None + + if functions: + function = functions[0] + else: + # Try 2: Look for a symbol named exactly 'entry', '_start' or 'main' + symbol_table = program.getSymbolTable() + for symbol in symbol_table.getAllSymbols(True): + if symbol.getName() in ["entry", "_start", "main"]: + function = getFunctionAt(symbol.getAddress()) + if function: break + + # Try 3: Look up external 'entry' symbols (NOTE: the result is not used yet) + if not function: + entry_points = program.getSymbolTable().getExternalSymbols("entry") + # If still nothing, pick the absolute first function in the manager + if not function: + function = program.getFunctionManager().getFunctions(True).next() + + if not function: + print("ERROR: No functions identified in the binary.") + return results = decompiler.decompileFunction(function, 0, ConsoleTaskMonitor()) - code = results.getDecompiledFunction().getC() - - print(code) + if results and results.getDecompiledFunction(): + code = results.getDecompiledFunction().getC() + print(code) + else: + print("ERROR: Decompilation failed for function at 
{}".format(function.getEntryPoint())) if __name__ == "__main__": From 5f17997c4015f62a5972ebf921e733d1b434c73b Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 16:28:33 +0300 Subject: [PATCH 4/4] build(deps): stabilize python version and pin dependencies in commons Updated pyproject.toml to use a fixed Python version (3.12.7) and ensured all core dependencies are pinned to exact versions for consistency across the CRS environment. Signed-off-by: Andrei Carp --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 978d17e..6f7ae30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,9 @@ license = "MIT" readme = "README.md" [tool.poetry.dependencies] -python = "^3.10" -pwntools = "^4.10.0" -docker = "^6.1.2" +python = "==3.12.7" +pwntools = "==4.15.0" +docker = "==7.1.0" [build-system] requires = ["poetry-core"]