From d6c290b3620892ecf8ba95e5318659b6cadd68ad Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sat, 9 May 2026 19:56:25 +0300 Subject: [PATCH 01/15] fix(generate): handle missing or inaccessible files in man_parsing Fixes: #10 Signed-off-by: Andrei Carp --- .../dictionaries_generators/heuristics/man_parsing.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/attack_surface_approximation/dictionaries_generators/heuristics/man_parsing.py b/attack_surface_approximation/dictionaries_generators/heuristics/man_parsing.py index 94eed32..b1fe403 100644 --- a/attack_surface_approximation/dictionaries_generators/heuristics/man_parsing.py +++ b/attack_surface_approximation/dictionaries_generators/heuristics/man_parsing.py @@ -22,18 +22,15 @@ def __get_arguments_from_manual( unescape: typing.Callable = None, ) -> typing.Generator[str, None, None]: try: - manual = gzip.open(filename, "rt") - except: - return - - try: - content = manual.read() - except UnicodeDecodeError: + with gzip.open(filename, "rt") as manual: + content = manual.read() + except (UnicodeDecodeError, FileNotFoundError, OSError): return if unescape: content = unescape(content) + arguments = filter_func(content) yield from arguments From 8f99447d02d90b472ac4351f62b079040bc87b1b Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sat, 9 May 2026 20:51:41 +0300 Subject: [PATCH 02/15] fix(generate): validate ELF path for binary_pattern_matching Added a check in the CLI to ensure the --elf option is provided when using binary_pattern_matching, preventing crashes and misleading results. 
Fixes: #12 Signed-off-by: Andrei Carp --- attack_surface_approximation/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/attack_surface_approximation/cli.py b/attack_surface_approximation/cli.py index 1d0984c..84939cc 100644 --- a/attack_surface_approximation/cli.py +++ b/attack_surface_approximation/cli.py @@ -54,6 +54,10 @@ def cli() -> None: ), ) def generate(heuristic: str, output: str, top: int, elf: str = None) -> None: + if heuristic == "binary_pattern_matching" and elf is None: + print("[ERROR] The 'binary_pattern_matching' heuristic requires an ELF file. Please provide one using the --elf option.") + return + generator = ArgumentsGenerator() generator.generate(heuristic, elf) arguments_count = generator.dump(output, top_count=top) From b5d169aee984644db38b7c1cf63e7162b3e4eef0 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sat, 9 May 2026 23:23:35 +0300 Subject: [PATCH 03/15] fix(deps): add local commons and update Docker SDK Added commons library as a local path dependency to fix ModuleNotFoundError and updated Docker SDK to 7.1.0 to support modern URL schemes. Fixes: #13, Fixes: #15 Signed-off-by: Andrei Carp --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ee6d50d..9ed3c52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,10 +9,9 @@ commons = { path = "../commons", develop = false } python = "^3.10" pycparser = "^2.21" pyelftools = "^0.29" -docker = "^6.1.2" +docker = "==7.1.0" rich = "^12.5.1" click = "^8.1.3" -requests = "2.31.0" [tool.poetry.dev-dependencies] black = "^24.3.0" From 66b28de9f16a90320e89ca04b459a57fcf4fcbc0 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 00:24:04 +0300 Subject: [PATCH 04/15] fix(qbdi): use configured executable instead of hardcoded uname Replaced the hardcoded 'uname' command with the dynamic CONTAINER_EXECUTABLE from configuration. This ensures the fuzzer analyzes the intended binary. 
Signed-off-by: Andrei Carp --- .../arguments_fuzzing/qbdi_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py index 4b9b402..5aa7a85 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py @@ -153,11 +153,11 @@ def __build_analyze_command( stringified_arguments = argument.to_str() stdin_avoidance_command = "echo '\n' |" if timeout_retry else "" - return ( # TODO: {self.__configuration.CONTAINER_EXECUTABLE} + return ( f"timeout {self.timeout} sh -c " f"'{stdin_avoidance_command} LD_BIND_NOW=1 " "LD_PRELOAD=./libqbdi_tracer.so " - "uname " + f"{self.__configuration.CONTAINER_EXECUTABLE} " f"{stringified_arguments}'" ) From 56e8994b13a0418fe6e32695e910b6d3ecafbf71 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 00:33:56 +0300 Subject: [PATCH 05/15] fix(tracer): prevent buffer underflow during baseline execution Added a check for argc > 1 before accessing command line arguments in the C tracer. This fixes intermittent crashes when running the binary without arguments during calibration. 
Signed-off-by: Andrei Carp --- .../qbdi_analysis_scripts/qbdi_preload_template.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c index fa04b9d..01349df 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c @@ -173,10 +173,14 @@ int qbdipreload_on_main(int argc, char **argv) { // Copy the arguments for (i = 1; i < argc; i++) { + if (strlen(command_line) + strlen(argv[i]) + 2 >= MAX_ARGS_LENGTH) + break; strcat(command_line, argv[i]); strcat(command_line, " "); } - command_line[strlen(command_line) - 1] = '\0'; + if (argc > 1 && strlen(command_line) > 0) { + command_line[strlen(command_line) - 1] = '\0'; + } return QBDIPRELOAD_NOT_HANDLED; } From 73f3110be8ff9e7110ff3c8ab9173af640d42d22 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 00:39:42 +0300 Subject: [PATCH 06/15] fix(tracer): fix heap corruption in memory segment tracking Introduced a separate counter for executable segments to avoid out-of-bounds writes. Previously, the global map index was used for a restricted array, causing SIGSEGV. 
Signed-off-by: Andrei Carp --- .../qbdi_analysis_scripts/qbdi_preload_template.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c index 01349df..6f4933a 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c @@ -81,7 +81,7 @@ char *encode_command_line(const unsigned char *command, size_t length) { static VMAction show_basic_block_callback(VMInstanceRef vm, const VMState* vmState, GPRState* gprState, FPRState* fprState, void* data) { size_t start_address, end_address; int abstract_address; - char parent_segment = -1, i; + int parent_segment = -1, i; // Check if the program reached main if (!start_trace) return QBDI_CONTINUE; @@ -102,6 +102,10 @@ static VMAction show_basic_block_callback(VMInstanceRef vm, const VMState* vmSta } } + // Safety check: if parent segment not found, skip this block + if (parent_segment == -1) + return QBDI_CONTINUE; + // Compute the abstract address start_address -= segments[parent_segment].start; abstract_address = (parent_segment << 24) + start_address; @@ -188,7 +192,7 @@ int qbdipreload_on_main(int argc, char **argv) { void get_segments() { qbdi_MemoryMap *maps; size_t maps_count; - int i; + int i, j = 0; // Get the memory maps maps = qbdi_getCurrentProcessMaps(false, &maps_count); @@ -206,8 +210,9 @@ void get_segments() { // Store the segments for (i = 0; i < maps_count; i++) { if (maps[i].permission >= QBDI_PF_EXEC && maps[i].end < MIN_MAPPED_ADDRESS) { - segments[i].start = maps[i].start; - segments[i].end = maps[i].end; + segments[j].start = maps[i].start; + segments[j].end = maps[i].end; + j++; } } } From 6b685357ff41289946c3a5b3c1a81856e606562d Mon Sep 17 00:00:00 
2001 From: Andrei Carp Date: Sun, 10 May 2026 01:10:05 +0300 Subject: [PATCH 07/15] fix(tracer): prevent memory corruption during hash generation Migrated from stack allocation to dynamic allocation (malloc) for the hashed buffer and increased its size. This prevents stack corruption caused by buffer overflow when processing a large number of basic blocks. Signed-off-by: Andrei Carp --- .../qbdi_preload_template.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c index 6f4933a..291135e 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis_scripts/qbdi_preload_template.c @@ -17,6 +17,7 @@ #define BLOCKS_USED_IN_HASH 10000 #define MAX_ARGS_LENGTH 100 #define OUTPUT_FOLDER "results/" +#define HASH_BUF_SIZE (BLOCKS_USED_IN_HASH * 10) // Enough for 10000 hex ints /* Structures */ @@ -234,16 +235,19 @@ int qbdipreload_on_run(VMInstanceRef vm, rword start, rword stop) { int qbdipreload_on_exit(int status) { FILE *output_file; - char hashed[2 * BLOCKS_USED_IN_HASH * sizeof(int)] = {'\0'}; - char current_hash[2 * sizeof(int)]; + char *hashed = malloc(HASH_BUF_SIZE); + char current_hash[16]; char output_filename[2 * MAX_ARGS_LENGTH + sizeof(OUTPUT_FOLDER) + 1] = {'\0'}; int *p; int i = 0; - char uses_canaries_str; + + if (!hashed) return QBDIPRELOAD_NO_ERROR; + memset(hashed, 0, HASH_BUF_SIZE); // Create the string to be hashed for (p = (int*)utarray_front(blocks); p != NULL && i < BLOCKS_USED_IN_HASH; p = (int*)utarray_next(blocks, p), i++) { - sprintf(current_hash, "%x", *p); + int written = sprintf(current_hash, "%x", *p); + if (strlen(hashed) + written >= HASH_BUF_SIZE - 1) break; strcat(hashed, current_hash); } @@ -251,7 +255,11 @@ 
int qbdipreload_on_exit(int status) { strcat(output_filename, OUTPUT_FOLDER); strcat(output_filename, encode_command_line(command_line, strlen(command_line))); output_file = fopen(output_filename, "w"); - fprintf(output_file, "%d %ld %d", utarray_len(blocks), hash(hashed), uses_canaries); + if (output_file) { + fprintf(output_file, "%d %lu %d", utarray_len(blocks), hash(hashed), uses_canaries); + fclose(output_file); + } + free(hashed); return QBDIPRELOAD_NO_ERROR; } \ No newline at end of file From cfde9822028d80d2e13ee405113df7dec24bd601 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 01:13:53 +0300 Subject: [PATCH 08/15] fix(qbdi): fix results accessibility between Docker and Host Added chmod calls to ensure result directories and files created by the root user in Docker are readable by the host Python process. Signed-off-by: Andrei Carp --- .../arguments_fuzzing/qbdi_analysis.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py index 5aa7a85..6e0ed30 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py @@ -121,6 +121,14 @@ def __create_container(self) -> None: f"sudo chmod 555 {self.__configuration.CONTAINER_EXECUTABLE}" ) + # Ensure the results directory is writable by everyone + self.__container.exec_run( + f"mkdir -p {self.__configuration.CONTAINER_RESULTS_FOLDER}" + ) + self.__container.exec_run( + f"sudo chmod 777 {self.__configuration.CONTAINER_RESULTS_FOLDER}" + ) + self.__container.exec_run( "cmake .", workdir=self.__configuration.CONTAINER_SO_FOLDER, @@ -187,7 +195,12 @@ def __run_analysis( raw_result = self.__build_and_run_analyze_command( argument, timeout_retry ) - print(raw_result.output) # TODO: remove + + # Ensure the result file is readable by the host user + 
argument_identifier = argument.to_hex_id() + self.__container.exec_run( + f"chmod 666 {os.path.join(self.__configuration.CONTAINER_RESULTS_FOLDER, argument_identifier)}" + ) result_filename = self.__get_analysis_result_filename(argument) bbs_count, bbs_hash, uses_file = self.__parse_raw_output( From 3e6e3f1418d1b520289cfd20283d982436e22efe Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 01:18:29 +0300 Subject: [PATCH 09/15] fix(qbdi): force clean build for the C tracer Added a command to remove CMakeCache.txt before compilation. This ensures that changes to the tracer source or headers are correctly reflected in the compiled library. Signed-off-by: Andrei Carp --- .../arguments_fuzzing/qbdi_analysis.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py index 6e0ed30..46e257c 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py @@ -130,10 +130,14 @@ def __create_container(self) -> None: ) self.__container.exec_run( + "rm -f CMakeCache.txt libqbdi_tracer.so", + workdir=self.__configuration.CONTAINER_SO_FOLDER, + ) + cmake_result = self.__container.exec_run( "cmake .", workdir=self.__configuration.CONTAINER_SO_FOLDER, ) - self.__container.exec_run( + make_result = self.__container.exec_run( "make", workdir=self.__configuration.CONTAINER_SO_FOLDER, ) From 154ed4a3f77d93e3a2d339c26754e6a417ec0c8d Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 01:20:55 +0300 Subject: [PATCH 10/15] fix(fuzzer): prioritize simple flags in fuzzing sequence Modified the generator to test simple flags before complex combinations. This prevents valid flags from being ignored due to hash collisions with previously seen invalid combinations. 
Signed-off-by: Andrei Carp --- .../arguments_fuzzing/fuzzing_sequence_generator.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/attack_surface_approximation/arguments_fuzzing/fuzzing_sequence_generator.py b/attack_surface_approximation/arguments_fuzzing/fuzzing_sequence_generator.py index 5a5be19..90a1efd 100644 --- a/attack_surface_approximation/arguments_fuzzing/fuzzing_sequence_generator.py +++ b/attack_surface_approximation/arguments_fuzzing/fuzzing_sequence_generator.py @@ -78,14 +78,15 @@ def generate_fuzzing_arguments( ) -> ArgumentsGenerator: arg = FileArgument(self.canary_filename) yield arg - if ArgumentRole.FILE_ENABLER not in arg.get_roles_based_on_analysis( - self.last_analysis_result, bbs_hashes_baseline - ): - for argument in self.arguments: - yield ArgumentPlusFileArgument(argument, self.canary_filename) yield ArgumentArgument("-") for argument in self.arguments: yield ArgumentArgument(argument) yield ArgumentStringArgument(argument, self.canary_string) + + if ArgumentRole.FILE_ENABLER not in arg.get_roles_based_on_analysis( + self.last_analysis_result, bbs_hashes_baseline + ): + for argument in self.arguments: + yield ArgumentPlusFileArgument(argument, self.canary_filename) From 040dba52c7753bae12bc0fcdf316000c41c3e03b Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 01:44:03 +0300 Subject: [PATCH 11/15] fix(fuzzer): add null hash guards and fix return types Corrected the return type to bool and added safety checks for null instrumentation hashes in both the validation logic and history tracking. This prevents crashes and incorrect deduplication when Docker runs fail. 
Signed-off-by: Andrei Carp --- .../arguments_fuzzing/fuzzer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/attack_surface_approximation/arguments_fuzzing/fuzzer.py b/attack_surface_approximation/arguments_fuzzing/fuzzer.py index e622fe0..8f9320d 100644 --- a/attack_surface_approximation/arguments_fuzzing/fuzzer.py +++ b/attack_surface_approximation/arguments_fuzzing/fuzzer.py @@ -56,11 +56,15 @@ def __generate_baseline_hashes(self) -> typing.Generator[str, None, None]: for argument in arguments: analysis_result = self.analysis.analyze(argument) - yield analysis_result.bbs_hash + if analysis_result.bbs_hash is not None: + yield analysis_result.bbs_hash def __check_if_argument_is_valid( self, argument: ArgumentsPair, result: QBDIAnalysis - ) -> None: + ) -> bool: + if result.bbs_hash is None: + return False + if ( argument.get_roles_based_on_analysis(result, self.baseline_hashes) and result.bbs_hash not in self.old_hashes # noqa: W503 @@ -91,7 +95,8 @@ def get_valid_argument( # generates a different hash than the baseline ones, it will be detected # as a false flag because of the sequence generation: --flag first, --flag # afterwards. - self.old_hashes.append(result.bbs_hash) + if result.bbs_hash is not None: + self.old_hashes.append(result.bbs_hash) self.arguments_generator.update_last_analysis_result(result) From 5e224de4f94dd7f4f8533fe623ca7673c7b214b8 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 14:58:53 +0300 Subject: [PATCH 12/15] refactor(cli): implement output buffering for analyze command Decoupled business logic from presentation in cli.py by introducing run_detection and run_fuzzing helpers. Updated the analyze command to collect all results before rendering, ensuring instantaneous output and eliminating visual latency between static and dynamic analysis phases. 
Signed-off-by: Andrei Carp --- attack_surface_approximation/cli.py | 42 +++++++++++++++++------------ 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/attack_surface_approximation/cli.py b/attack_surface_approximation/cli.py index 84939cc..8cc45e7 100644 --- a/attack_surface_approximation/cli.py +++ b/attack_surface_approximation/cli.py @@ -67,6 +67,11 @@ def generate(heuristic: str, output: str, top: int, elf: str = None) -> None: ) +def run_detection(elf: str) -> typing.List[InputStreams]: + detector = InputStreamsDetector(elf) + return detector.detect_all() + + @cli.command( help="Statically detect what input streams are used by an executable." ) @@ -77,13 +82,11 @@ def generate(heuristic: str, output: str, top: int, elf: str = None) -> None: help="ELF Executable", ) def detect(elf: str) -> None: - detector = InputStreamsDetector(elf) - streams = detector.detect_all() - + streams = run_detection(elf) print_detected_streams(streams) -def print_detected_streams(streams: InputStreams) -> None: +def print_detected_streams(streams: typing.List[InputStreams]) -> None: if not any(streams): print_no_detected_stream() else: @@ -94,13 +97,22 @@ def print_no_detected_stream() -> None: print("No input mechanism was detected for the given program.") -def print_multiple_detected_streams(streams: dict) -> None: +def print_multiple_detected_streams(streams: typing.List[InputStreams]) -> None: print("Several input mechanisms were detected for the given program:\n") table = build_detected_streams_table(streams) print(table) +def run_fuzzing(elf: str, dictionary: str) -> typing.List[ArgumentsPair]: + generator = ArgumentsGenerator() + generator.load(dictionary) + possible_arguments = generator.get_arguments() + + fuzzer = ArgumentsFuzzer(elf, possible_arguments) + return fuzzer.get_all_valid_arguments() + + @cli.command(help="Fuzz the arguments of an executable.") @click.option( "--elf", @@ -115,13 +127,7 @@ def print_multiple_detected_streams(streams: dict) -> None: 
help="Arguments dictionary", ) def fuzz(elf: str, dictionary: str) -> None: - generator = ArgumentsGenerator() - generator.load(dictionary) - possible_arguments = generator.get_arguments() - - fuzzer = ArgumentsFuzzer(elf, possible_arguments) - actual_arguments = fuzzer.get_all_valid_arguments() - + actual_arguments = run_fuzzing(elf, dictionary) print_arguments(actual_arguments) @@ -161,7 +167,7 @@ def build_arguments_table(arguments: typing.List[ArgumentsPair]) -> Table: return table -def build_detected_streams_table(streams: dict) -> Table: +def build_detected_streams_table(streams: typing.List[InputStreams]) -> Table: table = Table() table.add_column("Stream") @@ -187,11 +193,13 @@ def build_detected_streams_table(streams: dict) -> Table: required=True, help="Arguments dictionary", ) -@click.pass_context -def analyze(ctx: click.Context, elf: str, dictionary: str) -> None: - ctx.invoke(detect, elf=elf) +def analyze(elf: str, dictionary: str) -> None: + streams = run_detection(elf) + actual_arguments = run_fuzzing(elf, dictionary) + + print_detected_streams(streams) print("") - ctx.invoke(fuzz, elf=elf, dictionary=dictionary) + print_arguments(actual_arguments) def main() -> None: From b6f0e302eb4d701b9d1af112e00819934c25085e Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Sun, 10 May 2026 15:11:45 +0300 Subject: [PATCH 13/15] build(deps): stabilize versions and patch security vulnerabilities Pinned all dependencies in pyproject.toml to exact versions to ensure environment reproducibility. Updated the black package to a secure version to resolve two critical security vulnerabilities: arbitrary file write via unsanitized cache filenames and Regular Expression Denial of Service (ReDoS). 
Fixes: #1, Fixes: #2 Signed-off-by: Andrei Carp --- pyproject.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9ed3c52..5f029b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,19 +6,19 @@ version = "0.1.0" [tool.poetry.dependencies] commons = { path = "../commons", develop = false } -python = "^3.10" -pycparser = "^2.21" -pyelftools = "^0.29" +python = "==3.12.7" +pycparser = "==2.23" +pyelftools = "==0.29" docker = "==7.1.0" -rich = "^12.5.1" -click = "^8.1.3" +rich = "==12.6.0" +click = "==8.3.3" [tool.poetry.dev-dependencies] -black = "^24.3.0" -isort = "^5.10.1" -pylint = "^2.14.4" -pyproject-flake8 = "^0.0.1-alpha.5" -flake8-annotations = "^2.9.1" +black = "==26.3.1" +isort = "==5.13.2" +pylint = "==2.17.7" +pyproject-flake8 = "==0.0.1a5" +flake8-annotations = "==2.9.1" [tool.poetry.scripts] attack_surface_approximation = "attack_surface_approximation.cli:main" From d65a46904bf038b93127d43598d0767f3ee8cda6 Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Mon, 15 Jun 2026 13:42:44 +0300 Subject: [PATCH 14/15] docs(readme): Add setup steps, binary compatibility requirements and known limitations Signed-off-by: Andrei Carp --- README.md | 227 +++++++++++++++++++++--------------------------------- 1 file changed, 88 insertions(+), 139 deletions(-) diff --git a/README.md b/README.md index 8d22928..791f8cd 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,13 @@ - [Setup](#setup) - [Usage](#usage) - [As a CLI Tool](#as-a-cli-tool) - - [Generate Dictionary for Arguments](#generate-dictionary-for-arguments) - - [Input Streams Detection](#detect-input-streams) - - [Arguments Fuzzing](#fuzz-arguments) - - [Get Help](#get-help) + - [Arguments Dictionary Generation](#arguments-dictionary-generation) + - [Input Streams Detection](#input-streams-detection) + - [Arguments Fuzzing](#arguments-fuzzing) + - [Help](#help) - [As a Python Module](#as-a-python-module) - - [Input Streams 
Detection](#detect-input-streams-1) - - [Arguments Fuzzing](#fuzz-arguments-1) + - [Input Streams Detection](#input-streams-detection-1) + - [Arguments Fuzzing](#arguments-fuzzing-1) --- @@ -23,179 +23,128 @@ `attack_surface_approximation` is the CRS module that deals with the approximation of the attack surface in a vulnerable program. Some input mechanisms are omitted: elements of the user interface, signals, devices and interrupts. At the moment, the supported mechanisms are the following: +- Files; +- Arguments; +- Standard input; +- Networking; and +- Environment variables. -- files -- command-line arguments -- standard input -- networking -- environment variables - -In addition, a custom fuzzer is implemented to discover arguments that trigger different code coverage. -It takes arguments from a dictionary which can be handcrafted or generated with an exposed command, with an implemented heuristic. +In addition, a custom fuzzer is implemented to discover arguments that trigger different code coverage. It takes arguments from a dictionary which can be handcrafted or generated with an exposed command, with an implemented heuristic. Examples of arguments dictionaries can be found in `examples/dictionaries`: - -- `man.txt`: generated with the `man_parsing` heuristic and having 6605 entries -- `generation.txt`: generated with the `generation` heuristic and having 62 entries +- `man.txt`, generated with the `man_parsing` heuristic and having 6605 entries; and +- `common.txt`, generated with the `generation` heuristic and having 62 entries. 
### Limitations - ELF format -- x86 architecture -- dynamic binaries (static binaries are not supported) -- symbols present (namely, no stripping is involved) -- no obfuscation technique involved +- x86 architecture (32-bit) +- Non-static binaries +- Symbols present (namely, no stripping is involved); binaries compiled without debug symbols (`-g`) may cause Ghidra to fail resolving function calls, leading to incomplete detection results +- No obfuscation technique involved +- **Binary compatibility for fuzzing**: the argument fuzzer runs inside a Docker container based on Ubuntu 18.04 (GLIBC 2.27). Binaries compiled on modern systems that require a newer GLIBC version will fail to execute inside the container. To work around this, compile the target binary inside the QBDI Docker container itself before fuzzing. +- **Incomplete argument detection**: flags that trigger identical QBDI basic block paths (e.g., multiple simple flags that all resolve to a `break` in a switch statement) will share the same hash. Only the first occurrence is reported; subsequent flags with the same hash are suppressed by the deduplication mechanism. +- **False positive filtering relies on `getopt` stderr reporting**: the module filters out invalid options by checking whether the binary writes to stderr when run with that argument — standard `getopt` behavior. Programs that use custom option parsers and suppress error output may still produce false positives. ## How It Works -The module works by automating [Ghidra](https://ghidra-sre.org/) for static binary analysis. -It extracts information and applies heuristics to determine if a given input stream is present. +The module works by automating Ghidra for static binary analysis. It extracts information and applies heuristics to determine if a given input stream is present. 
Examples of such heuristics are: +- For standard input, calls to `getc()` and `gets()` +- For networking, calls to `recv()` and `recvfrom()` +- For arguments, occurrences of `argc` and `argv` in the decompilation of `main()`. -- for standard input: calls to `getc()` and `gets()` -- for networking: calls to `recv()` and `recvfrom()` -- for command-line arguments: occurrences of `argc` and `argv` in `main()` - -The argument fuzzer uses [Docker](https://www.docker.com/) for running and [QBDI](https://qbdi.quarkslab.com/) to detect basic-block coverage. +The argument fuzzer uses Docker and QBDI to detect basic block coverage. ## Setup -1. Make sure you have set up the repositories and Python environment according to the [top-level instructions](https://github.com/open-crs#requirements). - That is: - - - Docker is installed and is properly running. - Check using: - - ```console - docker version - docker ps -a - docker run --rm hello-world - ``` - - These commands should run without errors. - - - The current module repository and all other module repositories (particularly the [`dataset` repository](https://github.com/open-crs/dataset) and the [`commons` repository](https://github.com/open-crs/commons)) are cloned in the same directory. - - - You are running all commands inside a Python virtual environment. - There should be `(.venv)` prefix to your prompt. - - - You have installed Poetry in the virtual environment. - If you run: - - ```console - which poetry - ``` - - you should get a path ending with `.venv/bin/poetry`. - -1. Disable the Python Keyring: - - ```console - export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring +1. Ensure you have Docker installed. +2. Install the required Python 3 packages via `poetry install`. +3. Build the QBDI Docker image: ``` - - This is a problem that may occur in certain situations, preventing Poetry from getting packages. - -1. 
Install the required packages with Poetry (based on `pyprojects.toml`): - - ```console - poetry install --only main + cd commons/commons/qbdi/docker + docker build -t opencrs/qbdi . ``` - -1. Create the `ghidra` and `qbdi_args_fuzzing` Docker images by using the [instructions in the `commons` repository](https://github.com/open-crs/commons?tab=readme-ov-file#setup). - -1. Optionally, generate executables by using the [instructions in the `dataset` repository](https://github.com/open-crs/dataset). +4. Ensure the Docker API is accessible by: + - Running the module as `root`; or + - Changing the Docker socket permissions (insecure approach) via `chmod 777 /var/run/docker.sock`. ## Usage -You can use the `attack_surface_approximation` module either standalone, as a CLI tool, or integrated into Python applications, as a Python module. - ### As a CLI Tool -As a CLI tool, you can either use the `cli.py` module: - -```console -python attack_surface_approximation/cli.py -``` - -or the Poetry interface: +#### Arguments Dictionary Generation -```console -poetry run attack_surface_approximation ``` - -#### Generate Dictionary for Arguments - -```console -$ poetry run attack_surface_approximation generate --heuristic man_parsing --output args.txt --top 100 +➜ poetry run attack_surface_approximation generate --heuristic man --output args.txt --top 10 Successfully generated dictionary with 10 arguments - -$ head args.txt ---allow-unrelated-histories ---analysis-display-unstable-clusters ---auto-area-segmentation ---backup-dir ---callstack-filter ---cidfile ---class ---codename ---column ---contained +➜ cat args.txt +--and +--get +--get-feedbacks +--no-progress-meter +--print-name +-input +-lmydep2 +-miniswhite +-nM +-prune ``` -#### Detect Input Streams +#### Input Streams Detection -Use an ELF i386 (32 bit) executable as target for detecting input streams. 
- -For example, you can use one of the executables generated in the [`dataset` repository](https://github.com/open-crs/dataset): +``` +➜ ./crackme +Enter the password: pass +Wrong password! +➜ poetry run attack_surface_approximation detect --elf crackme +Several input mechanisms were detected for the given program: -```console -$ ../dataset/executables/toy_test_suite_1.elf -Gimme two lines of input: -aaa -bbb +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ Stream ┃ Present ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ files │ No │ +│ arguments │ No │ +│ stdin │ Yes │ +│ networking │ No │ +│ environment_variables │ No │ +└───────────────────────┴─────────┘ ``` -Now, do the attack surface approximation: +#### Arguments Fuzzing -```console -$ poetry run attack_surface_approximation detect --elf $(pwd)/../dataset/executables/toy_test_suite_1.elf -Several input mechanisms were detected for the given program: +The target binary must be a 32-bit ELF dynamically linked against GLIBC 2.27 or earlier. If your binary was compiled on a modern system, compile it inside the QBDI container first: -┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ -┃ Stream ┃ Present ┃ -┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ -│ STDIN │ Yes │ -│ ARGUMENTS │ Yes │ -│ FILES │ Yes │ -│ ENVIRONMENT_VARIABLE │ Yes │ -│ NETWORKING │ Yes │ -└──────────────────────┴─────────┘ +``` +➜ docker run --rm --user root \ + -v $(pwd)/examples:/examples \ + opencrs/qbdi \ + bash -c "gcc -m32 /examples/target.c -o /examples/target" ``` -The executable used uses all potential input streams. 
- -#### Fuzz Arguments +Then run the fuzzer: -```console -$ poetry run attack_surface_approximation fuzz --elf $(pwd)/../dataset/executables/toy_test_suite_1.elf --dictionary args.txt +``` +➜ poetry run attack_surface_approximation fuzz --elf examples/target --dictionary args.txt Several arguments were detected for the given program: -┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ -┃ Argument ┃ Role ┃ -┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ -│ - │ FLAG │ -│ --re │ FLAG │ -│ --re string │ STRING_ENABLER │ -│ -mmusl │ FLAG │ -└─────────────┴────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ +┃ Argument ┃ Role ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ +│ -d │ FLAG │ +│ -f string │ STRING_ENABLER │ +│ -r │ FLAG │ +│ -s │ FLAG │ +│ -v │ FLAG │ +│ -f /tmp/canary.opencrs │ FILE_ENABLER │ +└────────────────────────┴────────────────┘ ``` -#### Get Help +#### Help -```console -$ poetry run attack_surface_approximation +``` +➜ poetry run attack_surface_approximation Usage: attack_surface_approximation [OPTIONS] COMMAND [ARGS]... Discovers the attack surface of vulnerable programs. @@ -212,7 +161,7 @@ Commands: ### As a Python Module -#### Detect Input Streams +#### Input Streams Detection ```python from attack_surface_approximation.static_input_streams_detection import \ @@ -222,7 +171,7 @@ detector = InputStreamsDetector(elf_filename) streams_list = detector.detect_all() ``` -#### Fuzz Arguments +#### Arguments Fuzzing ```python from attack_surface_approximation.arguments_fuzzing import ArgumentsFuzzer From 3b48a7d9f73c3dbffcbca4c95f59bacb6606f3af Mon Sep 17 00:00:00 2001 From: Andrei Carp Date: Mon, 15 Jun 2026 13:43:26 +0300 Subject: [PATCH 15/15] fix(fuzzer): Filter false positive arguments by checking binary stderr output Run each FLAG and STRING_ENABLER candidate without QBDI and check stderr. Arguments that produce stderr output are invalid getopt options and are removed from results. 
Signed-off-by: Andrei Carp --- .../arguments_fuzzing/fuzzer.py | 9 ++++++++- .../arguments_fuzzing/qbdi_analysis.py | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/attack_surface_approximation/arguments_fuzzing/fuzzer.py b/attack_surface_approximation/arguments_fuzzing/fuzzer.py index 8f9320d..2bdcf2f 100644 --- a/attack_surface_approximation/arguments_fuzzing/fuzzer.py +++ b/attack_surface_approximation/arguments_fuzzing/fuzzer.py @@ -3,6 +3,7 @@ from attack_surface_approximation.arguments_fuzzing.arguments_types import ( ArgumentsPair, ) +from commons.arguments import ArgumentRole from attack_surface_approximation.arguments_fuzzing.fuzzing_sequence_generator import ( FuzzingSequenceGenerator, ) @@ -100,5 +101,11 @@ def get_valid_argument( self.arguments_generator.update_last_analysis_result(result) + def __is_false_positive(self, argument: ArgumentsPair) -> bool: + if ArgumentRole.FLAG not in argument.valid_roles and ArgumentRole.STRING_ENABLER not in argument.valid_roles: + return False + return self.analysis.produces_stderr(argument) + def get_all_valid_arguments(self) -> typing.List[ArgumentsPair]: - return list(self.get_valid_argument()) + candidates = list(self.get_valid_argument()) + return [a for a in candidates if not self.__is_false_positive(a)] diff --git a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py index 46e257c..cd19bb5 100644 --- a/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py +++ b/attack_surface_approximation/arguments_fuzzing/qbdi_analysis.py @@ -142,6 +142,16 @@ def __create_container(self) -> None: workdir=self.__configuration.CONTAINER_SO_FOLDER, ) + def produces_stderr(self, argument: ArgumentsPair) -> bool: + stringified_arguments = argument.to_str() + result = self.__container.exec_run( + f"timeout {self.timeout} {self.__configuration.CONTAINER_EXECUTABLE} {stringified_arguments}", + workdir="/home/docker", + 
demux=True, + ) + _, stderr = result.output + return bool(stderr) + def create_temp_file_inside_container(self) -> str: self.__container.exec_run( f"touch {self.__configuration.CONTAINER_TEMP_FILE}"