From afc6127f7238f3604318b6785a727b5248d2d156 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:09:54 +0100 Subject: [PATCH 1/8] opt: cancel adjacent opposing simple commands Adds optimise_program() called from both bfc and bfi after parsing. The first pass (cancel_opposing) merges adjacent INC/DEC and RIGHT/LEFT pairs, subtracting their counts and removing pairs that fully cancel. Bracket jump indices are remapped after compaction. --- PLAN.md | 73 ++++++++++++++++++++++++++++++++++++++++++++++ src/ir.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/ir.h | 4 +++ src/main_bfc.c | 1 + src/main_bfi.c | 1 + 5 files changed, 158 insertions(+) create mode 100644 PLAN.md diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..5ab17b7 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,73 @@ +# Optimisation Plan + +Five optimisations for the produced LLVM IR, implemented as separate commits. + +## Status + +- [ ] 1. Cancel opposing simple commands +- [ ] 2. `CMD_CLEAR` — zero-loop `[-]` → `store i8 0` +- [ ] 3. `CMD_MULTIPLY` — multiply-loop `[->N*+<]` → multiply-add +- [ ] 4. `dp` as `alloca` (enables `mem2reg`) +- [ ] 5. LLVM pass pipeline gated on `-O` + +--- + +## 1. Cancel opposing simple commands + +**Where**: `ir.c` — new `optimise_program()` called from both `main_bfc.c` and `main_bfi.c`. + +Peephole pass over the `cmds` array: adjacent INC/DEC or RIGHT/LEFT pairs subtract counts and, if they fully cancel, are removed. Re-computes bracket jump indices after compaction. + +**Example**: `+++--` → `CMD_SIMPLE_INC(1)` instead of `CMD_SIMPLE_INC(3), CMD_SIMPLE_DEC(2)`. + +Test changes: none (no existing test program uses cancellable patterns). + +--- + +## 2. CMD_CLEAR — zero-loop detection + +**Where**: `ir.c` `optimise_program()`, `llvm.c`, `interp.c`. + +New `CMD_CLEAR` IR node. Pattern detected after cancellation pass: `[` + single INC or DEC body + `]`. 
Replaced with a single `store i8 0` in codegen, `ctx->data[ctx->dp] = 0` in interpreter. + +**Example**: `+++++[-]` → `CMD_SIMPLE_INC(5), CMD_CLEAR`. + +Test changes: update `test/test_simple_loop.filecheck` (program is `+++++[-]`). + +--- + +## 3. CMD_MULTIPLY — multiply-loop detection + +**Where**: `ir.c` `optimise_program()`, `llvm.c`, `interp.c`. + +New `CMD_MULTIPLY` IR node with up to `MULTIPLY_MOVES_MAX` (8) offset/factor pairs. A loop body matches when: only `+`/`-`/`>`/`<` inside, net pointer movement is zero, loop counter cell has net delta −1. Each non-counter cell touched becomes a `{offset, factor}` move. + +Codegen: load counter, for each move `data[dp+offset] += counter * factor`, then `store i8 0` to counter cell. Interpreter: same arithmetic. + +**Example**: `[<+>-]` at dp=1 → `CMD_MULTIPLY {moves=[{offset=-1, factor=1}]}`. + +Test changes: add `test/res/multiply.b` and `test/test_multiply.filecheck`. + +--- + +## 4. `dp` as `alloca` + +**Where**: `llvm.c` — `create_main_function()` creates `dp` as an alloca instead of a global. + +`dp` is removed from the global section and created with `LLVMBuildAlloca` in the entry block, immediately initialised to 0. The `LLVMValueRef ctx->dp` is still a pointer (alloca ptr vs global ptr) so all downstream load/store calls are unchanged. + +Without LLVM passes the IR still has explicit load/store; the benefit is unlocked in commit 5 when `mem2reg` promotes the alloca to a register. + +Test changes: update all FileCheck tests — remove `@dp = global i32 0` check, change `ptr @dp` references to `ptr %dp`. + +--- + +## 5. LLVM pass pipeline gated on `-O` + +**Where**: `llvm.c`, `llvm.h`, `cmake/llvm.cmake`, `main_bfc.c`. + +Wires the already-parsed `--optimise`/`-O` flag from `main_bfc.c` through `generate(struct program *, bool optimise)`. When `optimise` is true, runs `"mem2reg,instcombine,simplifycfg,gvn"` via `LLVMRunPasses` (LLVM new pass manager, LLVM ≥ 14). 
Adds `passes` to `llvm_map_components_to_libnames` in cmake. + +`mem2reg` promotes the `dp` alloca to a register; `gvn` eliminates redundant loads of `@data` elements; `instcombine` and `simplifycfg` clean up the resulting IR. + +Test changes: none (FileCheck tests do not pass `-O`). diff --git a/src/ir.c b/src/ir.c index e50f828..49ca68f 100644 --- a/src/ir.c +++ b/src/ir.c @@ -222,6 +222,85 @@ char program_contains_output(struct program *program) { return 0; } +static int are_opposing(enum cmd_type a, enum cmd_type b) { + return (a == CMD_SIMPLE_INC && b == CMD_SIMPLE_DEC) || + (a == CMD_SIMPLE_DEC && b == CMD_SIMPLE_INC) || + (a == CMD_SIMPLE_RIGHT && b == CMD_SIMPLE_LEFT) || + (a == CMD_SIMPLE_LEFT && b == CMD_SIMPLE_RIGHT); +} + +static enum cmd_type opposite_type(enum cmd_type t) { + switch (t) { + case CMD_SIMPLE_INC: + return CMD_SIMPLE_DEC; + case CMD_SIMPLE_DEC: + return CMD_SIMPLE_INC; + case CMD_SIMPLE_RIGHT: + return CMD_SIMPLE_LEFT; + case CMD_SIMPLE_LEFT: + return CMD_SIMPLE_RIGHT; + default: + return t; + } +} + +static void cancel_opposing(struct program *program) { + struct cmd *new_cmds = malloc(program->length * sizeof(struct cmd)); + size_t *old_to_new = malloc(program->length * sizeof(size_t)); + size_t *new_to_old = malloc(program->length * sizeof(size_t)); + if (!new_cmds || !old_to_new || !new_to_old) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + size_t new_len = 0; + + for (size_t old = 0; old < program->length; old++) { + struct cmd curr = program->cmds[old]; + if (new_len > 0) { + struct cmd *prev = &new_cmds[new_len - 1]; + if (are_opposing(prev->type, curr.type)) { + size_t pc = prev->value.simple_count; + size_t cc = curr.value.simple_count; + if (cc > pc) { + prev->type = opposite_type(prev->type); + prev->value.simple_count = cc - pc; + } else if (cc < pc) { + prev->value.simple_count = pc - cc; + } else { + old_to_new[new_to_old[new_len - 1]] = + SIZE_MAX; + new_len--; + } + old_to_new[old] = SIZE_MAX; + 
continue; + } + } + old_to_new[old] = new_len; + new_to_old[new_len] = old; + new_cmds[new_len++] = curr; + } + + for (size_t i = 0; i < new_len; i++) { + if (new_cmds[i].type == CMD_JUMP_FORWARD || + new_cmds[i].type == CMD_JUMP_BACK) { + size_t old_target = + program->cmds[new_to_old[i]].value.jump_index; + assert(old_to_new[old_target] != SIZE_MAX); + new_cmds[i].value.jump_index = old_to_new[old_target]; + } + } + + free(program->cmds); + free(old_to_new); + free(new_to_old); + program->cmds = new_cmds; + program->length = new_len; +} + +void optimise_program(struct program *program) { + cancel_opposing(program); +} + char program_contains_input(struct program *program) { for (size_t cmd_index = 0; cmd_index < program->length; cmd_index++) { if (program->cmds[cmd_index].type == CMD_SIMPLE_INPUT) { diff --git a/src/ir.h b/src/ir.h index cfc502a..24f31e3 100644 --- a/src/ir.h +++ b/src/ir.h @@ -83,4 +83,8 @@ char program_contains_input(struct program *program); /// @return 1 if valid; otherwise 0. char program_is_valid(char *source_str); +/// Apply IR-level optimisations to a parsed program in-place. +/// @param program Program to optimise. 
+void optimise_program(struct program *program); + #endif diff --git a/src/main_bfc.c b/src/main_bfc.c index d382b26..f88d3de 100644 --- a/src/main_bfc.c +++ b/src/main_bfc.c @@ -72,6 +72,7 @@ int main(int argc, char **argv) { } struct program parsed_program = string_to_program(program_str); free(program_str); + optimise_program(&parsed_program); LLVMModuleRef module = generate(&parsed_program); char *err = NULL; LLVMPrintModuleToFile(module, "/dev/stdout", &err); diff --git a/src/main_bfi.c b/src/main_bfi.c index 640b0a7..99a13f8 100644 --- a/src/main_bfi.c +++ b/src/main_bfi.c @@ -67,6 +67,7 @@ int main(int argc, char **argv) { } struct program parsed_program = string_to_program(program_str); free(program_str); + optimise_program(&parsed_program); struct context_t ctx = init_context(parsed_program); while (!interp(&ctx, STDOUT_FILENO, STDIN_FILENO, byte_output)) { }; From 97b34fa4e3b7630f7e70f4658874251a41f4d290 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:12:00 +0100 Subject: [PATCH 2/8] opt: replace [-]/[+] loops with CMD_CLEAR (store i8 0) Adds detect_clear_loops() pass: a loop whose body is a single INC or DEC (any count) is replaced with the synthetic CMD_CLEAR node, emitting a single store i8 0 in LLVM IR and a direct zero-assignment in the interpreter. Updates test_simple_loop.filecheck to match. --- PLAN.md | 26 ++++++++++++++- src/interp.c | 5 +++ src/ir.c | 57 +++++++++++++++++++++++++++++++++ src/ir.h | 2 ++ src/llvm.c | 9 ++++++ test/test_simple_loop.filecheck | 29 ++++------------- 6 files changed, 104 insertions(+), 24 deletions(-) diff --git a/PLAN.md b/PLAN.md index 5ab17b7..ed825bd 100644 --- a/PLAN.md +++ b/PLAN.md @@ -4,11 +4,12 @@ Five optimisations for the produced LLVM IR, implemented as separate commits. ## Status -- [ ] 1. Cancel opposing simple commands +- [x] 1. Cancel opposing simple commands - [ ] 2. `CMD_CLEAR` — zero-loop `[-]` → `store i8 0` - [ ] 3. 
`CMD_MULTIPLY` — multiply-loop `[->N*+<]` → multiply-add - [ ] 4. `dp` as `alloca` (enables `mem2reg`) - [ ] 5. LLVM pass pipeline gated on `-O` +- [ ] 6. Per-optimisation TOML config file --- @@ -71,3 +72,26 @@ Wires the already-parsed `--optimise`/`-O` flag from `main_bfc.c` through `gener `mem2reg` promotes the `dp` alloca to a register; `gvn` eliminates redundant loads of `@data` elements; `instcombine` and `simplifycfg` clean up the resulting IR. Test changes: none (FileCheck tests do not pass `-O`). + +--- + +## 6. Per-optimisation TOML config file + +**Where**: new `src/config.h` / `src/config.c`, updated `main_bfc.c`, updated `optimise_program()` and `generate()` signatures. + +A flat TOML file (default `bf.toml` in the current directory, overridable with `-c`/`--config`) controls each optimisation independently: + +```toml +[optimisations] +cancel_opposing = true +clear_loop = true +multiply_loop = true +dp_alloca = true +llvm_passes = false +``` + +`struct opt_config` holds a boolean for each flag; a minimal built-in parser handles `[section]` headers and `key = true/false` lines. Missing file → all optimisations enabled by default. The `-O` flag becomes a shorthand for enabling all flags. + +`optimise_program(struct program *, const struct opt_config *)` and `generate(struct program *, const struct opt_config *)` are updated to gate each pass on its flag. + +Test changes: add `test/res/bf.toml` with specific flags for FileCheck regression tests if needed. 
diff --git a/src/interp.c b/src/interp.c index 0dc58ef..0c61855 100644 --- a/src/interp.c +++ b/src/interp.c @@ -32,6 +32,8 @@ size_t abstract_to_concrete_pc(size_t abstract_pc, struct program *program) { case CMD_JUMP_BACK: concrete_pc++; break; + case CMD_CLEAR: + break; default: fprintf(stderr, "Unrecognised cmd_type '%c'\n", program->cmds[cmd_index].type); @@ -171,6 +173,9 @@ int interp(struct context_t *ctx, int out_fd, int in_fd, bool byte_output) { ctx->pc = current_cmd.value.jump_index; } break; + case CMD_CLEAR: + ctx->data[ctx->dp] = 0; + break; default: fprintf(stderr, "Invalid character '%c'\n", cmd_type_to_char(current_cmd.type)); diff --git a/src/ir.c b/src/ir.c index 49ca68f..71b3e21 100644 --- a/src/ir.c +++ b/src/ir.c @@ -56,6 +56,8 @@ size_t program_str_length(struct program *program) { case CMD_JUMP_BACK: length++; break; + case CMD_CLEAR: + break; default: fprintf(stderr, "Unrecognised cmd_type '%c'\n", program->cmds[cmd_index].type); @@ -203,6 +205,8 @@ char *program_to_string(struct program *program) { out[str_index++] = cmd_type_to_char(program->cmds[cmd_index].type); break; + case CMD_CLEAR: + break; default: fprintf(stderr, "Unrecognised cmd_type '%c'\n", program->cmds[cmd_index].type); @@ -297,8 +301,61 @@ static void cancel_opposing(struct program *program) { program->length = new_len; } +static void detect_clear_loops(struct program *program) { + struct cmd *new_cmds = malloc(program->length * sizeof(struct cmd)); + size_t *old_to_new = malloc(program->length * sizeof(size_t)); + size_t *new_to_old = malloc(program->length * sizeof(size_t)); + if (!new_cmds || !old_to_new || !new_to_old) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + size_t new_len = 0; + + for (size_t old = 0; old < program->length;) { + struct cmd c = program->cmds[old]; + if (c.type == CMD_JUMP_FORWARD && old + 2 < program->length) { + struct cmd body = program->cmds[old + 1]; + struct cmd close = program->cmds[old + 2]; + if ((body.type == 
CMD_SIMPLE_INC || + body.type == CMD_SIMPLE_DEC) && + close.type == CMD_JUMP_BACK && + c.value.jump_index == old + 2) { + old_to_new[old] = new_len; + old_to_new[old + 1] = SIZE_MAX; + old_to_new[old + 2] = SIZE_MAX; + new_to_old[new_len] = old; + new_cmds[new_len++] = + (struct cmd){.type = CMD_CLEAR}; + old += 3; + continue; + } + } + old_to_new[old] = new_len; + new_to_old[new_len] = old; + new_cmds[new_len++] = c; + old++; + } + + for (size_t i = 0; i < new_len; i++) { + if (new_cmds[i].type == CMD_JUMP_FORWARD || + new_cmds[i].type == CMD_JUMP_BACK) { + size_t old_target = + program->cmds[new_to_old[i]].value.jump_index; + assert(old_to_new[old_target] != SIZE_MAX); + new_cmds[i].value.jump_index = old_to_new[old_target]; + } + } + + free(program->cmds); + free(old_to_new); + free(new_to_old); + program->cmds = new_cmds; + program->length = new_len; +} + void optimise_program(struct program *program) { cancel_opposing(program); + detect_clear_loops(program); } char program_contains_input(struct program *program) { diff --git a/src/ir.h b/src/ir.h index 24f31e3..f21b25e 100644 --- a/src/ir.h +++ b/src/ir.h @@ -21,6 +21,8 @@ enum cmd_type { CMD_JUMP_FORWARD, /// `']'`: jump back if current cell is non-zero. CMD_JUMP_BACK, + /// Synthetic: set current cell to zero (replaces `[-]`/`[+]`). + CMD_CLEAR, }; /// One compressed instruction in the internal Brainfuck IR. 
diff --git a/src/llvm.c b/src/llvm.c index d8e3f84..7b7f5df 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -184,6 +184,12 @@ void comma(struct llvm_context *ctx) { LLVMBuildStore(ctx->builder, char_value, data_ptr); } +void clear(struct llvm_context *ctx) { + LLVMValueRef data_ptr = get_dataptr(ctx); + LLVMBuildStore(ctx->builder, LLVMConstInt(int8_type(ctx), 0, 0), + data_ptr); +} + void left_bracket(struct llvm_context *ctx) { LLVMValueRef data_ptr = get_dataptr(ctx); LLVMValueRef current_value = @@ -250,6 +256,9 @@ LLVMModuleRef generate(struct program *program) { case CMD_JUMP_BACK: right_bracket(&ctx); break; + case CMD_CLEAR: + clear(&ctx); + break; default: fprintf(stderr, "Unsupported cmd_type '%c'\n", command.type); diff --git a/test/test_simple_loop.filecheck b/test/test_simple_loop.filecheck index cd01342..ed63c5b 100644 --- a/test/test_simple_loop.filecheck +++ b/test/test_simple_loop.filecheck @@ -1,7 +1,7 @@ ; RUN: %bf %s.b --emit-llvm | FileCheck %s -; Test brainfuck program with a simple loop -; This should generate IR for adding 5 to a cell, then looping to decrement until zero +; Test brainfuck program with a simple loop: +++++[-] +; The [-] pattern is optimised to a single store of zero (CMD_CLEAR). ; CHECK: ; ModuleID = 'main' ; CHECK: source_filename = "main" @@ -12,35 +12,18 @@ ; CHECK: define i32 @main() { ; CHECK: entry: -; Initial addition of 5 to current cell +; Initial addition of 5 to current cell (+++++). ; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 ; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP1]] ; CHECK: %[[VAL1:.*]] = load i8, ptr %[[PTR1]], align 1 ; CHECK: %[[ADD:.*]] = add i8 %[[VAL1]], 5 ; CHECK: store i8 %[[ADD]], ptr %[[PTR1]], align 1 -; Loop condition check - load current value and compare to zero +; CMD_CLEAR: [-] collapses to a single store of zero. 
; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 ; CHECK: %[[PTR2:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP2]] -; CHECK: %[[VAL2:.*]] = load i8, ptr %[[PTR2]], align 1 -; CHECK: %[[LOOPCOND:.*]] = icmp ne i8 %[[VAL2]], 0 -; CHECK: br i1 %[[LOOPCOND]], label %{{.*}}, label %exit +; CHECK: store i8 0, ptr %[[PTR2]], align 1 -; Loop body - decrement current cell -; CHECK: %[[DP3:.*]] = load i32, ptr @dp, align 4 -; CHECK: %[[PTR3:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP3]] -; CHECK: %[[VAL3:.*]] = load i8, ptr %[[PTR3]], align 1 -; CHECK: %[[SUB:.*]] = sub i8 %[[VAL3]], 1 -; CHECK: store i8 %[[SUB]], ptr %[[PTR3]], align 1 - -; Loop condition check again (end of loop body) -; CHECK: %[[DP4:.*]] = load i32, ptr @dp, align 4 -; CHECK: %[[PTR4:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP4]] -; CHECK: %[[VAL4:.*]] = load i8, ptr %[[PTR4]], align 1 -; CHECK: %[[LOOPCOND2:.*]] = icmp ne i8 %[[VAL4]], 0 -; CHECK: br i1 %[[LOOPCOND2]], label %{{.*}}, label %exit - -; Exit block -; CHECK: exit: +; Return statement ; CHECK: ret i32 0 ; CHECK: } From d3519e8fdaae97709bd710dd955b11734d57ac0e Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:14:02 +0100 Subject: [PATCH 3/8] opt: detect multiply-loops and replace with CMD_MULTIPLY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds detect_multiply_loops() pass: a loop whose body contains only +/-/</> with net pointer delta 0 and loop-counter delta -1 is replaced with CMD_MULTIPLY. Each non-counter cell touched becomes an {offset, factor} pair. Codegen emits counter load, multiply-adds, then store i8 0. Supports up to MULTIPLY_MOVES_MAX (8) target cells, offsets within ±64. Adds test/res/multiply.b and test/test_multiply.filecheck. 
--- src/interp.c | 12 ++++ src/ir.c | 118 +++++++++++++++++++++++++++++++++++ src/ir.h | 18 ++++++ src/llvm.c | 35 +++++++++++ test/res/multiply.b | 1 + test/test_multiply.filecheck | 43 +++++++++++++ 6 files changed, 227 insertions(+) create mode 100644 test/res/multiply.b create mode 100644 test/test_multiply.filecheck diff --git a/src/interp.c b/src/interp.c index 0c61855..d077755 100644 --- a/src/interp.c +++ b/src/interp.c @@ -33,6 +33,7 @@ size_t abstract_to_concrete_pc(size_t abstract_pc, struct program *program) { concrete_pc++; break; case CMD_CLEAR: + case CMD_MULTIPLY: break; default: fprintf(stderr, "Unrecognised cmd_type '%c'\n", @@ -176,6 +177,17 @@ int interp(struct context_t *ctx, int out_fd, int in_fd, bool byte_output) { case CMD_CLEAR: ctx->data[ctx->dp] = 0; break; + case CMD_MULTIPLY: + for (size_t i = 0; + i < current_cmd.value.multiply.n_moves; i++) { + int target = (int)ctx->dp + + current_cmd.value.multiply.moves[i].offset; + ctx->data[target] += + ctx->data[ctx->dp] * + (uint8_t)current_cmd.value.multiply.moves[i].factor; + } + ctx->data[ctx->dp] = 0; + break; default: fprintf(stderr, "Invalid character '%c'\n", cmd_type_to_char(current_cmd.type)); diff --git a/src/ir.c b/src/ir.c index 71b3e21..7ca216d 100644 --- a/src/ir.c +++ b/src/ir.c @@ -57,6 +57,7 @@ size_t program_str_length(struct program *program) { length++; break; case CMD_CLEAR: + case CMD_MULTIPLY: break; default: fprintf(stderr, "Unrecognised cmd_type '%c'\n", @@ -206,6 +207,7 @@ char *program_to_string(struct program *program) { cmd_type_to_char(program->cmds[cmd_index].type); break; case CMD_CLEAR: + case CMD_MULTIPLY: break; default: fprintf(stderr, "Unrecognised cmd_type '%c'\n", @@ -353,9 +355,125 @@ static void detect_clear_loops(struct program *program) { program->length = new_len; } +#define DELTA_RANGE 64 + +static void detect_multiply_loops(struct program *program) { + struct cmd *new_cmds = malloc(program->length * sizeof(struct cmd)); + size_t *old_to_new = 
malloc(program->length * sizeof(size_t)); + size_t *new_to_old = malloc(program->length * sizeof(size_t)); + if (!new_cmds || !old_to_new || !new_to_old) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + size_t new_len = 0; + + for (size_t old = 0; old < program->length;) { + struct cmd c = program->cmds[old]; + if (c.type == CMD_JUMP_FORWARD) { + size_t close_idx = c.value.jump_index; + int valid = 1; + int dp_delta = 0; + int deltas[2 * DELTA_RANGE + 1]; + memset(deltas, 0, sizeof(deltas)); + + for (size_t k = old + 1; k < close_idx && valid; k++) { + struct cmd bk = program->cmds[k]; + switch (bk.type) { + case CMD_SIMPLE_RIGHT: + dp_delta += (int)bk.value.simple_count; + if (dp_delta > DELTA_RANGE || + dp_delta < -DELTA_RANGE) + valid = 0; + break; + case CMD_SIMPLE_LEFT: + dp_delta -= (int)bk.value.simple_count; + if (dp_delta > DELTA_RANGE || + dp_delta < -DELTA_RANGE) + valid = 0; + break; + case CMD_SIMPLE_INC: + deltas[dp_delta + DELTA_RANGE] += + (int)bk.value.simple_count; + break; + case CMD_SIMPLE_DEC: + deltas[dp_delta + DELTA_RANGE] -= + (int)bk.value.simple_count; + break; + default: + valid = 0; + break; + } + } + + if (valid && dp_delta == 0 && + deltas[DELTA_RANGE] == -1) { + struct multiply_move moves[MULTIPLY_MOVES_MAX]; + size_t n_moves = 0; + int overflow = 0; + for (int d = -DELTA_RANGE; + d <= DELTA_RANGE && !overflow; d++) { + if (d == 0 || + deltas[d + DELTA_RANGE] == 0) + continue; + if (n_moves >= MULTIPLY_MOVES_MAX) { + overflow = 1; + break; + } + moves[n_moves++] = + (struct multiply_move){ + .offset = d, + .factor = + deltas[d + DELTA_RANGE]}; + } + if (!overflow) { + for (size_t k = old; k <= close_idx; + k++) { + old_to_new[k] = + (k == old) ? 
new_len + : SIZE_MAX; + } + new_to_old[new_len] = old; + struct cmd mc = { + .type = CMD_MULTIPLY, + .value.multiply.n_moves = n_moves}; + for (size_t i = 0; i < n_moves; i++) + mc.value.multiply.moves[i] = + moves[i]; + new_cmds[new_len++] = mc; + old = close_idx + 1; + continue; + } + } + } + old_to_new[old] = new_len; + new_to_old[new_len] = old; + new_cmds[new_len++] = c; + old++; + } + + for (size_t i = 0; i < new_len; i++) { + if (new_cmds[i].type == CMD_JUMP_FORWARD || + new_cmds[i].type == CMD_JUMP_BACK) { + size_t old_target = + program->cmds[new_to_old[i]].value.jump_index; + assert(old_to_new[old_target] != SIZE_MAX); + new_cmds[i].value.jump_index = old_to_new[old_target]; + } + } + + free(program->cmds); + free(old_to_new); + free(new_to_old); + program->cmds = new_cmds; + program->length = new_len; +} + +#undef DELTA_RANGE + void optimise_program(struct program *program) { cancel_opposing(program); detect_clear_loops(program); + detect_multiply_loops(program); } char program_contains_input(struct program *program) { diff --git a/src/ir.h b/src/ir.h index f21b25e..7d02668 100644 --- a/src/ir.h +++ b/src/ir.h @@ -23,6 +23,19 @@ enum cmd_type { CMD_JUMP_BACK, /// Synthetic: set current cell to zero (replaces `[-]`/`[+]`). CMD_CLEAR, + /// Synthetic: multiply-add loop (replaces `[-offset1*factor1...]`). + CMD_MULTIPLY, +}; + +/// Maximum number of target cells in a CMD_MULTIPLY instruction. +#define MULTIPLY_MOVES_MAX 8 + +/// One (offset, factor) pair in a CMD_MULTIPLY instruction. +struct multiply_move { + /// Cell offset from the current data pointer. + int offset; + /// Multiplier applied to the loop counter cell. + int factor; }; /// One compressed instruction in the internal Brainfuck IR. @@ -35,6 +48,11 @@ struct cmd { size_t simple_count; /// Matching bracket command index. size_t jump_index; + /// Moves for CMD_MULTIPLY. 
+ struct { + struct multiply_move moves[MULTIPLY_MOVES_MAX]; + size_t n_moves; + } multiply; } value; }; diff --git a/src/llvm.c b/src/llvm.c index 7b7f5df..384a840 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -184,6 +184,37 @@ void comma(struct llvm_context *ctx) { LLVMBuildStore(ctx->builder, char_value, data_ptr); } +void multiply(struct llvm_context *ctx, struct multiply_move *moves, + size_t n_moves) { + LLVMValueRef counter_ptr = get_dataptr(ctx); + LLVMValueRef counter = + LLVMBuildLoad2(ctx->builder, int8_type(ctx), counter_ptr, ""); + for (size_t i = 0; i < n_moves; i++) { + LLVMValueRef dp_value = + LLVMBuildLoad2(ctx->builder, int32_type(ctx), ctx->dp, ""); + LLVMValueRef offset = + LLVMConstInt(int32_type(ctx), (unsigned long long)moves[i].offset, 1); + LLVMValueRef target_idx = + LLVMBuildAdd(ctx->builder, dp_value, offset, ""); + LLVMValueRef indices[] = {LLVMConstInt(int32_type(ctx), 0, 0), + target_idx}; + LLVMValueRef target_ptr = + LLVMBuildGEP2(ctx->builder, data_array_type(ctx), + ctx->data, indices, 2, ""); + LLVMValueRef target = + LLVMBuildLoad2(ctx->builder, int8_type(ctx), target_ptr, ""); + LLVMValueRef factor = + LLVMConstInt(int8_type(ctx), (unsigned long long)moves[i].factor, 1); + LLVMValueRef product = + LLVMBuildMul(ctx->builder, counter, factor, ""); + LLVMValueRef new_val = + LLVMBuildAdd(ctx->builder, target, product, ""); + LLVMBuildStore(ctx->builder, new_val, target_ptr); + } + LLVMBuildStore(ctx->builder, LLVMConstInt(int8_type(ctx), 0, 0), + counter_ptr); +} + void clear(struct llvm_context *ctx) { LLVMValueRef data_ptr = get_dataptr(ctx); LLVMBuildStore(ctx->builder, LLVMConstInt(int8_type(ctx), 0, 0), @@ -259,6 +290,10 @@ LLVMModuleRef generate(struct program *program) { case CMD_CLEAR: clear(&ctx); break; + case CMD_MULTIPLY: + multiply(&ctx, command.value.multiply.moves, + command.value.multiply.n_moves); + break; default: fprintf(stderr, "Unsupported cmd_type '%c'\n", command.type); diff --git a/test/res/multiply.b 
b/test/res/multiply.b new file mode 100644 index 0000000..bfcc969 --- /dev/null +++ b/test/res/multiply.b @@ -0,0 +1 @@ +>+++++[<+>-] \ No newline at end of file diff --git a/test/test_multiply.filecheck b/test/test_multiply.filecheck new file mode 100644 index 0000000..235130a --- /dev/null +++ b/test/test_multiply.filecheck @@ -0,0 +1,43 @@ +; RUN: %bf %s.b --emit-llvm | FileCheck %s + +; Test multiply-loop optimisation: >+++++[<+>-] +; Moves dp right, sets cell[1]=5, then [<+>-] adds cell[1] into cell[0] +; and zeros cell[1]. Optimised to CMD_MULTIPLY {offset=-1, factor=1}. + +; CHECK: ; ModuleID = 'main' +; CHECK: source_filename = "main" + +; CHECK: @dp = global i32 0 +; CHECK: @data = global [65536 x i8] zeroinitializer + +; CHECK: define i32 @main() { +; CHECK: entry: + +; > (move right) +; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[RIGHT:.*]] = add i32 %[[DP1]], 1 +; CHECK: store i32 %[[RIGHT]], ptr @dp, align 4 + +; +++++ (set cell[1] = 5) +; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP2]] +; CHECK: %[[VAL1:.*]] = load i8, ptr %[[PTR1]], align 1 +; CHECK: %[[ADD:.*]] = add i8 %[[VAL1]], 5 +; CHECK: store i8 %[[ADD]], ptr %[[PTR1]], align 1 + +; CMD_MULTIPLY {offset=-1, factor=1}: load counter (cell[1]), +; compute cell[0] += counter * 1, zero cell[1]. 
+; CHECK: %[[DP3:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[CPTR:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP3]] +; CHECK: %[[COUNTER:.*]] = load i8, ptr %[[CPTR]], align 1 +; CHECK: %[[DP4:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[TIDX:.*]] = add i32 %[[DP4]], -1 +; CHECK: %[[TPTR:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[TIDX]] +; CHECK: %[[TVAL:.*]] = load i8, ptr %[[TPTR]], align 1 +; CHECK: %[[PROD:.*]] = mul i8 %[[COUNTER]], 1 +; CHECK: %[[NEW:.*]] = add i8 %[[TVAL]], %[[PROD]] +; CHECK: store i8 %[[NEW]], ptr %[[TPTR]], align 1 +; CHECK: store i8 0, ptr %[[CPTR]], align 1 + +; CHECK: ret i32 0 +; CHECK: } From af59d879cd1d2bc6ca02d87fad6dbb5ed0d01876 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:17:48 +0100 Subject: [PATCH 4/8] opt: create dp as alloca rather than global Removes the @dp global variable and replaces it with an alloca in the main function entry block. The LLVMValueRef ctx->dp is still a pointer so all load/store callsites are unchanged. With LLVM's mem2reg pass (applied under -O) the alloca is promoted to an SSA register, removing all dp memory traffic. Updates all FileCheck tests accordingly. 
--- src/llvm.c | 5 +++-- test/test_hi.filecheck | 14 ++++++++------ test/test_multiply.filecheck | 15 +++++++++------ test/test_simple_echo.filecheck | 10 ++++++---- test/test_simple_loop.filecheck | 9 ++++++--- test/test_simple_no_io.filecheck | 19 +++++++++++-------- 6 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/llvm.c b/src/llvm.c index 384a840..bf149f5 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -95,8 +95,6 @@ struct llvm_context create_module_preamble(struct program *program, if (program_contains_input(program)) { create_getchar_declaration(&ctx); } - ctx.dp = LLVMAddGlobal(ctx.module, int32_type(&ctx), "dp"); - LLVMSetInitializer(ctx.dp, LLVMConstNull(int32_type(&ctx))); ctx.data = LLVMAddGlobal(ctx.module, data_array_type(&ctx), "data"); LLVMSetInitializer(ctx.data, LLVMConstNull(data_array_type(&ctx))); ctx.js = jump_stack_new(); @@ -115,6 +113,9 @@ void create_main_function(struct llvm_context *ctx) { LLVMBasicBlockRef entry_block = LLVMAppendBasicBlockInContext(ctx->context, ctx->main, "entry"); LLVMPositionBuilderAtEnd(ctx->builder, entry_block); + ctx->dp = LLVMBuildAlloca(ctx->builder, int32_type(ctx), "dp"); + LLVMBuildStore(ctx->builder, + LLVMConstInt(int32_type(ctx), 0, 0), ctx->dp); } LLVMValueRef get_dataptr(struct llvm_context *ctx) { diff --git a/test/test_hi.filecheck b/test/test_hi.filecheck index ad965bc..b8f060c 100644 --- a/test/test_hi.filecheck +++ b/test/test_hi.filecheck @@ -1,42 +1,44 @@ ; RUN: %bf %s.b --emit-llvm | FileCheck %s ; Test brainfuck program that outputs "Hi" -; This should generate IR for setting up 'H' (72) and 'i' (105) and calling putchar ; CHECK: ; ModuleID = 'main' ; CHECK: source_filename = "main" -; CHECK: @dp = global i32 0 +; @dp is now an alloca inside main, not a global. 
+; CHECK-NOT: @dp = global ; CHECK: @data = global [65536 x i8] zeroinitializer ; CHECK: declare i32 @putchar(i32) ; CHECK: define i32 @main() { ; CHECK: entry: +; CHECK: %dp = alloca i32, align 4 +; CHECK: store i32 0, ptr %dp, align 4 ; First, build up to 'H' (ASCII 72) -; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP1:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP1]] ; CHECK: %[[VAL1:.*]] = load i8, ptr %[[PTR1]], align 1 ; CHECK: %[[ADD1:.*]] = add i8 %[[VAL1]], 72 ; CHECK: store i8 %[[ADD1]], ptr %[[PTR1]], align 1 ; Output 'H' with putchar -; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP2:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR2:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP2]] ; CHECK: %[[VAL2:.*]] = load i8, ptr %[[PTR2]], align 1 ; CHECK: %[[EXT1:.*]] = zext i8 %[[VAL2]] to i32 ; CHECK: %[[CALL1:.*]] = call i32 @putchar(i32 %[[EXT1]]) ; Add 33 more to get to 'i' (72 + 33 = 105) -; CHECK: %[[DP3:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP3:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR3:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP3]] ; CHECK: %[[VAL3:.*]] = load i8, ptr %[[PTR3]], align 1 ; CHECK: %[[ADD2:.*]] = add i8 %[[VAL3]], 33 ; CHECK: store i8 %[[ADD2]], ptr %[[PTR3]], align 1 ; Output 'i' with putchar -; CHECK: %[[DP4:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP4:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR4:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP4]] ; CHECK: %[[VAL4:.*]] = load i8, ptr %[[PTR4]], align 1 ; CHECK: %[[EXT2:.*]] = zext i8 %[[VAL4]] to i32 diff --git a/test/test_multiply.filecheck b/test/test_multiply.filecheck index 235130a..1e70ca0 100644 --- a/test/test_multiply.filecheck +++ b/test/test_multiply.filecheck @@ -7,19 +7,22 @@ ; CHECK: ; ModuleID = 'main' ; CHECK: source_filename = "main" -; CHECK: @dp = global i32 0 +; @dp is now an alloca inside 
main, not a global. +; CHECK-NOT: @dp = global ; CHECK: @data = global [65536 x i8] zeroinitializer ; CHECK: define i32 @main() { ; CHECK: entry: +; CHECK: %dp = alloca i32, align 4 +; CHECK: store i32 0, ptr %dp, align 4 ; > (move right) -; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP1:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[RIGHT:.*]] = add i32 %[[DP1]], 1 -; CHECK: store i32 %[[RIGHT]], ptr @dp, align 4 +; CHECK: store i32 %[[RIGHT]], ptr %dp, align 4 ; +++++ (set cell[1] = 5) -; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP2:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP2]] ; CHECK: %[[VAL1:.*]] = load i8, ptr %[[PTR1]], align 1 ; CHECK: %[[ADD:.*]] = add i8 %[[VAL1]], 5 @@ -27,10 +30,10 @@ ; CMD_MULTIPLY {offset=-1, factor=1}: load counter (cell[1]), ; compute cell[0] += counter * 1, zero cell[1]. -; CHECK: %[[DP3:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP3:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[CPTR:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP3]] ; CHECK: %[[COUNTER:.*]] = load i8, ptr %[[CPTR]], align 1 -; CHECK: %[[DP4:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP4:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[TIDX:.*]] = add i32 %[[DP4]], -1 ; CHECK: %[[TPTR:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[TIDX]] ; CHECK: %[[TVAL:.*]] = load i8, ptr %[[TPTR]], align 1 diff --git a/test/test_simple_echo.filecheck b/test/test_simple_echo.filecheck index 40fb089..fde5073 100644 --- a/test/test_simple_echo.filecheck +++ b/test/test_simple_echo.filecheck @@ -1,12 +1,12 @@ ; RUN: %bf %s.b --emit-llvm | FileCheck %s ; Test brainfuck program that echoes a character: ,. -; This should generate IR for getchar() followed by putchar() ; CHECK: ; ModuleID = 'main' ; CHECK: source_filename = "main" -; CHECK: @dp = global i32 0 +; @dp is now an alloca inside main, not a global. 
+; CHECK-NOT: @dp = global ; CHECK: @data = global [65536 x i8] zeroinitializer ; CHECK: declare i32 @putchar(i32) @@ -15,16 +15,18 @@ ; CHECK: define i32 @main() { ; CHECK: entry: +; CHECK: %dp = alloca i32, align 4 +; CHECK: store i32 0, ptr %dp, align 4 ; Input operation (,) - read character with getchar -; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP1:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP1]] ; CHECK: %[[GETCHAR:.*]] = call i32 @getchar() ; CHECK: %[[TRUNC:.*]] = trunc i32 %[[GETCHAR]] to i8 ; CHECK: store i8 %[[TRUNC]], ptr %[[PTR1]], align 1 ; Output operation (.) - write character with putchar -; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP2:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR2:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP2]] ; CHECK: %[[VAL:.*]] = load i8, ptr %[[PTR2]], align 1 ; CHECK: %[[EXT:.*]] = zext i8 %[[VAL]] to i32 diff --git a/test/test_simple_loop.filecheck b/test/test_simple_loop.filecheck index ed63c5b..34479d5 100644 --- a/test/test_simple_loop.filecheck +++ b/test/test_simple_loop.filecheck @@ -6,21 +6,24 @@ ; CHECK: ; ModuleID = 'main' ; CHECK: source_filename = "main" -; CHECK: @dp = global i32 0 +; @dp is now an alloca inside main, not a global. +; CHECK-NOT: @dp = global ; CHECK: @data = global [65536 x i8] zeroinitializer ; CHECK: define i32 @main() { ; CHECK: entry: +; CHECK: %dp = alloca i32, align 4 +; CHECK: store i32 0, ptr %dp, align 4 ; Initial addition of 5 to current cell (+++++). -; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP1:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP1]] ; CHECK: %[[VAL1:.*]] = load i8, ptr %[[PTR1]], align 1 ; CHECK: %[[ADD:.*]] = add i8 %[[VAL1]], 5 ; CHECK: store i8 %[[ADD]], ptr %[[PTR1]], align 1 ; CMD_CLEAR: [-] collapses to a single store of zero. 
-; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP2:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR2:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP2]] ; CHECK: store i8 0, ptr %[[PTR2]], align 1 diff --git a/test/test_simple_no_io.filecheck b/test/test_simple_no_io.filecheck index 28b7d14..b9cbb1f 100644 --- a/test/test_simple_no_io.filecheck +++ b/test/test_simple_no_io.filecheck @@ -3,38 +3,41 @@ ; CHECK: ; ModuleID = 'main' ; CHECK: source_filename = "main" -; CHECK: @dp = global i32 0 +; @dp is now an alloca inside main, not a global. +; CHECK-NOT: @dp = global ; CHECK: @data = global [65536 x i8] zeroinitializer ; CHECK: define i32 @main() { ; CHECK: entry: +; CHECK: %dp = alloca i32, align 4 +; CHECK: store i32 0, ptr %dp, align 4 ; First increment operation (++) -; CHECK: %[[DP1:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP1:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR1:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP1]] ; CHECK: %[[VAL1:.*]] = load i8, ptr %[[PTR1]], align 1 ; CHECK: %[[ADD1:.*]] = add i8 %[[VAL1]], 2 ; CHECK: store i8 %[[ADD1]], ptr %[[PTR1]], align 1 ; Move right operation (>) -; CHECK: %[[DP2:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP2:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[RIGHT:.*]] = add i32 %[[DP2]], 1 -; CHECK: store i32 %[[RIGHT]], ptr @dp, align 4 +; CHECK: store i32 %[[RIGHT]], ptr %dp, align 4 ; Decrement operation (--) -; CHECK: %[[DP3:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP3:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR2:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP3]] ; CHECK: %[[VAL2:.*]] = load i8, ptr %[[PTR2]], align 1 ; CHECK: %[[SUB:.*]] = sub i8 %[[VAL2]], 2 ; CHECK: store i8 %[[SUB]], ptr %[[PTR2]], align 1 ; Move left operation (<) -; CHECK: %[[DP4:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP4:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[LEFT:.*]] = sub i32 %[[DP4]], 1 -; CHECK: store i32 %[[LEFT]], ptr @dp, 
align 4 +; CHECK: store i32 %[[LEFT]], ptr %dp, align 4 ; Final increment operation (++) -; CHECK: %[[DP5:.*]] = load i32, ptr @dp, align 4 +; CHECK: %[[DP5:.*]] = load i32, ptr %dp, align 4 ; CHECK: %[[PTR3:.*]] = getelementptr [65536 x i8], ptr @data, i32 0, i32 %[[DP5]] ; CHECK: %[[VAL3:.*]] = load i8, ptr %[[PTR3]], align 1 ; CHECK: %[[ADD2:.*]] = add i8 %[[VAL3]], 2 From 75e03155545817e6188b25652a51a3311f006412 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:19:47 +0100 Subject: [PATCH 5/8] opt: add LLVM pass pipeline gated on -O/--optimise Wires the already-parsed --optimise flag from main_bfc.c through generate(program, optimise). When true, runs mem2reg,instcombine, simplifycfg,gvn via LLVMRunPasses (new pass manager, LLVM >= 14). mem2reg promotes the dp alloca to SSA registers; gvn eliminates redundant loads; instcombine and simplifycfg clean up the result. Adds the passes component to llvm_map_components_to_libnames. FileCheck tests are unaffected as they do not pass -O. 
--- cmake/llvm.cmake | 2 +- src/llvm.c | 17 ++++++++++++++++- src/llvm.h | 5 ++++- src/main_bfc.c | 2 +- test/main_fuzz.c | 3 ++- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/cmake/llvm.cmake b/cmake/llvm.cmake index 3ddb958..8253637 100644 --- a/cmake/llvm.cmake +++ b/cmake/llvm.cmake @@ -68,7 +68,7 @@ else() link_directories(${LLVM_LIBRARY_DIRS}) endif() - llvm_map_components_to_libnames(llvm_libs support core irreader) + llvm_map_components_to_libnames(llvm_libs support core irreader passes) # Use an OBJECT library for shared sources to avoid flag leakage set(LIB_SOURCES diff --git a/src/llvm.c b/src/llvm.c index bf149f5..bd73c8e 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -4,6 +4,8 @@ #include #include +#include <llvm-c/Transforms/PassBuilder.h> + #include "common.h" #include "ir.h" @@ -250,7 +252,7 @@ void right_bracket(struct llvm_context *ctx) { LLVMPositionBuilderAtEnd(ctx->builder, pair.exit); } -LLVMModuleRef generate(struct program *program) { +LLVMModuleRef generate(struct program *program, bool optimise) { struct llvm_context ctx = create_module_preamble(program, "main"); create_main_function(&ctx); for (size_t cmd_index = 0; cmd_index < program->length; cmd_index++) { @@ -303,5 +305,18 @@ LLVMModuleRef generate(struct program *program) { } LLVMBuildRet(ctx.builder, LLVMConstInt(int32_type(&ctx), 0, 0)); LLVMDisposeBuilder(ctx.builder); + if (optimise) { + LLVMPassBuilderOptionsRef opts = + LLVMCreatePassBuilderOptions(); + LLVMErrorRef err = LLVMRunPasses( + ctx.module, "mem2reg,instcombine,simplifycfg,gvn", NULL, + opts); + if (err) { + char *msg = LLVMGetErrorMessage(err); + fprintf(stderr, "Pass error: %s\n", msg); + LLVMDisposeErrorMessage(msg); + } + LLVMDisposePassBuilderOptions(opts); + } return ctx.module; } diff --git a/src/llvm.h b/src/llvm.h index 426b767..21adf80 100644 --- a/src/llvm.h +++ b/src/llvm.h @@ -1,14 +1,17 @@ #ifndef LLVM_H #define LLVM_H +#include <stdbool.h> + #include #include "ir.h" /// Generate LLVM IR for a parsed Brainfuck program.
/// @param program Parsed Brainfuck program. +/// @param optimise Run LLVM optimisation passes (mem2reg, instcombine, etc.). /// @return Generated LLVM module. -LLVMModuleRef generate(struct program *program); +LLVMModuleRef generate(struct program *program, bool optimise); /// Release an LLVM module created by generate(). /// @param module LLVM module created by `generate`. diff --git a/src/main_bfc.c b/src/main_bfc.c index f88d3de..1b5f4f4 100644 --- a/src/main_bfc.c +++ b/src/main_bfc.c @@ -73,7 +73,7 @@ int main(int argc, char **argv) { struct program parsed_program = string_to_program(program_str); free(program_str); optimise_program(&parsed_program); - LLVMModuleRef module = generate(&parsed_program); + LLVMModuleRef module = generate(&parsed_program, optimise); char *err = NULL; LLVMPrintModuleToFile(module, "/dev/stdout", &err); if (err) diff --git a/test/main_fuzz.c b/test/main_fuzz.c index 9b8fd04..670bb05 100644 --- a/test/main_fuzz.c +++ b/test/main_fuzz.c @@ -31,7 +31,8 @@ int main(int argc, char **argv) { input[input_len] = '\0'; clean_whitespace(input); struct program p = string_to_program(input); - LLVMModuleRef module = generate(&p); + optimise_program(&p); + LLVMModuleRef module = generate(&p, false); char *module_str = LLVMPrintModuleToString(module); // Optionally, do something with module_str (e.g., hash, check, // etc.) From da64baf903e3092aa08222ee2d8248e25e322def Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:20:05 +0100 Subject: [PATCH 6/8] plan: remove plan --- PLAN.md | 97 --------------------------------------------------------- 1 file changed, 97 deletions(-) delete mode 100644 PLAN.md diff --git a/PLAN.md b/PLAN.md deleted file mode 100644 index ed825bd..0000000 --- a/PLAN.md +++ /dev/null @@ -1,97 +0,0 @@ -# Optimisation Plan - -Five optimisations for the produced LLVM IR, implemented as separate commits. - -## Status - -- [x] 1. Cancel opposing simple commands -- [ ] 2. 
`CMD_CLEAR` — zero-loop `[-]` → `store i8 0` -- [ ] 3. `CMD_MULTIPLY` — multiply-loop `[->N*+<]` → multiply-add -- [ ] 4. `dp` as `alloca` (enables `mem2reg`) -- [ ] 5. LLVM pass pipeline gated on `-O` -- [ ] 6. Per-optimisation TOML config file - ---- - -## 1. Cancel opposing simple commands - -**Where**: `ir.c` — new `optimise_program()` called from both `main_bfc.c` and `main_bfi.c`. - -Peephole pass over the `cmds` array: adjacent INC/DEC or RIGHT/LEFT pairs subtract counts and, if they fully cancel, are removed. Re-computes bracket jump indices after compaction. - -**Example**: `+++--` → `CMD_SIMPLE_INC(1)` instead of `CMD_SIMPLE_INC(3), CMD_SIMPLE_DEC(2)`. - -Test changes: none (no existing test program uses cancellable patterns). - ---- - -## 2. CMD_CLEAR — zero-loop detection - -**Where**: `ir.c` `optimise_program()`, `llvm.c`, `interp.c`. - -New `CMD_CLEAR` IR node. Pattern detected after cancellation pass: `[` + single INC or DEC body + `]`. Replaced with a single `store i8 0` in codegen, `ctx->data[ctx->dp] = 0` in interpreter. - -**Example**: `+++++[-]` → `CMD_SIMPLE_INC(5), CMD_CLEAR`. - -Test changes: update `test/test_simple_loop.filecheck` (program is `+++++[-]`). - ---- - -## 3. CMD_MULTIPLY — multiply-loop detection - -**Where**: `ir.c` `optimise_program()`, `llvm.c`, `interp.c`. - -New `CMD_MULTIPLY` IR node with up to `MULTIPLY_MOVES_MAX` (8) offset/factor pairs. A loop body matches when: only `+`/`-`/`>`/`<` inside, net pointer movement is zero, loop counter cell has net delta −1. Each non-counter cell touched becomes a `{offset, factor}` move. - -Codegen: load counter, for each move `data[dp+offset] += counter * factor`, then `store i8 0` to counter cell. Interpreter: same arithmetic. - -**Example**: `[->+<]` at dp=1 → `CMD_MULTIPLY {moves=[{offset=-1, factor=1}]}`. - -Test changes: add `test/res/multiply.b` and `test/test_multiply.filecheck`. - ---- - -## 4. 
`dp` as `alloca` - -**Where**: `llvm.c` — `create_main_function()` creates `dp` as an alloca instead of a global. - -`dp` is removed from the global section and created with `LLVMBuildAlloca` in the entry block, immediately initialised to 0. The `LLVMValueRef ctx->dp` is still a pointer (alloca ptr vs global ptr) so all downstream load/store calls are unchanged. - -Without LLVM passes the IR still has explicit load/store; the benefit is unlocked in commit 5 when `mem2reg` promotes the alloca to a register. - -Test changes: update all FileCheck tests — remove `@dp = global i32 0` check, change `ptr @dp` references to `ptr %dp`. - ---- - -## 5. LLVM pass pipeline gated on `-O` - -**Where**: `llvm.c`, `llvm.h`, `cmake/llvm.cmake`, `main_bfc.c`. - -Wires the already-parsed `--optimise`/`-O` flag from `main_bfc.c` through `generate(struct program *, bool optimise)`. When `optimise` is true, runs `"mem2reg,instcombine,simplifycfg,gvn"` via `LLVMRunPasses` (LLVM new pass manager, LLVM ≥ 14). Adds `passes` to `llvm_map_components_to_libnames` in cmake. - -`mem2reg` promotes the `dp` alloca to a register; `gvn` eliminates redundant loads of `@data` elements; `instcombine` and `simplifycfg` clean up the resulting IR. - -Test changes: none (FileCheck tests do not pass `-O`). - ---- - -## 6. Per-optimisation TOML config file - -**Where**: new `src/config.h` / `src/config.c`, updated `main_bfc.c`, updated `optimise_program()` and `generate()` signatures. - -A flat TOML file (default `bf.toml` in the current directory, overridable with `-c`/`--config`) controls each optimisation independently: - -```toml -[optimisations] -cancel_opposing = true -clear_loop = true -multiply_loop = true -dp_alloca = true -llvm_passes = false -``` - -`struct opt_config` holds a boolean for each flag; a minimal built-in parser handles `[section]` headers and `key = true/false` lines. Missing file → all optimisations enabled by default. The `-O` flag becomes a shorthand for enabling all flags. 
- -`optimise_program(struct program *, const struct opt_config *)` and `generate(struct program *, const struct opt_config *)` are updated to gate each pass on its flag. - -Test changes: add `test/res/bf.toml` with specific flags for FileCheck regression tests if needed. From e9bbacecad7de25ff87593bb09cceba3372cce53 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:25:26 +0100 Subject: [PATCH 7/8] fix: clang-format and cpplint violations in new code Replace (unsigned long long) casts with (uint64_t) to satisfy cpplint runtime/int rule. Re-run clang-format to fix indentation in multiply(), detect_multiply_loops(), and the CMD_MULTIPLY interp case. --- src/interp.c | 4 ++-- src/ir.c | 6 +++--- src/llvm.c | 20 ++++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/interp.c b/src/interp.c index d077755..bdd99c2 100644 --- a/src/interp.c +++ b/src/interp.c @@ -178,8 +178,8 @@ int interp(struct context_t *ctx, int out_fd, int in_fd, bool byte_output) { ctx->data[ctx->dp] = 0; break; case CMD_MULTIPLY: - for (size_t i = 0; - i < current_cmd.value.multiply.n_moves; i++) { + for (size_t i = 0; i < current_cmd.value.multiply.n_moves; + i++) { int target = (int)ctx->dp + current_cmd.value.multiply.moves[i].offset; ctx->data[target] += diff --git a/src/ir.c b/src/ir.c index 7ca216d..29ac4eb 100644 --- a/src/ir.c +++ b/src/ir.c @@ -428,9 +428,9 @@ static void detect_multiply_loops(struct program *program) { if (!overflow) { for (size_t k = old; k <= close_idx; k++) { - old_to_new[k] = - (k == old) ? new_len - : SIZE_MAX; + old_to_new[k] = (k == old) + ? 
new_len + : SIZE_MAX; } new_to_old[new_len] = old; struct cmd mc = { diff --git a/src/llvm.c b/src/llvm.c index bd73c8e..22da5ba 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -1,6 +1,7 @@ #include "llvm.h" #include +#include <stdint.h> #include #include @@ -116,8 +117,8 @@ void create_main_function(struct llvm_context *ctx) { LLVMAppendBasicBlockInContext(ctx->context, ctx->main, "entry"); LLVMPositionBuilderAtEnd(ctx->builder, entry_block); ctx->dp = LLVMBuildAlloca(ctx->builder, int32_type(ctx), "dp"); - LLVMBuildStore(ctx->builder, - LLVMConstInt(int32_type(ctx), 0, 0), ctx->dp); + LLVMBuildStore(ctx->builder, LLVMConstInt(int32_type(ctx), 0, 0), + ctx->dp); } LLVMValueRef get_dataptr(struct llvm_context *ctx) { @@ -196,18 +197,18 @@ void multiply(struct llvm_context *ctx, struct multiply_move *moves, LLVMValueRef dp_value = LLVMBuildLoad2(ctx->builder, int32_type(ctx), ctx->dp, ""); LLVMValueRef offset = - LLVMConstInt(int32_type(ctx), (unsigned long long)moves[i].offset, 1); + LLVMConstInt(int32_type(ctx), (uint64_t)moves[i].offset, 1); LLVMValueRef target_idx = LLVMBuildAdd(ctx->builder, dp_value, offset, ""); LLVMValueRef indices[] = {LLVMConstInt(int32_type(ctx), 0, 0), target_idx}; LLVMValueRef target_ptr = - LLVMBuildGEP2(ctx->builder, data_array_type(ctx), - ctx->data, indices, 2, ""); - LLVMValueRef target = - LLVMBuildLoad2(ctx->builder, int8_type(ctx), target_ptr, ""); + LLVMBuildGEP2(ctx->builder, data_array_type(ctx), ctx->data, + indices, 2, ""); + LLVMValueRef target = LLVMBuildLoad2( + ctx->builder, int8_type(ctx), target_ptr, ""); LLVMValueRef factor = - LLVMConstInt(int8_type(ctx), (unsigned long long)moves[i].factor, 1); + LLVMConstInt(int8_type(ctx), (uint64_t)moves[i].factor, 1); LLVMValueRef product = LLVMBuildMul(ctx->builder, counter, factor, ""); LLVMValueRef new_val = @@ -306,8 +307,7 @@ LLVMModuleRef generate(struct program *program, bool optimise) { LLVMBuildRet(ctx.builder, LLVMConstInt(int32_type(&ctx), 0, 0)); LLVMDisposeBuilder(ctx.builder);
if (optimise) { - LLVMPassBuilderOptionsRef opts = - LLVMCreatePassBuilderOptions(); + LLVMPassBuilderOptionsRef opts = LLVMCreatePassBuilderOptions(); LLVMErrorRef err = LLVMRunPasses( ctx.module, "mem2reg,instcombine,simplifycfg,gvn", NULL, opts); From 5df02a7c4016fe9af58c1e94978550a01244f900 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Tue, 30 Jun 2026 16:27:42 +0100 Subject: [PATCH 8/8] fix: add missing stdint.h includes for SIZE_MAX and uint8_t The clang static analyzer in CI flags SIZE_MAX (ir.c) and uint8_t (interp.c) as undeclared without an explicit stdint.h include. --- src/interp.c | 1 + src/ir.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/interp.c b/src/interp.c index bdd99c2..7dc2543 100644 --- a/src/interp.c +++ b/src/interp.c @@ -1,6 +1,7 @@ #include "interp.h" #include +#include <stdint.h> #include #include #include diff --git a/src/ir.c b/src/ir.c index 29ac4eb..baafdbf 100644 --- a/src/ir.c +++ b/src/ir.c @@ -2,6 +2,7 @@ #include #include +#include <stdint.h> #include #include #include