#!/bin/bash

# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cross-compile the generated many_conditionals.emb.h header on embedded
# toolchains and report .text and per-function sizes for the generated Ok()
# methods.
#
# Usage: scripts/embedded_bench.sh [out-dir]
#   out-dir defaults to /tmp/embedded-bench.
#
# Run from a clean checkout. Each target compiles a tiny freestanding TU that
# forces the relevant Ok() methods to be emitted, then reports `size` on the
# resulting object file plus the key Ok() symbol sizes from `nm`. Build
# products go to out-dir only; the one file touched inside the repo is the
# golden header, which scripts/regenerate_goldens.py refreshes in place (its
# output is byte-identical to the checked-in goldens on a clean tree).
#
# Targets are skipped (with a warning) when their toolchain is missing:
#   * ARM Cortex-M4 / Thumb-2   arm-none-eabi-g++
#   * MicroBlaze (big-endian)   /opt/microblaze/.../microblaze-buildroot-linux-gnu-g++
#   * Host x86-64               g++ (reference)
#
# Compiler flags can be overridden via the EMBOSS_BENCH_FLAGS environment
# variable; profile_tool.py uses this to sweep optimization levels. The
# "=== <label> ===" headers printed below are fixed strings, so they keep
# saying "-Os" even when the override selects a different level.

set -euo pipefail

OUT="${1:-/tmp/embedded-bench}"
REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
mkdir -p "$OUT"

# Refresh the golden header from the current .emb + generator so we measure the
# live state of the code generator and not a stale checked-in header.
python3 "$REPO/scripts/regenerate_goldens.py" >/dev/null

# Tiny TU that pulls Ok() into the object file. Without a caller each Ok() is a
# weak inline symbol the linker would dead-strip, leaving nothing to measure.
#
# NOTE(review): confirm the standard header on the driver's first #include;
# <cstdint> is assumed here.
DRIVER="$OUT/driver.cc"
cat >"$DRIVER" <<'CPP'
#include <cstdint>

#include "testdata/many_conditionals.emb.h"

// Volatile sink so the optimizer can't fold the Ok() call away.
volatile bool emboss_result_sink;

extern "C" void large_ok(const char *buf) {
  auto v = emboss::test::MakeLargeConditionalsView(buf, 100);
  emboss_result_sink = v.Ok();
}
CPP

# -Os for embedded space optimization; -ffunction-sections / -fdata-sections so
# each function lands in its own section (makes the object's `size` and
# `nm --size-sort` directly comparable across compiles); -fno-exceptions /
# -fno-rtti to match typical embedded builds.
EMBEDDED_FLAGS="${EMBOSS_BENCH_FLAGS:-"-std=c++17 -Os -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti"}"

# Split the flag string into words exactly once. Globbing is switched off for
# the split so a flag containing '*' or '?' is passed through literally
# instead of being expanded against the current directory.
set -f
# shellcheck disable=SC2206
FLAGS=($EMBEDDED_FLAGS)
set +f

# -I"$REPO" resolves the header's runtime/cpp/*.h includes. The driver includes
# "testdata/many_conditionals.emb.h", but the generated header lives under
# testdata/golden_cpp/, so drop a symlink and add it to the include path.
# Kept as an array so a checkout or out-dir path containing spaces survives.
mkdir -p "$OUT/include/testdata"
ln -sf "$REPO/testdata/golden_cpp/many_conditionals.emb.h" \
  "$OUT/include/testdata/many_conditionals.emb.h"
INCLUDES=("-I$REPO" "-I$OUT/include")

# report_size LABEL NM_BIN OBJ
#   Prints the "=== LABEL ===" header, the `size` table for OBJ, then the
#   "--- LargeConditionals::Ok() ---" header followed by the matching nm row
#   (or nothing when the symbol is absent).
report_size() {
  local label="$1"
  local nm_bin="$2"
  local obj="$3"
  echo "=== $label ==="
  # Host binutils `size` reads ELF objects for every arch we emit here, so use
  # it unconditionally rather than a per-target `size`.
  size "$obj"
  echo "--- LargeConditionals::Ok() ---"
  # The template parameter list itself contains '>' characters, so anchor on
  # '>::Ok() const' at end of line to match the outermost Ok() and not any
  # nested inner view's Ok(). `|| true` keeps a missing symbol (grep exit 1
  # under pipefail) from aborting the script.
  "$nm_bin" --size-sort -S --demangle "$obj" 2>/dev/null |
    grep -E "GenericLargeConditionalsView<.*>::Ok\(\) const$" | tail -1 ||
    true
}

# compile_and_report LABEL CXX NM_BIN OBJ [TARGET_FLAG...]
#   Compiles the driver with CXX (shared flags, then target-specific flags,
#   then include paths) into OBJ and reports its sizes.
compile_and_report() {
  local label="$1"
  local cxx="$2"
  local nm_bin="$3"
  local obj="$4"
  shift 4
  "$cxx" "${FLAGS[@]}" "$@" "${INCLUDES[@]}" -c "$DRIVER" -o "$obj"
  report_size "$label" "$nm_bin" "$obj"
}

# --- ARM Cortex-M4 / Thumb-2 (STM32 family) ---
if command -v arm-none-eabi-g++ >/dev/null 2>&1; then
  compile_and_report "ARM Cortex-M4 (Thumb-2, -Os)" \
    arm-none-eabi-g++ arm-none-eabi-nm "$OUT/many_conditionals.thumb.o" \
    -mthumb -mcpu=cortex-m4 -mfloat-abi=soft
else
  echo "WARNING: arm-none-eabi-g++ not found, skipping ARM Cortex-M4 bench." >&2
fi

# --- MicroBlaze (big-endian) ---
MB_PREFIX="/opt/microblaze/microblazebe--glibc--stable-2025.08-1/bin/microblaze-buildroot-linux-gnu"
if [ -x "$MB_PREFIX-g++" ]; then
  compile_and_report "MicroBlaze (big-endian, -Os)" \
    "$MB_PREFIX-g++" "$MB_PREFIX-nm" "$OUT/many_conditionals.microblaze.o"
else
  echo "WARNING: MicroBlaze g++ not found at $MB_PREFIX-g++, skipping MicroBlaze bench." >&2
fi

# --- Host x86-64 reference, same flags, for comparison ---
if command -v g++ >/dev/null 2>&1; then
  compile_and_report "Host x86-64 (-Os)" \
    g++ nm "$OUT/many_conditionals.x86_64.o"
else
  echo "WARNING: g++ not found, skipping Host x86-64 bench." >&2
fi
#!/usr/bin/env python3

# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE: main() passes this docstring to argparse as the --help description, so
# its wording is user-visible output.
"""Profiles Emboss generator output size across git revisions and compiler flags.

For each requested revision the tool:
  1. Checks out the revision.
  2. Pulls the benchmark schema and harness forward from the starting revision,
     so the comparison measures the code generator and not the test surface.
  3. Runs scripts/embedded_bench.sh under each compiler configuration.
  4. Collects the per-target TU size and per-Ok()-symbol sizes.
  5. Writes a markdown report with deltas against the first (baseline) revision.

Must be run from the repo root with a clean working tree. The originally
checked-out branch and a clean tree are restored on completion (or failure).

Example:
  python3 scripts/profile_tool.py --revisions HEAD
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import tempfile

# Every default configuration shares the same flag set and differs only in the
# optimization level, so the table is generated from a single template.
_FLAG_TEMPLATE = (
    "-std=c++17 -{level} -ffunction-sections -fdata-sections"
    " -fno-exceptions -fno-rtti"
)

# Default compiler configurations to sweep, as name -> flag string (the name is
# the optimization level). Override with --configs pointing at a JSON file of
# the same shape.
DEFAULT_CONFIGS = {
    level: _FLAG_TEMPLATE.format(level=level) for level in ("Os", "O2", "O0")
}

# Files pulled forward from the starting revision to every profiled revision so
# the schema and harness stay fixed while only the generator under test varies.
+BENCHMARK_FILES = [ + "testdata/many_conditionals.emb", + "scripts/embedded_bench.sh", + "scripts/regenerate_goldens.py", + "scripts/profile_tool.py", # keep this script itself consistent +] + + +def run_cmd(args, cwd=None, env=None, capture=True): + """Runs a command, returning stripped stdout, raising on non-zero exit.""" + try: + result = subprocess.run( + args, cwd=cwd, env=env, capture_output=capture, text=True, check=True + ) + return result.stdout.strip() if capture else "" + except subprocess.CalledProcessError as e: + print(f"Command failed: {' '.join(args)}") + if capture: + print(f"Stdout:\n{e.stdout}") + print(f"Stderr:\n{e.stderr}") + raise + + +def get_current_branch_or_commit(repo_dir): + """Returns the current branch name, or the commit SHA if detached.""" + result = subprocess.run( + ["git", "symbolic-ref", "--short", "HEAD"], + cwd=repo_dir, + capture_output=True, + text=True, + ) + if result.returncode == 0: + return result.stdout.strip() + return run_cmd(["git", "rev-parse", "HEAD"], cwd=repo_dir) + + +def is_dirty(repo_dir): + """Returns True if the working tree has uncommitted changes.""" + return bool(run_cmd(["git", "status", "--porcelain"], cwd=repo_dir)) + + +def parse_bench_output(text): + """Parses embedded_bench.sh output into {target: {tu_size, symbols}}.""" + results = {} + current_target = None + lines = text.splitlines() + i = 0 + while i < len(lines): + line = lines[i].strip() + if not line: + i += 1 + continue + + # Target header: "=== ===" + m = re.match(r"=== (.*) ===", line) + if m: + current_target = m.group(1) + results[current_target] = {"symbols": {}} + i += 1 + continue + + if current_target: + # `size` column header, followed by the size row on the next line. 
+ if re.match(r"text\s+data\s+bss\s+dec\s+hex\s+filename", line): + i += 1 + if i < len(lines): + m_sizes = re.match( + r"(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+([0-9a-fA-F]+)\s+(\S+)", + lines[i].strip(), + ) + if m_sizes: + results[current_target]["tu_size"] = int(m_sizes.group(1)) + i += 1 + continue + + # Symbol header: "--- ---", followed by the nm line. + m_sym = re.match(r"--- (.*) ---", line) + if m_sym: + current_symbol = m_sym.group(1) + i += 1 + if i < len(lines): + nm_line = lines[i].strip() + if ( + nm_line + and not nm_line.startswith("---") + and not nm_line.startswith("===") + ): + # nm --size-sort -S: "
" + parts = nm_line.split() + if len(parts) >= 2: + try: + size = int(parts[1], 16) + results[current_target]["symbols"][ + current_symbol + ] = size + except ValueError: + pass + i += 1 + continue + + i += 1 + return results + + +def run_bench_for_config(repo_dir, config_flags, out_dir): + """Runs embedded_bench.sh once with the given flags and parses its output.""" + env = os.environ.copy() + env["EMBOSS_BENCH_FLAGS"] = config_flags + bench_out_dir = os.path.join(out_dir, "bench_run") + if os.path.exists(bench_out_dir): + shutil.rmtree(bench_out_dir) + os.makedirs(bench_out_dir) + + stdout = run_cmd( + ["bash", "scripts/embedded_bench.sh", bench_out_dir], + cwd=repo_dir, + env=env, + ) + return parse_bench_output(stdout) + + +def simplify_symbol_name(sym): + """Shortens a GenericView<...>::Method symbol to ::Method.""" + m = re.search(r"Generic([A-Za-z0-9_]+)View<.*>::([A-Za-z0-9_]+)", sym) + if m: + return f"{m.group(1)}::{m.group(2)}" + parts = sym.split("::") + if len(parts) >= 2: + return "::".join(parts[-2:]) + return sym + + +def _delta_cell(value, baseline): + """Formats a "+N (+P%)" delta cell, or "-"/"N/A" when not comparable.""" + if baseline is None or not isinstance(value, int): + return "N/A" + delta = value - baseline + pct = (delta / baseline * 100) if baseline else 0.0 + return f"{delta:+d} ({pct:+.1f}%)" + + +def generate_report(results, configs, out_dir): + """Writes a markdown report of TU and per-symbol sizes with baseline deltas.""" + report_path = os.path.join(out_dir, "profile_report.md") + revisions = list(results.keys()) + with open(report_path, "w") as f: + f.write("# Emboss Optimization Profile Report\n\n") + if not revisions: + f.write("No results.\n") + return + + baseline_rev = revisions[0] + f.write(f"Baseline revision: `{baseline_rev}`\n\n") + + for config_name, config_flags in configs.items(): + f.write(f"## Configuration: {config_name}\n") + f.write(f"Flags: `{config_flags}`\n\n") + + targets = set() + for rev in revisions: + if 
results[rev].get(config_name): + targets.update(results[rev][config_name].keys()) + + if not targets: + f.write("No targets built for this configuration.\n\n") + continue + + for target in sorted(targets): + f.write(f"### Target: {target}\n\n") + + symbols = set() + for rev in revisions: + data = results[rev].get(config_name, {}).get(target) + if data: + symbols.update(data["symbols"].keys()) + symbols = sorted(symbols) + + headers = ["Revision", "TU Size (bytes)", "Delta"] + for sym in symbols: + headers.extend([f"{simplify_symbol_name(sym)} (bytes)", "Delta"]) + f.write("| " + " | ".join(headers) + " |\n") + f.write("| " + " | ".join(["---"] * len(headers)) + " |\n") + + base = results[baseline_rev].get(config_name, {}).get(target) + for rev in revisions: + data = results[rev].get(config_name, {}).get(target) + row = [f"`{rev}`"] + if not data: + row.extend(["N/A", ""] * (1 + len(symbols))) + f.write("| " + " | ".join(row) + " |\n") + continue + + tu_size = data.get("tu_size", "N/A") + row.append(str(tu_size)) + if rev == baseline_rev: + row.append("-") + else: + base_tu = base.get("tu_size") if base else None + row.append(_delta_cell(tu_size, base_tu)) + + for sym in symbols: + sym_size = data["symbols"].get(sym, "N/A") + row.append(str(sym_size)) + if rev == baseline_rev: + row.append("-") + else: + base_sym = base["symbols"].get(sym) if base else None + row.append(_delta_cell(sym_size, base_sym)) + + f.write("| " + " | ".join(row) + " |\n") + f.write("\n") + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--revisions", + nargs="+", + default=["HEAD"], + help="Git revisions to profile; the first is the baseline.", + ) + parser.add_argument( + "--configs", + help="JSON file of compiler configurations (name -> flag string).", + ) + parser.add_argument( + "--out-dir", + default="profile_results", + help="Directory for the report and raw results.", + ) + args = parser.parse_args() + + repo_dir = os.getcwd() + if 
is_dirty(repo_dir): + print("Error: working tree is dirty. Please commit or stash changes.") + return 1 + + configs = DEFAULT_CONFIGS + if args.configs: + with open(args.configs) as f: + configs = json.load(f) + + original_rev = get_current_branch_or_commit(repo_dir) + print(f"Original revision: {original_rev}") + + results = {} # rev -> config -> target -> metrics + os.makedirs(args.out_dir, exist_ok=True) + tmp_run_dir = tempfile.mkdtemp(dir=args.out_dir) + + try: + for rev in args.revisions: + print(f"\n--- Processing revision: {rev} ---") + rev_sha = run_cmd(["git", "rev-parse", rev], cwd=repo_dir) + print(f"Resolved {rev} to {rev_sha}; checking out...") + run_cmd(["git", "checkout", rev_sha], cwd=repo_dir) + + print("Pulling forward benchmark files...") + for path in BENCHMARK_FILES: + try: + run_cmd(["git", "checkout", original_rev, "--", path], cwd=repo_dir) + except subprocess.CalledProcessError: + print(f" Warning: could not pull forward {path}") + + results[rev] = {} + for config_name, config_flags in configs.items(): + print(f" Running config {config_name} ({config_flags})...") + try: + results[rev][config_name] = run_bench_for_config( + repo_dir, config_flags, tmp_run_dir + ) + except Exception as e: # noqa: BLE001 - report and continue + print(f" Error running config {config_name}: {e}") + results[rev][config_name] = None + + run_cmd(["git", "reset", "--hard"], cwd=repo_dir) + finally: + print(f"\nRestoring original revision: {original_rev}...") + run_cmd(["git", "checkout", original_rev], cwd=repo_dir) + run_cmd(["git", "reset", "--hard"], cwd=repo_dir) + shutil.rmtree(tmp_run_dir, ignore_errors=True) + + with open(os.path.join(args.out_dir, "raw_results.json"), "w") as f: + json.dump(results, f, indent=2) + + generate_report(results, configs, args.out_dir) + print(f"\nProfile complete. Report written to {args.out_dir}/profile_report.md") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())