Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7d56bae
Extract trampoline and profiling code from interpreter-work
May 26, 2026
28c4215
Register main thread for profiling
May 27, 2026
f068249
Add AArch64 trampoline templates and demangle profiler symbols
May 27, 2026
32726f3
Validate return addresses against executable mappings in frame walker
May 27, 2026
7676237
Add ext:with-flame-profile macro
May 27, 2026
6de51ba
Export with-frame-profile from ext
May 27, 2026
39ef401
Merge branch 'main' into trampolines
drmeister May 27, 2026
2a2999c
Add ${PID} to CLASP_FLAME_PROFILE=path=something${PID}.svg
drmeister May 27, 2026
308f64b
Remove the strict test checking hashes
May 27, 2026
1ba1df2
Merge branch 'main' into trampolines
May 27, 2026
7e2bf43
flame graph now uses ${HOME}
Jun 1, 2026
84a927d
Picked up some changes to clasp_gc.sif - I'm not sure where
Jun 1, 2026
2a66fac
Added amber-x86.def
drmeister Jun 1, 2026
d95d632
Silence warnings that happen on __aarch64__
drmeister Jun 4, 2026
f8ad298
Added description of Phase 4 and 5
drmeister Jun 4, 2026
6ec8048
Use sampling_profiler_register_current_thread
drmeister Jun 4, 2026
354e362
Use Alex' code
drmeister Jun 4, 2026
607ded8
Provide a more detailed comment
drmeister Jun 4, 2026
b6ddf96
Remove dead code
drmeister Jun 4, 2026
1c7fc69
Update comments
drmeister Jun 13, 2026
35429fb
When trampolines aren't available use VM stack for nargs/args
drmeister Jun 13, 2026
49c8850
Merge branch 'main' into trampolines
drmeister Jun 13, 2026
42f87d0
Merge branch 'main' into trampolines
drmeister Jun 13, 2026
1b8df5a
Address PR request issues
drmeister Jun 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions apptainer/amber-x86.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
BootStrap: docker
From: ubuntu:26.04

%files
$HOME/Downloads/ambertools26.tar.bz2 /downloads/ambertools26.tar.bz2
$HOME/Downloads/rosetta.tar.bz2 /downloads/rosetta.tar.bz2


%post
set -eo pipefail
export DEBIAN_FRONTEND=noninteractive
export DEBCONF_NONINTERACTIVE_SEEN=true
export TZ=America/New_York
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime
echo $TZ > /etc/timezone

# Refresh the package index before upgrading so the upgrade is computed
# against current package lists, and pass -y so the step does not stop at
# apt's confirmation prompt during an unattended image build.
apt update
apt -y upgrade

apt install -y nodejs npm git curl
npm install -g @anthropic-ai/claude-code
useradd -m user
export PATH="/root/.npm/_npx:$PATH"

apt -y install tcsh make \
gcc gfortran g++ \
flex bison patch bc \
libbz2-dev libzip-dev \
xorg-dev wget cmake \
mpich openssh-client linux-perf \
libtool-bin libvterm-dev bzip2 gdb gdbserver

# Unpack the archives copied in by %files and build AmberTools from source.
cd /opt
# Rosetta is unpacked into /opt with its top-level directory stripped.
# NOTE(review): %environment sets ROSETTA_HOME=/opt/rosetta — confirm the
# archive layout actually yields that directory after --strip-components=1.
bunzip2 -c /downloads/rosetta.tar.bz2 | tar xvf - --strip-components=1
# AmberTools is unpacked as-is; the following lines assume it creates
# /opt/ambertools26_src — TODO confirm against the tarball.
bunzip2 -c /downloads/ambertools26.tar.bz2 | tar xvf -
# Out-of-source build directory.
mkdir -p /opt/ambertools26_src/build
cd /opt/ambertools26_src/build
# Install prefix, plus a version-independent /opt/amber symlink that
# %environment uses as AMBERHOME.
mkdir /opt/ambertools26
ln -s /opt/ambertools26 /opt/amber
# Configure with the GNU compilers and with GUI, MPI, CUDA, tests, the bundled
# Miniconda download and Python support all switched off. Output is mirrored
# to cmake.log in the build directory.
cmake /opt/ambertools26_src \
-DCMAKE_INSTALL_PREFIX=/opt/ambertools26 \
-DCOMPILER=GNU \
-DBUILD_GUI=FALSE \
-DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=FALSE \
-DDOWNLOAD_MINICONDA=FALSE \
-DBUILD_PYTHON=FALSE \
2>&1 | tee cmake.log
# Build and install with 20 parallel jobs; output is mirrored to /tmp/make.log.
make -j20 install 2>&1 | tee /tmp/make.log



apt install --yes binutils libboost-all-dev clang-18 libclang-cpp18-dev libclang-18-dev libgmp-dev libfmt-dev libunwind-dev llvm-18 llvm-18-dev ninja-build sbcl jupyterlab emacs openssh-client openssh-server libnetcdf-dev expat gocryptfs nodejs npm strace emacs libelf-dev wget
# Install Miniconda into /home/cando/miniconda3.
# Select the installer that matches the image's CPU architecture instead of
# hard-coding aarch64: `uname -m` reports x86_64 or aarch64 on Linux, which are
# the suffixes Anaconda uses in its installer file names.
MINICONDA_INSTALLER="Miniconda3-latest-Linux-$(uname -m).sh"
wget "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER}"
# -b: batch (non-interactive) install, -p: install prefix.
bash "${MINICONDA_INSTALLER}" -b -p /home/cando/miniconda3
rm -f "${MINICONDA_INSTALLER}"
export PATH="/home/cando/miniconda3/bin:${PATH}"
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
conda install -y -c conda-forge jupyterlab sidecar # ambertools not available
jupyter-lab build
# cd /mnt
# ./koga --reproducible-build --extensions=cando --build-mode=bytecode-faso --llvm-config="/usr/bin/llvm-config-18" --build-path=build-apptainer/
# ninja -C build-apptainer
# ninja -C build-apptainer install
# mkdir -p /home/cando/
# chmod ugo+rwx /home/cando/
# tar -xvf /mnt/systems.tar -C /home/cando
# whoami
# ls /home/
# chmod -R ugo+rw /home/cando/

%environment
# Runtime environment for the container.
# AMBERHOME points at the /opt/amber symlink created in %post.
export AMBERHOME=/opt/amber
export ROSETTA_HOME=/opt/rosetta
export PATH=${AMBERHOME}/bin:/root/.npm/_npx:${PATH}
# Only append the previous LD_LIBRARY_PATH when it is non-empty. A trailing ':'
# is an empty entry, which the dynamic loader treats as the current working
# directory.
export LD_LIBRARY_PATH=${AMBERHOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# Re-export whatever ANTHROPIC_API_KEY value is already set when this runs.
export ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}

# export LD_LIBRARY_PATH=/usr/local/lib
# export XDG_CACHE_HOME=/home/cando/.cache

11 changes: 11 additions & 0 deletions include/clasp/core/clasp_gmpxx.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ THE SOFTWARE.

/* Define a C++ GMP wrapper */

/* On one of the clusters I was building on (I'm going to guess the arm64)
I got a lot of these warnings so I'm adding this #pragma to silence them
*/
#ifdef __aarch64__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-literal-operator"
Comment thread
Bike marked this conversation as resolved.
#endif
#include <gmpxx.h>

// Restore the diagnostic state saved by the `#pragma clang diagnostic push`
// that precedes the <gmpxx.h> include. This guard must test the same macro as
// the push (__aarch64__); otherwise the pop never runs and the
// -Wdeprecated-literal-operator suppression leaks into every file that
// includes this header.
#ifdef __aarch64__
#pragma clang diagnostic pop
#endif

typedef mpz_class Bignum;
1 change: 1 addition & 0 deletions include/clasp/core/commandLineOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct CommandLineOptions {
bool _NoRc;
bool _PauseForDebugger;
bool _GenerateTrampolines;
std::vector<std::string> _ExtensionArguments;

bool validStartupTypeOption(const std::string& arg);
void printVersion();
Expand Down
4 changes: 4 additions & 0 deletions include/clasp/core/lisp.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ class Lisp {
std::atomic<T_sp> _AllObjectFiles;
std::atomic<T_sp> _AllCodeBlocks;
std::atomic<T_sp> _AllBytecodeModules;
// Every GFBytecodeSimpleFun ever made (atomic-pushed list of cons cells).
// Walked by arena_post_load_regenerate_trampolines after a snapshot load
// so the dispatch trampoline for each generic function gets re-attached.
std::atomic<T_sp> _AllGFBytecodeFuns;
SimpleFun_sp _UnboundCellFunctionEntryPoint;
T_sp _TerminalIO;
List_sp _ActiveThreads;
Expand Down
96 changes: 96 additions & 0 deletions include/clasp/core/sampling_profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* sampling_profiler.h — CPU-time sampling profiler.
*
* At rate `N` Hz, an ITIMER_PROF timer delivers SIGPROF to an arbitrary
* running thread. The handler walks the frame-pointer chain via the
* ucontext registers and appends a sample (timestamp, thread id, depth,
* optional bytecode-VM pc, variable-length PC array) to a per-process
* bump-allocated ring.
*
* Separate from src/core/profiler.cc's RangePush/RangePop instrumentation.
* That profiler measures user-annotated regions; this one periodically
* snapshots whatever code is running.
*
* See Phase 4 / Phase 5 for post-mortem symbolication and flame-graph
Comment thread
drmeister marked this conversation as resolved.
* output — this header covers the recording side only.
* Phase 4 is Symbolication
* Phase 5 is collapsed-stacks aggregation - see sampling_profiler.cc
*/
#pragma once

#include <cstdint>
#include <cstddef>
#include <string>
#include <vector>

namespace core {

// Fixed-size prefix of one variable-length sample record. In the ring buffer
// each SampleHeader is followed directly by `depth` native PCs, each stored
// as a uint64_t.
struct SampleHeader {
  // CLOCK_MONOTONIC at signal delivery.
  uint64_t timestamp_ns;

  // Bytecode VM's _pc at sample time, or 0.
  uint64_t vm_pc;

  // Linux tid / macOS port id (truncated).
  uint32_t thread_id;

  // Number of trailing PCs (0 if the walk failed).
  uint32_t depth;
};

// Aggregated symbolicated sample: one entry per unique (thread_id, frames)
// group. `frames` is outermost-first (index 0 is the root, last is the
// leaf). `sample_count` is the number of raw samples that collapsed into
// this entry.
//
// NOTE(review): core::T_sp is not declared by any header this file includes
// (only <cstdint>, <cstddef>, <string>, <vector>), so this header appears to
// rely on the includer having pulled in the Clasp core headers first —
// confirm, or add the appropriate include.
struct SymbolicatedSample {
// Thread the collapsed samples were taken on.
uint32_t thread_id;
// Number of raw samples folded into this entry.
size_t sample_count;
// Symbolicated frame names, root first and leaf last.
std::vector<std::string> frames;
// Defined outside this header; presumably converts this entry into a Lisp
// object — TODO confirm against the implementation.
core::T_sp encode();
};

// Start the profiler.
// rate_hz : sampling rate in Hz (e.g. 97). Clamped to [1, 10000].
// max_depth : per-sample stack-depth cap. Clamped to [1, 8192].
// buffer_bytes : ring buffer size (0 = default 256 MiB).
// Returns true on success. Fails if the profiler is already running or the
// OS timer/signal setup fails.
bool sampling_profiler_start(unsigned rate_hz,
unsigned max_depth,
size_t buffer_bytes);

// Stop sampling. The buffer is preserved; call
// sampling_profiler_save / sampling_profiler_reset to drain / clear.
void sampling_profiler_stop();

// True while a profile session is active.
bool sampling_profiler_running();

// Discard all captured samples and reset the bump pointer.
void sampling_profiler_reset();

// Dump the ring buffer contents to `path` as collapsed-stacks format
// (one stack per line, semicolon-separated, trailing ' <count>'), ready
// to feed Brendan Gregg's flamegraph.pl. Symbolicates on the fly using
// the arena side table, ObjectFile lookup, bytecode-module scan, and
// dladdr. Returns true on success, false on I/O error.
bool sampling_profiler_save(const char* path);

// Return the symbolicated samples. Each SymbolicatedSample holds the
// symbolicated frame names outermost-first (index 0 is the root, last index
// is the leaf). Prints a warning and returns an empty vector if the profiler
// is still running.
// NOTE(review): this comment previously said "one entry per recorded sample",
// but SymbolicatedSample is documented as one entry per unique
// (thread_id, frames) group with a sample_count — confirm which of the two
// the implementation produces.
std::vector<SymbolicatedSample> sampling_profiler_symbolicated_samples();

// Populate the calling thread's stack bounds for later frame-walking.
// Must be called from a non-signal context.
void sampling_profiler_register_current_thread();

// Register an executable memory range with the profiler's return-address
// validator. Call this when new executable pages are allocated (JIT, arena)
// so the frame-pointer walker recognizes return addresses in them.
// Lock-free, safe to call from any thread while the profiler is running.
void sampling_profiler_add_executable_range(uintptr_t lo, uintptr_t hi);

// Diagnostics.
size_t sampling_profiler_samples_recorded();
size_t sampling_profiler_samples_dropped();
size_t sampling_profiler_bytes_used();

} // namespace core
7 changes: 7 additions & 0 deletions include/clasp/llvmo/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ class ObjectFile_O : public LibraryBase_O {
size_t _Size;
size_t _ObjectId;
JITDylib_sp _TheJITDylib;
// If true, this ObjectFile is transient arena-init scaffolding (shared
// trampoline / stub template) that must not be serialized into snapshots.
// The ObjectFile is still registered in _AllObjectFiles normally — LLVM's
// link layer plugin looks it up by name during materialization, so it must
// stay findable at runtime. The snapshot save walker checks this flag and
// skips any ObjectFile with it set.
bool _TransientSkipSnapshot = false;
//
// Code data
void* _TextSectionStart;
Expand Down
1 change: 1 addition & 0 deletions include/clasp/llvmo/jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ THE SOFTWARE.
#include <clasp/llvmo/debugInfoExpose.fwd.h>
#include <clasp/llvmo/translators.h>
#include <clasp/llvmo/insertPoint.fwd.h>
#include <clasp/llvmo/llvmoExpose.h>
#include <clasp/llvmo/debugLoc.fwd.h>
#include <clasp/llvmo/llvmoPackage.h>

Expand Down
5 changes: 5 additions & 0 deletions include/clasp/llvmo/llvmoExpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -3736,4 +3736,9 @@ llvm::raw_pwrite_stream* llvm_stream(core::T_sp stream, llvm::SmallString<1024>&

core::T_sp llvm_sys__lookup_jit_symbol_info(void* ptr);

JITDylib_sp loadModule(Module_sp module, size_t startupID, const std::string& libname);




}; // namespace llvmo
3 changes: 1 addition & 2 deletions include/clasp/llvmo/llvmoPackage.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,8 @@ bool llvm_sys__load_bc(core::Pathname_sp filename, bool verbose, bool print, cor

ClaspJIT_sp llvm_sys__clasp_jit();


void initialize_llvm();
void initialize_ClaspJIT();

core::Pointer_mv cmp__compile_trampoline(core::T_sp name);

}; // namespace llvmo
43 changes: 43 additions & 0 deletions include/clasp/llvmo/trampolineWork.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

/*
File: trampolineWork.h
*/

/*
Copyright (c) 2014, Christian E. Schafmeister

CLASP is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

See directory 'clasp/licenses' for full details.

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* -^- */

#include <clasp/core/common.h>

namespace llvmo {


core::Pointer_mv cmp__compile_trampoline(core::T_sp name);

// Per-generic-function trampoline. Returns the address of an arena slot that
// tail-calls GFBytecodeEntryPoint::entry_point_n. Each GF gets a unique
// address so flame charts and backtraces show its name instead of all GFs
// sharing the static entry_point_n symbol.
core::Pointer_sp cmp__compile_gf_trampoline(core::T_sp name);

}; // namespace llvmo
Loading
Loading