Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
fadf033
Fix the inverted logic error
Apr 18, 2026
f20deb4
Add comments that we don't have overflow check for array access
Apr 18, 2026
2e2fcf4
Add some ASSERTs
Apr 18, 2026
1fc8b00
Add an ASSERT
Apr 18, 2026
8661add
Fix a multithreaded printing bug #1763
Apr 18, 2026
1e5e657
Fix typo
Apr 18, 2026
d8ebc0c
Implement computed gotos
Apr 18, 2026
de3d2de
Added a dynamic binding stack
Apr 18, 2026
ab19281
About to try protect/clean
Apr 18, 2026
8fa3db8
Build worked - moving on to catch/throw/catch_close
Apr 18, 2026
0582374
Added last changes
Apr 18, 2026
23c7b9f
Implemented area trampolines
Apr 19, 2026
3f2607b
backtraces use trampoline info
Apr 19, 2026
b2cfffb
Added missing file
Apr 19, 2026
2644a09
Set up to add trampolines to generic function dispatchers
Apr 20, 2026
f879ad4
Merge branch 'main' into interpreter-work
Apr 20, 2026
7a97c94
Switch from stub/shared trampoline to single trampoline
Apr 20, 2026
4b8516e
Removed legacy trampoline code
Apr 20, 2026
256c7e7
Finishing touches on trampolines
Apr 21, 2026
04b8010
Make trampolines less brittle
Apr 21, 2026
07a709c
Added sampling_profiler source files
Apr 21, 2026
89d8bfc
Improve the sampling_profiler and add flamegraph.lisp
Apr 22, 2026
e8b284e
Add --ext: command line argument extension
Apr 22, 2026
44d0a2d
Fix repos.sexp for closer-mop - it moved
Apr 22, 2026
47dceb4
Add command-line-extensions code
Apr 25, 2026
20bd0d9
Make icicle charts the default
Apr 25, 2026
668dfd0
Restore backtrace info for bytecode
Apr 26, 2026
14f1f5b
Fix a problem on macos with #define _XOPEN_SOURCE 600
drmeister Apr 26, 2026
1d40718
Add repos for cando
drmeister Apr 28, 2026
9e3952f
Silence warnings on macOS
drmeister Apr 28, 2026
995e79e
Improved clasp-analyzer.lisp and fixed some classes
May 2, 2026
c5f12cd
Static analyzer output is now s-expressions
May 6, 2026
b66bb92
Added flexi-streams to repos.sexp
May 10, 2026
23efb34
Change build system to build .dif files and merge them
May 10, 2026
0c9e162
Merged main and analysis-merge
May 10, 2026
612df9e
Set CLASP_ENABLE_TRAMPOLINE to turn on trampolines
May 10, 2026
0a648d6
Optionally turn on trampolines
May 19, 2026
d92d384
Print FLAME_GRAPH_HOME env
May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 38 additions & 8 deletions analyze
Original file line number Diff line number Diff line change
@@ -1,17 +1,47 @@
#!/usr/bin/env bash
set -euo pipefail

# Separate --jobs from passthrough koga args, and capture any positional
# extension names (./analyze cando seqan-clasp).
NINJA_OPTIONS=""

for i in "$@"; do
if [[ $i == --jobs=* ]]; then
NINJA_OPTIONS="-j ${i#*=}"
# Partition the command line into three groups:
#   --jobs=N       -> consumed here and turned into ninja's "-j N"; it is NOT
#                     added to KOGA_ARGS, so koga never sees it
#   other -options -> KOGA_ARGS, forwarded verbatim to every ./koga invocation
#   bare words     -> SELECTED_EXTS, the extension names to analyze
KOGA_ARGS=()
SELECTED_EXTS=()
for arg in "$@"; do
if [[ $arg == --jobs=* ]]; then
# Strip everything up to and including the first '=' to get the job count.
NINJA_OPTIONS="-j ${arg#*=}"
elif [[ $arg == -* ]]; then
KOGA_ARGS+=("$arg")
else
SELECTED_EXTS+=("$arg")
fi
done

./koga --build-mode=bytecode --build-path=build-analysis --clean --no-extensions "$@"
ninja -C build-analysis $NINJA_OPTIONS analyze
# Discover extensions: every extensions/*/ that contains a src/.
# Print, one per line, the name of each directory directly under extensions/
# that has a src/ subdirectory. Prints nothing when there are none.
discover_extensions() {
  local ext_dir
  for ext_dir in extensions/*/; do
    # Skip anything without a src/ directory (this also covers the unexpanded
    # glob when extensions/ has no subdirectories).
    [[ -d "${ext_dir}src" ]] || continue
    basename "$ext_dir"
  done
}

# Decide which extensions to analyze and whether to run the base analysis.
#   no names given -> analyze every discovered extension, plus the base build
#   names given    -> analyze only those, and skip the base build
if [[ ${#SELECTED_EXTS[@]} -eq 0 ]]; then
  EXTS=()
  # Read one name per line instead of EXTS=( $(...) ): an unquoted command
  # substitution is word-split and glob-expanded, so a name containing
  # whitespace or glob characters would be mangled. `mapfile` is avoided
  # because it needs bash >= 4.
  while IFS= read -r ext_name; do
    EXTS+=("$ext_name")
  done < <(discover_extensions)
  RUN_BASE=1
else
  EXTS=( "${SELECTED_EXTS[@]}" )
  RUN_BASE=0 # selective re-run: skip base, diff against committed base
fi

./koga --build-mode=bytecode --build-path=build-analysis --clean --extensions=cando "$@"
ninja -C build-analysis $NINJA_OPTIONS analyze
# Base analysis (no extensions). Skipped when RUN_BASE is 0.
if [[ $RUN_BASE -eq 1 ]]; then
  # ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty. A bare
  # "${KOGA_ARGS[@]}" is an "unbound variable" error under `set -u` on
  # bash < 4.4 (e.g. the bash 3.2 shipped with macOS) when no options were given.
  ./koga --build-mode=bytecode --build-path=build-analysis --clean --no-extensions ${KOGA_ARGS[@]+"${KOGA_ARGS[@]}"}
  # NINJA_OPTIONS is deliberately unquoted so "-j N" splits into two words.
  ninja -C build-analysis $NINJA_OPTIONS analyze
fi

# One clean configure + analyze pass per selected extension.
# Both arrays use the ${arr[@]+"${arr[@]}"} guard: an empty array expanded as
# "${arr[@]}" aborts under `set -u` on bash < 4.4 (e.g. macOS's bash 3.2),
# which would happen when no extensions are found or no koga options are given.
for ext in ${EXTS[@]+"${EXTS[@]}"}; do
  ./koga --build-mode=bytecode --build-path=build-analysis --clean --extensions="$ext" ${KOGA_ARGS[@]+"${KOGA_ARGS[@]}"}
  # NINJA_OPTIONS is deliberately unquoted so "-j N" splits into two words.
  ninja -C build-analysis $NINJA_OPTIONS analyze
done

rm -rf build-analysis
9 changes: 8 additions & 1 deletion include/clasp/core/bytecode.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

namespace core {
class Bytecode_O;
struct VMDynRecord; // defined in clasp/gctools/threadlocal.h
};

template <> struct gctools::GCInfo<core::Bytecode_O> {
Expand Down Expand Up @@ -326,19 +327,25 @@ class VMFrameDynEnv_O : public DynEnv_O {
LISP_CLASS(core, CorePkg, VMFrameDynEnv_O, "VMFrameDynEnv", DynEnv_O);

public:
VMFrameDynEnv_O(T_O** a_old_sp, T_O** a_old_fp) : old_sp(a_old_sp), old_fp(a_old_fp) {}
VMFrameDynEnv_O(T_O** a_old_sp, T_O** a_old_fp, VMDynRecord* a_old_dyn_top)
: old_sp(a_old_sp), old_fp(a_old_fp), old_dyn_top(a_old_dyn_top) {}
// Slightly sketchy: We use the destructor to reset the stack pointer,
// so that C++ unwinds are also affected by this dynenv.
// This means VMFrames must be stack allocated.
// old_dyn_top is the _dynRecordTop mark saved when this bytecode_call was
// entered. If an SJLJ longjmp bypasses this frame, proceed() restores it so
// the VM dynenv-record stack does not keep stale records across activations.
~VMFrameDynEnv_O() {
VirtualMachine& vm = my_thread->_VM;
vm._stackPointer = this->old_sp;
vm._framePointer = this->old_fp;
vm._dynRecordTop = this->old_dyn_top;
}

public:
T_O** old_sp;
T_O** old_fp;
VMDynRecord* old_dyn_top;

public:
virtual SearchStatus search() const { return Continue; }
Expand Down
3 changes: 3 additions & 0 deletions include/clasp/core/clasp_gmpxx.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ THE SOFTWARE.

/* Define a C++ GMP wrapper */

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-literal-operator"
#include <gmpxx.h>
#pragma clang diagnostic pop

typedef mpz_class Bignum;
1 change: 1 addition & 0 deletions include/clasp/core/commandLineOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct CommandLineOptions {
bool _NoRc;
bool _PauseForDebugger;
bool _GenerateTrampolines;
std::vector<std::string> _ExtensionArguments;

bool validStartupTypeOption(const std::string& arg);
void printVersion();
Expand Down
15 changes: 14 additions & 1 deletion include/clasp/core/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ namespace core {
// check out bytecode simple funs).
FORWARD(SimpleFun);
class SimpleFun_O : public Function_O {
LISP_CLASS(core, CorePkg, SimpleFun_O, "SimpleFun", Function_O);
LISP_ABSTRACT_CLASS(core, CorePkg, SimpleFun_O, "SimpleFun", Function_O);

public:
FunctionDescription_sp _FunctionDescription;
Expand Down Expand Up @@ -243,6 +243,19 @@ class SimpleFun_O : public Function_O {
}
};

// Concrete, instantiable subclass of SimpleFun_O. It adds no data members:
// the constructor forwards its arguments unchanged to the SimpleFun_O
// constructor, and templatedSizeof() reports the size of this class.
// NOTE(review): presumably introduced because SimpleFun_O is declared with
// LISP_ABSTRACT_CLASS and a concrete class is still needed for plain
// XEP-backed simple funs — confirm against the code that constructs it.
class XepSimpleFun_O : public SimpleFun_O {
LISP_CLASS(core, CorePkg, XepSimpleFun_O, "XepSimpleFun", SimpleFun_O);

public:
XepSimpleFun_O(FunctionDescription_sp fdesc, T_sp code, const ClaspXepTemplate& xep)
: SimpleFun_O(fdesc, code, xep) {}

virtual size_t templatedSizeof() const override { return sizeof(*this); }
};



// Now that SimpleFun exists we can define these.
template <typename... Ts>
LCC_RETURN Function_O::funcall(Ts... args) {
Expand Down
10 changes: 9 additions & 1 deletion include/clasp/core/iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,17 @@ namespace core {

SMART(Iterator);
class Iterator_O : public General_O {
LISP_CLASS(core, CorePkg, Iterator_O, "Iterator", General_O);
LISP_ABSTRACT_CLASS(core, CorePkg, Iterator_O, "Iterator", General_O);

public:
// Untagged C++ subclasses (e.g. clbind::Iterator<X>) don't have their own
// LISP_CLASS-generated __class() override. Without this, virtual dispatch
// walks past us to General_O::__class() and instances report General_O as
// their class — breaking single-dispatch on iterator= and similar.
virtual core::Instance_sp __class() const override {
return core::lisp_getStaticClass(Iterator_O::static_ValueStampWtagMtag);
}

void initialize() override;

private:
Expand Down
4 changes: 4 additions & 0 deletions include/clasp/core/lisp.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ class Lisp {
std::atomic<T_sp> _AllObjectFiles;
std::atomic<T_sp> _AllCodeBlocks;
std::atomic<T_sp> _AllBytecodeModules;
// Every GFBytecodeSimpleFun ever made (atomic-pushed list of cons cells).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"list of cons cells" indicates that this is a list whose elements are conses, but actually they are GFBytecodeSimpleFuns as suggested by the name.

// Walked by arena_post_load_regenerate_trampolines after a snapshot load
// so the dispatch trampoline for each generic function gets re-attached.
std::atomic<T_sp> _AllGFBytecodeFuns;

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe should be a List_sp

SimpleFun_sp _UnboundCellFunctionEntryPoint;
T_sp _TerminalIO;
List_sp _ActiveThreads;
Expand Down
91 changes: 91 additions & 0 deletions include/clasp/core/sampling_profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* sampling_profiler.h — CPU-time sampling profiler.
*
* At rate `N` Hz, an ITIMER_PROF timer delivers SIGPROF to an arbitrary
* running thread. The handler walks the frame-pointer chain via the
* ucontext registers and appends a sample (timestamp, thread id, depth,
* optional bytecode-VM pc, variable-length PC array) to a per-process
* bump-allocated ring.
*
* Separate from src/core/profiler.cc's RangePush/RangePop instrumentation.
* That profiler measures user-annotated regions; this one periodically
* snapshots whatever code is running.
*
* See Phase 4 / Phase 5 for post-mortem symbolication and flame-graph
* output — this header covers the recording side only.
*/
#pragma once

#include <cstdint>
#include <cstddef>
#include <string>
#include <vector>

namespace core {

// Per-sample header (variable-length record). A SampleHeader is followed
// immediately in the ring buffer by `depth` × uint64_t native PCs.
struct SampleHeader {
uint64_t timestamp_ns; // CLOCK_MONOTONIC at signal delivery
uint64_t vm_pc; // bytecode VM's _pc at sample time, or 0
uint32_t thread_id; // Linux tid / macOS port id (truncated)
uint32_t depth; // number of trailing PCs (0 if walk failed)
};

// Aggregated symbolicated sample: one entry per unique (thread_id, frames)
// group. `frames` is outermost-first (index 0 is the root, last is the
// leaf). `sample_count` is the number of raw samples that collapsed into
// this entry.
struct SymbolicatedSample {
uint32_t thread_id; // thread id shared by every raw sample in this group
size_t sample_count; // number of raw samples collapsed into this entry
std::vector<std::string> frames; // symbolicated frame names, outermost-first
// NOTE(review): encode() is not described in this header; presumably it
// converts this entry into a Lisp object (it returns core::T_sp) — confirm
// against the definition. core::T_sp is not declared by the standard
// headers included above, so this header appears to rely on its includer
// having pulled in the core object headers first — verify.
core::T_sp encode();
};

// Start the profiler.
// rate_hz : sampling rate in Hz (e.g. 97). Clamped to [1, 10000].
// max_depth : per-sample stack-depth cap. Clamped to [1, 8192].
// buffer_bytes : ring buffer size (0 = default 256 MiB).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could just have the default be the actual number for 256 MiB rather than giving 0 a special meaning

// Returns true on success. Fails if the profiler is already running or the
// OS timer/signal setup fails.
bool sampling_profiler_start(unsigned rate_hz,
unsigned max_depth,
size_t buffer_bytes);

// Stop sampling. The buffer is preserved; call
// sampling_profiler_save / sampling_profiler_reset to drain / clear.
void sampling_profiler_stop();

// True while a profile session is active.
bool sampling_profiler_running();

// Discard all captured samples and reset the bump pointer.
void sampling_profiler_reset();

// Write the ring buffer contents to `path` in collapsed-stacks format
// (one stack per line, semicolon-separated, trailing ' <count>'), ready
// to feed Brendan Gregg's flamegraph.pl. Symbolicates on the fly using
// the arena side table, ObjectFile lookup, bytecode-module scan, and
// dladdr. Returns true on success, false on I/O error.
bool sampling_profiler_save(const char* path);

// Return the recorded samples in symbolicated form. Each element's
// `frames` vector holds the symbolicated frame names, outermost-first
// (index 0 is the root, last index is the leaf).
// NOTE(review): per the SymbolicatedSample documentation, entries are
// expected to be aggregated per unique (thread_id, frames) group, with
// `sample_count` giving the number of raw samples in the group, rather
// than one entry per raw sample — confirm against the implementation.
// Prints a warning and returns an empty vector if the profiler is still
// running.
std::vector<SymbolicatedSample> sampling_profiler_symbolicated_samples();

// Populate the calling thread's stack bounds for later frame-walking.
// Must be called from a non-signal context. sampling_profiler_start
// calls this automatically for the calling thread; other threads that
// should be fully profiled need to call ext:profile-register-thread
// (or this function) themselves once before being sampled.
void sampling_profiler_register_current_thread();

// Diagnostics.
size_t sampling_profiler_samples_recorded();
size_t sampling_profiler_samples_dropped();
size_t sampling_profiler_bytes_used();

} // namespace core
1 change: 1 addition & 0 deletions include/clasp/core/unwind.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class DynEnv_O : public General_O {
* C++ code with unknown dynamic environment, e.g. because nontrivial
* destructors need to be run, there are catch blocks, or we simply
* don't know. */
FORWARD(UnknownDynEnv);
class UnknownDynEnv_O : public DynEnv_O {
LISP_CLASS(core, CorePkg, UnknownDynEnv_O, "UnknownDynEnv", DynEnv_O);

Expand Down
13 changes: 12 additions & 1 deletion include/clasp/core/wrappedPointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,20 @@ namespace core {
SMART(WrappedPointer);
class WrappedPointer_O : public core::General_O {
FRIEND_GC_SCANNER(core::WrappedPointer_O);
LISP_CLASS(core, CorePkg, WrappedPointer_O, "WrappedPointer", core::General_O);
LISP_ABSTRACT_CLASS(core, CorePkg, WrappedPointer_O, "WrappedPointer", core::General_O);

public:
// Untagged C++ subclasses (e.g. clbind::Wrapper<OT, HolderType>) don't
// have their own LISP_CLASS-generated __class() override. Without this,
// virtual dispatch walks past us to General_O::__class() and instances
// report General_O as their class — breaking single-dispatch on methods
// that specialize on WrappedPointer. _instanceClass() below returns
// Class_ for tagged subclasses; this provides the fallback for untagged
// templated subclasses that haven't set Class_.
virtual core::Instance_sp __class() const override {
return core::lisp_getStaticClass(WrappedPointer_O::static_ValueStampWtagMtag);
}

gctools::ShiftedStamp ShiftedStamp_;
core::Instance_sp Class_;

Expand Down
36 changes: 36 additions & 0 deletions include/clasp/gctools/threadlocal.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,39 @@ extern int global_debug_virtual_machine;
#define VM_RESET_COUNTERS(vm)
#endif

// ---------- Dynamic-environment records for the bytecode interpreter ----------
// The bytecode VM establishes dynamic environments (tagbody, catch,
// special-bind, progv, unwind-protect) by pushing records onto a side stack
// instead of recursing into bytecode_vm. The entering opcodes push; the
// matching exit opcodes pop; an outer try/catch(Unwind&) in bytecode_vm walks
// the stack to run cleanups / resume at a saved pc on non-local exits.
//
// Currently only the type and the stack exist — no opcodes are migrated yet.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entire parallel structure of dynenvs is pointless duplication. Even if we do want to keep the parallel stack of dynenvs, which I don't think we do, the kind, frame, slots, and all are apparent from the existing dynenv classes (TagbodyDynEnv_O etc.). A separate enum for what are types, padding bytes (??), slots that are punned, this is all really unacceptable. Only the bytecode specific bits like the sp mark and target pc should really need recording anyway, and we could probably just add them to the dynenv classes or make new ones.

Adding to dynenv classes might also remove the need for VMFrameDynEnv which would be good.

enum class VMDynKind : uint8_t {
Tagbody = 1, // from `entry` opcode (not yet migrated)
Catch, // from `catch_8/16` (not yet migrated)
SpecialBind, // from `special_bind` (one per bound cell)
Progv, // from `progv` (one record covers N bindings)
UnwindProtect, // from `protect`
};

// One record on the bytecode VM's dynamic-environment side stack (the stack
// delimited by VirtualMachine::_dynRecordBottom / _dynRecordTop). `kind`
// selects how the kind-specific slot0/slot1 members are interpreted.
struct VMDynRecord {
VMDynKind kind;
// Explicit padding after the 1-byte `kind`: 1 + 7 = 8 bytes precede `frame`.
uint8_t _pad[7];
void* frame; // __builtin_frame_address at establishment
core::T_O* slot0; // kind-specific GC-managed: tag / cell / cleanup closure
core::T_O* slot1; // kind-specific GC-managed: old binding value
core::T_O** sp_mark; // stack pointer at establishment
core::T_O** fp_mark; // frame pointer at establishment
unsigned char* target_pc; // resume pc (Tagbody/Catch)
core::T_O* dynenv_mark; // saved head of my_thread->dynEnvStackGet()
};

struct VirtualMachine {
// Stack size is kind of arbitrary, and really we should make it
// grow and etc.
static constexpr size_t MaxStackWords = 65536;
static constexpr size_t MaxDynRecords = 4096;
bool _Running;
core::T_O** _stackBottom = nullptr;
size_t _stackBytes;
Expand All @@ -101,6 +130,13 @@ struct VirtualMachine {
core::T_O** _literals;
unsigned char* _pc;

// Dynamic-environment record stack. Root-allocated so GC scans the
// T_O*/T_O** slots conservatively. _dynRecordTop points one past the last

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of the slots in the parallel dynenv structures already exist in the dynenv classes, which are already reachable from the control stack or from the thread local state, so GC should not be a concern here

// live record, so an empty stack has _dynRecordTop == _dynRecordBottom.
VMDynRecord* _dynRecordBottom = nullptr;
VMDynRecord* _dynRecordLimit = nullptr;
VMDynRecord* _dynRecordTop = nullptr;

void error();

void enable_guards();
Expand Down
7 changes: 7 additions & 0 deletions include/clasp/llvmo/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ class ObjectFile_O : public LibraryBase_O {
size_t _Size;
size_t _ObjectId;
JITDylib_sp _TheJITDylib;
// If true, this ObjectFile is transient arena-init scaffolding (shared
// trampoline / stub template) that must not be serialized into snapshots.
// The ObjectFile is still registered in _AllObjectFiles normally — LLVM's
// link layer plugin looks it up by name during materialization, so it must

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our link layer plugin looks it up by name during materialization, not LLVM. And it does so for reasons trampolines don't need - so that we can look up DWARF from an instruction pointer. Since trampolines are for bytecode functions, we can use the bytecode debug info mechanisms instead of using DWARF at all. So I don't think trampolines need to go in _AllObjectFiles.

EDIT: Okay, so actually these changes use the trampoline to get closure etc, for some reason, so we do need DWARF. However to figure out if a PC is in an arena we can use arena_lookup_by_pc so we're still not going through object files.

// stay findable at runtime. The snapshot save walker checks this flag and
// skips any ObjectFile with it set.
bool _TransientSkipSnapshot = false;
//
// Code data
void* _TextSectionStart;
Expand Down
1 change: 1 addition & 0 deletions include/clasp/llvmo/jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ THE SOFTWARE.
#include <clasp/llvmo/debugInfoExpose.fwd.h>
#include <clasp/llvmo/translators.h>
#include <clasp/llvmo/insertPoint.fwd.h>
#include <clasp/llvmo/llvmoExpose.h>
#include <clasp/llvmo/debugLoc.fwd.h>
#include <clasp/llvmo/llvmoPackage.h>

Expand Down
2 changes: 0 additions & 2 deletions include/clasp/llvmo/llvmoPackage.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,4 @@ ClaspJIT_sp llvm_sys__clasp_jit();
void initialize_llvm();
void initialize_ClaspJIT();

core::Pointer_mv cmp__compile_trampoline(core::T_sp name);

}; // namespace llvmo
Loading
Loading