From 06caa59cc3b2787ade201a5ecbe3339930e85105 Mon Sep 17 00:00:00 2001
From: Matt Valentine-House <matt@eightbitraptor.com>
Date: Thu, 7 May 2026 14:24:58 +0100
Subject: [PATCH 01/12] [ruby/mmtk] Introduce support for ractor_belonging.

This is a debug mode in Ruby where an extra word is used after each
object to store the address of the Ractor that owns the object, used for
debug purposes only.

While we're working on Ractors, we also need to be able to test with
MMTk enabled, so we should introduce support for this to the MMTk
binding as well.

As implemented we'll default the binding options to have everything
disabled and hardcoded to 0, as was always the case, but if
RACTOR_CHECK_MODE is enabled, we'll build and pass a valid
RubyBindingOptions object to MMTk.

https://github.com/ruby/mmtk/commit/83cb291313
---
 gc/mmtk/mmtk.c     | 31 ++++++++++++++++++++++++++++---
 gc/mmtk/mmtk.h     |  2 +-
 gc/mmtk/src/api.rs | 12 ++++++++----
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c
index e4cd71925c7ae6..9b1aed4e5bd1f9 100644
--- a/gc/mmtk/mmtk.c
+++ b/gc/mmtk/mmtk.c
@@ -16,6 +16,22 @@
 #include <sys/sysctl.h>
 #endif
 
+#ifndef VM_CHECK_MODE
+# define VM_CHECK_MODE RUBY_DEBUG
+#endif
+
+// From ractor_core.h (parenthesized here so the expansion is precedence-safe)
+#ifndef RACTOR_CHECK_MODE
+# define RACTOR_CHECK_MODE ((VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE))
+#endif
+
+#if RACTOR_CHECK_MODE
+# define RVALUE_SUFFIX_SIZE sizeof(VALUE)
+void rb_ractor_setup_belonging(VALUE obj);
+#else
+# define RVALUE_SUFFIX_SIZE 0
+#endif
+
 struct objspace {
     bool measure_gc_time;
     bool gc_stress;
@@ -557,7 +573,11 @@ void *
 rb_gc_impl_objspace_alloc(void)
 {
     MMTk_Builder *builder = rb_mmtk_builder_init();
-    mmtk_init_binding(builder, NULL, &ruby_upcalls);
+    MMTk_RubyBindingOptions binding_options = {
+        .ractor_check_mode = RACTOR_CHECK_MODE != 0,
+        .suffix_size = RVALUE_SUFFIX_SIZE,
+    };
+    mmtk_init_binding(builder, &binding_options, &ruby_upcalls);
 
     return calloc(1, sizeof(struct objspace));
 }
@@ -885,7 +905,8 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags
         mmtk_handle_user_collection_request(ractor_cache, false, false);
     }
 
-    alloc_size += sizeof(VALUE);
+    // Layout: [hidden size header (sizeof(VALUE))][payload (alloc_size)][suffix (RVALUE_SUFFIX_SIZE)]
+    alloc_size += sizeof(VALUE) + RVALUE_SUFFIX_SIZE;
 
     VALUE *alloc_obj = (VALUE *)rb_mmtk_alloc_fast_path(objspace, ractor_cache, alloc_size);
     if (!alloc_obj) {
@@ -893,7 +914,7 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags
     }
 
     alloc_obj++;
-    alloc_obj[-1] = alloc_size - sizeof(VALUE);
+    alloc_obj[-1] = alloc_size - sizeof(VALUE) - RVALUE_SUFFIX_SIZE;
     alloc_obj[0] = flags;
     alloc_obj[1] = klass;
 
@@ -905,6 +926,10 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags
 
     objspace->total_allocated_objects++;
 
+#if RACTOR_CHECK_MODE
+    rb_ractor_setup_belonging((VALUE)alloc_obj);
+#endif
+
     return (VALUE)alloc_obj;
 }
 
diff --git a/gc/mmtk/mmtk.h b/gc/mmtk/mmtk.h
index ee338c87efe15e..e8f95920ddcaf5 100644
--- a/gc/mmtk/mmtk.h
+++ b/gc/mmtk/mmtk.h
@@ -95,7 +95,7 @@ bool mmtk_is_reachable(MMTk_ObjectReference object);
 MMTk_Builder *mmtk_builder_default(void);
 
 void mmtk_init_binding(MMTk_Builder *builder,
-                       const struct MMTk_RubyBindingOptions *_binding_options,
+                       const struct MMTk_RubyBindingOptions *binding_options,
                        const struct MMTk_RubyUpcalls *upcalls);
 
 void mmtk_initialize_collection(MMTk_VMThread tls);
diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs
index b9797f6fe2df6f..0c73cd74ebc345 100644
--- a/gc/mmtk/src/api.rs
+++ b/gc/mmtk/src/api.rs
@@ -181,7 +181,7 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder {
 #[no_mangle]
 pub unsafe extern "C" fn mmtk_init_binding(
     builder: *mut MMTKBuilder,
-    _binding_options: *const RubyBindingOptions,
+    binding_options: *const RubyBindingOptions,
     upcalls: *const RubyUpcalls,
 ) {
     crate::MUTATOR_THREAD_PANIC_HANDLER
@@ -191,9 +191,13 @@ pub unsafe extern "C" fn mmtk_init_binding(
     crate::set_panic_hook();
 
     let builder: Box<MMTKBuilder> = unsafe { Box::from_raw(builder) };
-    let binding_options = RubyBindingOptions {
-        ractor_check_mode: false,
-        suffix_size: 0,
+    let binding_options = if binding_options.is_null() {
+        RubyBindingOptions {
+            ractor_check_mode: false,
+            suffix_size: 0,
+        }
+    } else {
+        unsafe { (*binding_options).clone() }
     };
     let mmtk_boxed = mmtk_init(&builder);
     let mmtk_static = Box::leak(Box::new(mmtk_boxed));

From 33744d25cfdd056552ab9a464ac250d56dfaaf2c Mon Sep 17 00:00:00 2001
From: Matt Valentine-House <matt@eightbitraptor.com>
Date: Thu, 7 May 2026 15:19:34 +0100
Subject: [PATCH 02/12] [ruby/mmtk] Remove unnecessary null check.

The only caller of this now unconditionally constructs a binding
options object, so this is actually dead code.

https://github.com/ruby/mmtk/commit/d832004e89
---
 gc/mmtk/src/api.rs | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs
index 0c73cd74ebc345..1519d2b6237761 100644
--- a/gc/mmtk/src/api.rs
+++ b/gc/mmtk/src/api.rs
@@ -191,14 +191,7 @@ pub unsafe extern "C" fn mmtk_init_binding(
     crate::set_panic_hook();
 
     let builder: Box<MMTKBuilder> = unsafe { Box::from_raw(builder) };
-    let binding_options = if binding_options.is_null() {
-        RubyBindingOptions {
-            ractor_check_mode: false,
-            suffix_size: 0,
-        }
-    } else {
-        unsafe { (*binding_options).clone() }
-    };
+    let binding_options = unsafe { (*binding_options).clone() }; // SAFETY: caller passes non-null
     let mmtk_boxed = mmtk_init(&builder);
     let mmtk_static = Box::leak(Box::new(mmtk_boxed));
 

From 4a0072d5f29befde814ea0d9a83c711e1f049564 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Chris=20Hasi=C5=84ski?= <krzysztof.hasinski@gmail.com>
Date: Fri, 8 May 2026 19:10:12 +0200
Subject: [PATCH 03/12] Speed up Integer#to_s with a two digit lookup table
 (#16719)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* numeric: emit two decimal digits per iteration in rb_fix2str

Replace the digit-at-a-time loop in rb_fix2str with the standard
itoa 2-digit lookup table for base 10.  Each iteration now
writes two digits using a single (u % 100, u / 100) pair, so the
number of loop iterations is halved for multi-digit integers.
The classic per-digit loop is kept for non-base-10 conversion.

Benchmark (Apple M-series, 5M-10M ops, best of 3 runs):

  case                       base       patch      delta
  ---------                  -----      -----      -----
  1-digit   (5)              64 ns/op   64 ns/op    -0%
  2-digit   (42)             64 ns/op   65 ns/op    +2%  (noise)
  3-digit   (400)            66 ns/op   64 ns/op    -3%
  5-digit   (12345)          69 ns/op   67 ns/op    -3%
  10-digit  (1234567890)     77 ns/op   67 ns/op   -13%
  19-digit  (2^62-1)        111 ns/op   75 ns/op   -33%

The crossover is at ~3 digits: below that the constant setup
dominates and the benefit is within noise, above that the halved
iteration count shows up linearly.  Typical Rails payloads mix
short IDs (1-5 digits) and longer values (timestamps, nanos,
large counts), so the win is workload-dependent but strictly
non-negative for real code.

Correctness: 100k random fuzz across the full fixnum range plus
targeted edges (0, ±1, ±99, ±100, 2^30-1, 2^62-1, etc.) all pass.
make test-all shows 34694 tests, 7325860 assertions, 0 new
failures (same pre-existing TestArgf#test_puts flake as on
master) — test_integer.rb alone runs 38 tests / 421628 assertions
of which Integer#to_s exercises the bulk, all pass.

The 200-byte lookup table sits in .rodata and spans only a few cache
lines (four 64-byte lines, or two 128-byte lines, when aligned).  No
change to public API, no change to bignum conversion, no change to
non-base-10 conversion paths.

* bignum: emit two decimal digits per iteration in big2str_2bdigits

Extend the 2-digit lookup-table itoa optimisation from rb_fix2str to
the inner conversion loop used by Bignum#to_s.  big2str_2bdigits has
two code paths — a leading-chunk path that emits variable-length
digits, and a recursive-chunk path that emits a fixed-width zero-
padded block — and both gain from the halved division count.  The
classic per-digit loop is preserved for non-base-10 conversion.

Moves the ruby_decimal_digit_pairs table from a file-static in
numeric.c to bignum.c next to ruby_digitmap, and exposes it through
internal/bignum.h so both files share the same 200-byte .rodata
instance.

Benchmark (Apple M-series, best of 3 runs, measures bignum-only
speedup against the preceding fixnum commit):

  case         input      base        patch       delta
  -----------  ---------  ----------  ----------  -----
  big_20dig    10^19+...   146 ns/op   124 ns/op   -15%
  big_40dig    10^39+...   174 ns/op   152 ns/op   -13%
  big_100dig   10^99+42    236 ns/op   213 ns/op   -10%
  big_500dig   10^499+7   1119 ns/op  1086 ns/op    -3%
  big_1000dig  10^999     3490 ns/op  3459 ns/op    -1%
  fix_19dig    2^62-1       76 ns/op    76 ns/op     0%  (unchanged path)

Wins concentrate in the 20-100 digit range where big2str_2bdigits
is the dominant cost.  Above ~500 digits the Karatsuba divmod
recursion dominates and the digit-emission saving shrinks to the
noise floor.  The 20-100 range is what actual Ruby code exercises
(financial high-precision sums, nanosecond timestamps, large
counters); crypto-size (1000+ digit) bignums are rare in to_s paths.

Correctness: 100k random fixnum fuzz unchanged, 500 random bignum
fuzz up to 2^256 with cross-check against sprintf("%d"), bases
2/8/16/36 round-trip, plus edge cases (0, just-above-fixnum, ±2^100,
20-digit strings near the fixnum boundary).  test/ruby/test_integer.rb
stays at 38 tests / 421628 assertions / 0 failures, test_bignum.rb
passes 74 / 607 / 0 failures, full make test-all reports 34694
tests / 0 new failures (same TestArgf#test_puts pre-existing flake
as master).

* benchmark: add int_to_s yaml for Integer#to_s

Reproducible benchmark for the two preceding commits.  Covers:

- 1/2/3/5/10/19-digit positive fixnums (spans the break-even point
  and the two large-number wins at the top)
- A negative fixnum (exercises the minus-sign prepend path)
- 20/40/100-digit bignums (spans the big2str_2bdigits win range)
- Two string-interpolation scenarios, so reviewers can see how much
  of the Integer#to_s speedup reaches real code that allocates the
  result string too

Intended to be consumed by benchmark-driver against master vs
int-to-s-twodigit for A/B comparison.  Matches the numbers in the
commit messages of 5bfb7e02a2 and c5df6de835.

---------

Co-authored-by: tomoya ishida <tomoyapenguin@gmail.com>
---
 benchmark/int_to_s.yml | 25 ++++++++++++
 bignum.c               | 86 +++++++++++++++++++++++++++++++++++++-----
 internal/bignum.h      |  1 +
 numeric.c              | 36 ++++++++++++++++--
 4 files changed, 135 insertions(+), 13 deletions(-)
 create mode 100644 benchmark/int_to_s.yml

diff --git a/benchmark/int_to_s.yml b/benchmark/int_to_s.yml
new file mode 100644
index 00000000000000..000dae9612ec54
--- /dev/null
+++ b/benchmark/int_to_s.yml
@@ -0,0 +1,25 @@
+prelude: |
+  # frozen_string_literal: true
+  N1   = 5
+  N2   = 42
+  N3   = 400
+  N5   = 12345
+  N10  = 1_234_567_890
+  N19  = 4_611_686_018_427_387_903
+  NEG  = -1_234_567_890
+  BIG20  = 10 ** 19 + 12_345_678_901_234_567
+  BIG40  = 10 ** 39 + 123_456_789_012_345
+  BIG100 = 10 ** 99 + 42
+benchmark:
+  fix_1digit:   "N1.to_s"
+  fix_2digit:   "N2.to_s"
+  fix_3digit:   "N3.to_s"
+  fix_5digit:   "N5.to_s"
+  fix_10digit:  "N10.to_s"
+  fix_19digit:  "N19.to_s"
+  fix_negative: "NEG.to_s"
+  big_20digit:  "BIG20.to_s"
+  big_40digit:  "BIG40.to_s"
+  big_100digit: "BIG100.to_s"
+  interp_id:    '"id=#{N10}"'
+  interp_mixed: '"a=#{N2},b=#{N5},c=#{N10}"'
diff --git a/bignum.c b/bignum.c
index e4af035caccedd..28924b4eb9cd09 100644
--- a/bignum.c
+++ b/bignum.c
@@ -64,6 +64,21 @@ static const bool debug_integer_pack = (
 
 const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
 
+/* Two-digit decimal lookup table.  Offset 2*n holds the ASCII pair for
+ * n in the range 0..99.  Used by both rb_fix2str in numeric.c and
+ * big2str_2bdigits below to emit two base-10 digits per iteration. */
+const char ruby_decimal_digit_pairs[201] =
+    "00010203040506070809"
+    "10111213141516171819"
+    "20212223242526272829"
+    "30313233343536373839"
+    "40414243444546474849"
+    "50515253545556575859"
+    "60616263646566676869"
+    "70717273747576777879"
+    "80818283848586878889"
+    "90919293949596979899";
+
 #ifndef SIZEOF_BDIGIT_DBL
 # if SIZEOF_INT*2 <= SIZEOF_LONG_LONG
 #  define SIZEOF_BDIGIT_DBL SIZEOF_LONG_LONG
@@ -4811,11 +4826,34 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
             return;
         p = buf;
         j = sizeof(buf);
-        do {
-            BDIGIT_DBL idx = num % b2s->base;
-            num /= b2s->base;
-            p[--j] = ruby_digitmap[idx];
-        } while (num);
+        if (b2s->base == 10) {
+            /* Emit two decimal digits per iteration from ruby_decimal_digit_pairs
+             * (defined next to ruby_digitmap near the top of this file). */
+            while (num >= 100) {
+                BDIGIT_DBL idx = (num % 100) * 2;
+                num /= 100;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            if (num >= 10) {
+                BDIGIT_DBL idx = num * 2;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            else {
+                /* num is 1..9 here (0 was handled above) */
+                p[--j] = (char)('0' + num);
+            }
+        }
+        else {
+            do {
+                BDIGIT_DBL idx = num % b2s->base;
+                num /= b2s->base;
+                p[--j] = ruby_digitmap[idx];
+            } while (num);
+        }
         len = sizeof(buf) - j;
         big2str_alloc(b2s, len + taillen);
         MEMCPY(b2s->ptr, buf + j, char, len);
@@ -4823,11 +4861,39 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail
     else {
         p = b2s->ptr;
         j = b2s->hbase2_numdigits;
-        do {
-            BDIGIT_DBL idx = num % b2s->base;
-            num /= b2s->base;
-            p[--j] = ruby_digitmap[idx];
-        } while (j);
+        if (b2s->base == 10) {
+            /* Non-beginning chunks must emit EXACTLY hbase2_numdigits,
+             * zero-padded on the left.  Consume num in 2-digit groups,
+             * emit the leftover leading digit (if any), then fill the
+             * remaining leading positions with '0'. */
+            while (num >= 100) {
+                BDIGIT_DBL idx = (num % 100) * 2;
+                num /= 100;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            if (num >= 10) {
+                BDIGIT_DBL idx = num * 2;
+                j -= 2;
+                p[j]     = ruby_decimal_digit_pairs[idx];
+                p[j + 1] = ruby_decimal_digit_pairs[idx + 1];
+            }
+            else if (num > 0) {
+                p[--j] = (char)('0' + num);
+            }
+            if (j > 0) {
+                memset(p, '0', j);
+                j = 0;
+            }
+        }
+        else {
+            do {
+                BDIGIT_DBL idx = num % b2s->base;
+                num /= b2s->base;
+                p[--j] = ruby_digitmap[idx];
+            } while (j);
+        }
         len = b2s->hbase2_numdigits;
     }
     b2s->ptr += len;
diff --git a/internal/bignum.h b/internal/bignum.h
index f11fbd3a4d096a..7389a17c747e15 100644
--- a/internal/bignum.h
+++ b/internal/bignum.h
@@ -107,6 +107,7 @@ struct RBignum {
 
 /* bignum.c */
 extern const char ruby_digitmap[];
+extern const char ruby_decimal_digit_pairs[];
 double rb_big_fdiv_double(VALUE x, VALUE y);
 VALUE rb_big_uminus(VALUE x);
 VALUE rb_big_hash(VALUE);
diff --git a/numeric.c b/numeric.c
index 40b7bfc0f8e2b4..175bd7cfa0f730 100644
--- a/numeric.c
+++ b/numeric.c
@@ -4040,6 +4040,11 @@ rb_int_uminus(VALUE num)
     }
 }
 
+/* ruby_decimal_digit_pairs is defined in bignum.c and declared in
+ * internal/bignum.h.  See there for the rationale of the 2-digit
+ * lookup-table itoa optimisation; both rb_fix2str here and big2str_2bdigits
+ * in bignum.c consume it. */
+
 VALUE
 rb_fix2str(VALUE x, int base)
 {
@@ -4072,9 +4077,34 @@ rb_fix2str(VALUE x, int base)
     else {
         u = val;
     }
-    do {
-        *--b = ruby_digitmap[(int)(u % base)];
-    } while (u /= base);
+    if (base == 10) {
+        /* Emit two digits per iteration from a precomputed table.  The
+         * compiler lowers `u % 100` and `u / 100` to a single multiply +
+         * shift, so each iteration costs roughly one multiply, one shift,
+         * and two stores.  About 2x fewer iterations than the classic
+         * per-digit loop for multi-digit inputs. */
+        while (u >= 100) {
+            unsigned long idx = (u % 100) * 2;
+            u /= 100;
+            b -= 2;
+            b[0] = ruby_decimal_digit_pairs[idx];
+            b[1] = ruby_decimal_digit_pairs[idx + 1];
+        }
+        if (u >= 10) {
+            unsigned long idx = u * 2;
+            b -= 2;
+            b[0] = ruby_decimal_digit_pairs[idx];
+            b[1] = ruby_decimal_digit_pairs[idx + 1];
+        }
+        else {
+            *--b = (char)('0' + u);
+        }
+    }
+    else {
+        do {
+            *--b = ruby_digitmap[(int)(u % base)];
+        } while (u /= base);
+    }
     if (neg) {
         *--b = '-';
     }

From 97aa28abab6dc65e2aa0373796546d4ebf2df717 Mon Sep 17 00:00:00 2001
From: Steven Webb <steven.daniel.webb@gmail.com>
Date: Sat, 9 May 2026 02:27:05 +0800
Subject: [PATCH 04/12] Fix gdb rb_ps helper (#16896)

Over time the .gdbinit initializer has drifted from the codebase and the
rb_ps helper no longer works. This PR fixes it. The changes that caused
it to break were:

* 226f37059ec5f3ea3a1417e0bab630c64dbc8ac3 renamed cfp->iseq to cfp->_iseq.
* 6c24904a690eb7c4e20c3fa8c3751acc03454100 switched from storing the last_id to storing the next_id.
* f7ae32ed3b5b93247f9f62a58e3dd129098d0b27 removed ID_ENTRY_SIZE.
---
 .gdbinit        | 15 ++++++++-------
 vm_core.h       |  2 +-
 vm_insnhelper.h |  2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/.gdbinit b/.gdbinit
index bda544c64136ff..0d585beef9eeaa 100644
--- a/.gdbinit
+++ b/.gdbinit
@@ -979,7 +979,7 @@ end
 
 define print_lineno
   set $cfp = $arg0
-  set $iseq = $cfp->iseq
+  set $iseq = rb_get_cfp_iseq($cfp)
   set $pos = $cfp->pc - $iseq->body->iseq_encoded
   if $pos != 0
     set $pos = $pos - 1
@@ -1060,7 +1060,7 @@ define print_id
   else
     set $serial = (rb_id_serial_t)$id
   end
-  if $serial && $serial <= ruby_global_symbols.last_id
+  if $serial && $serial < ruby_global_symbols.next_id
     set $idx = $serial / ID_ENTRY_UNIT
     set $ids = (struct RArray *)ruby_global_symbols.ids
     set $flags = $ids->basic.flags
@@ -1083,7 +1083,7 @@ define print_id
           set $aryptr = $ary->as.heap.ptr
           set $arylen = $ary->as.heap.len
         end
-        set $result = $aryptr[($serial % ID_ENTRY_UNIT) * ID_ENTRY_SIZE + $t]
+        set $result = $aryptr[($serial % ID_ENTRY_UNIT) + $t]
         if $result != RUBY_Qnil
           print_string $result
         else
@@ -1117,16 +1117,17 @@ define rb_ps_thread
   set $cfp = $ps_thread_th->ec->cfp
   set $cfpend = (rb_control_frame_t *)($ps_thread_th->ec->vm_stack + $ps_thread_th->ec->vm_stack_size)-1
   while $cfp < $cfpend
-    if $cfp->iseq
-      if !((VALUE)$cfp->iseq & RUBY_IMMEDIATE_MASK) && (((imemo_ifunc << RUBY_FL_USHIFT) | RUBY_T_IMEMO)==$cfp->iseq->flags & ((RUBY_IMEMO_MASK << RUBY_FL_USHIFT) | RUBY_T_MASK))
+    if $cfp->_iseq
+      set $iseq = rb_get_cfp_iseq($cfp)
+      if !((VALUE)$iseq & RUBY_IMMEDIATE_MASK) && (((imemo_ifunc << RUBY_FL_USHIFT) | RUBY_T_IMEMO)==$iseq->flags & ((RUBY_IMEMO_MASK << RUBY_FL_USHIFT) | RUBY_T_MASK))
         printf "%d:ifunc ", $cfpend-$cfp
         set print symbol-filename on
-        output/a $cfp->iseq.body
+        output/a $iseq.body
         set print symbol-filename off
         printf "\n"
       else
       if $cfp->pc
-        set $location = $cfp->iseq->body->location
+        set $location = $iseq->body->location
         printf "%d:", $cfpend-$cfp
         print_pathobj $location.pathobj
         printf ":"
diff --git a/vm_core.h b/vm_core.h
index 89f80b52c75a37..1e3dcfe04f21ac 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -920,7 +920,7 @@ struct rb_block {
 typedef struct rb_control_frame_struct {
     const VALUE *pc;        // cfp[0]
     VALUE *sp;              // cfp[1]
-    const rb_iseq_t *_iseq; // cfp[2] -- use rb_cfp_iseq(cfp) to read
+    const rb_iseq_t *_iseq; // cfp[2] -- use CFP_ISEQ(cfp) to read
     VALUE self;             // cfp[3] / block[0]
     const VALUE *ep;        // cfp[4] / block[1]
     const void *block_code; // cfp[5] / block[2] -- iseq, ifunc, or forwarded block handler
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index 88c387ee152afa..2d83fb5897a376 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -116,7 +116,7 @@ enum vm_regan_acttype {
 // instruction sequence C struct
 // Uses cfp->_iseq directly because the interpreter always has a valid _iseq
 // field (it's written on exit from JIT code). Code in vm_insnhelper.c that
-// may be called as a ZJIT fallback should use rb_cfp_iseq() instead.
+// may be called as a ZJIT fallback should use CFP_ISEQ() instead.
 #define GET_ISEQ() (GET_CFP()->_iseq)
 
 /**********************************************************/

From 3a5bfb8a144cbc1d8021fd3709e007fc6f5b1261 Mon Sep 17 00:00:00 2001
From: Max Bernstein <tekknolagi@gmail.com>
Date: Fri, 8 May 2026 16:00:32 -0400
Subject: [PATCH 05/12] ZJIT: Use Insn::for_each_operand_mut in Function::find
 (#16902)

ZJIT: Use for_each_operand_mut in Function::find

No need to repeat this matching logic manually.
---
 zjit/src/hir.rs | 241 +-----------------------------------------------
 1 file changed, 5 insertions(+), 236 deletions(-)

diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index 4d006af1ab2c96..27bf5df42741ee 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -2783,243 +2783,12 @@ impl Function {
                 }
             };
         }
-        macro_rules! find_vec {
-            ( $x:expr ) => {
-                {
-                    $x.iter().map(|arg| find!(*arg)).collect()
-                }
-            };
-        }
-        macro_rules! find_branch_edge {
-            ( $edge:ident ) => {
-                {
-                    BranchEdge {
-                        target: $edge.target,
-                        args: find_vec!($edge.args),
-                    }
-                }
-            };
-        }
         let insn_id = find!(insn_id);
-        use Insn::*;
-        match &self.insns[insn_id.0] {
-            result@(Const {..}
-                    | Param
-                    | LoadArg {..}
-                    | Entries {..}
-                    | GetConstantPath {..}
-                    | PatchPoint {..}
-                    | PutSpecialObject {..}
-                    | GetGlobal {..}
-                    | SideExit {..}
-                    | EntryPoint {..}
-                    | LoadPC
-                    | LoadSP
-                    | LoadEC
-                    | GetEP {..}
-                    | LoadSelf
-                    | BreakPoint
-                    | IncrCounterPtr {..}
-                    | IncrCounter(_)) => result.clone(),
-            &Snapshot { state: FrameState { iseq, insn_idx, pc, ref stack, ref locals } } =>
-                Snapshot {
-                    state: FrameState {
-                        iseq,
-                        insn_idx,
-                        pc,
-                        stack: find_vec!(stack),
-                        locals: find_vec!(locals),
-                    }
-                },
-            &Return { val } => Return { val: find!(val) },
-            &FixnumBitCheck { val, index } => FixnumBitCheck { val: find!(val), index },
-            &Throw { throw_state, val, state } => Throw { throw_state, val: find!(val), state },
-            &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
-            &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
-            &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
-            &StringGetbyte { string, index } => StringGetbyte { string: find!(string), index: find!(index) },
-            &StringSetbyteFixnum { string, index, value } => StringSetbyteFixnum { string: find!(string), index: find!(index), value: find!(value) },
-            &StringAppend { recv, other, state } => StringAppend { recv: find!(recv), other: find!(other), state: find!(state) },
-            &StringAppendCodepoint { recv, other, state } => StringAppendCodepoint { recv: find!(recv), other: find!(other), state: find!(state) },
-            &StringEqual { left, right } => StringEqual { left: find!(left), right: find!(right) },
-            &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state },
-            &Test { val } => Test { val: find!(val) },
-            &IsNil { val } => IsNil { val: find!(val) },
-            &IsMethodCfunc { val, cd, cfunc, state } => IsMethodCfunc { val: find!(val), cd, cfunc, state },
-            &IsBitEqual { left, right } => IsBitEqual { left: find!(left), right: find!(right) },
-            &IsBitNotEqual { left, right } => IsBitNotEqual { left: find!(left), right: find!(right) },
-            &BoxBool { val } => BoxBool { val: find!(val) },
-            &BoxFixnum { val, state } => BoxFixnum { val: find!(val), state: find!(state) },
-            &UnboxFixnum { val } => UnboxFixnum { val: find!(val) },
-            &FixnumAref { recv, index } => FixnumAref { recv: find!(recv), index: find!(index) },
-            Jump(target) => Jump(find_branch_edge!(target)),
-            &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) },
-            &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) },
-            &RefineType { val, new_type } => RefineType { val: find!(val), new_type },
-            &HasType { val, expected } => HasType { val: find!(val), expected },
-            &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state },
-            &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state },
-            &GuardBitEquals { val, expected, reason, state, recompile } => GuardBitEquals { val: find!(val), expected, reason, state, recompile },
-            &GuardAnyBitSet { val, mask, mask_name, reason, state } => GuardAnyBitSet { val: find!(val), mask, mask_name, reason, state },
-            &GuardNoBitsSet { val, mask, mask_name, reason, state } => GuardNoBitsSet { val: find!(val), mask, mask_name, reason, state },
-            &GuardGreaterEq { left, right, reason, state } => GuardGreaterEq { left: find!(left), right: find!(right), reason, state },
-            &GuardLess { left, right, state } => GuardLess { left: find!(left), right: find!(right), state },
-            &IsBlockGiven { lep } => IsBlockGiven { lep: find!(lep) },
-            &IsBlockParamModified { flags } => IsBlockParamModified { flags: find!(flags) },
-            &GetBlockParam { level, ep_offset, state } => GetBlockParam { level, ep_offset, state: find!(state) },
-            &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state },
-            &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state },
-            &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state },
-            &FixnumDiv { left, right, state } => FixnumDiv { left: find!(left), right: find!(right), state },
-            &FixnumMod { left, right, state } => FixnumMod { left: find!(left), right: find!(right), state },
-            &FloatAdd { recv, other, state } => FloatAdd { recv: find!(recv), other: find!(other), state },
-            &FloatSub { recv, other, state } => FloatSub { recv: find!(recv), other: find!(other), state },
-            &FloatMul { recv, other, state } => FloatMul { recv: find!(recv), other: find!(other), state },
-            &FloatDiv { recv, other, state } => FloatDiv { recv: find!(recv), other: find!(other), state },
-            &FloatToInt { recv, state } => FloatToInt { recv: find!(recv), state },
-            &FixnumNeq { left, right } => FixnumNeq { left: find!(left), right: find!(right) },
-            &FixnumEq { left, right } => FixnumEq { left: find!(left), right: find!(right) },
-            &FixnumGt { left, right } => FixnumGt { left: find!(left), right: find!(right) },
-            &FixnumGe { left, right } => FixnumGe { left: find!(left), right: find!(right) },
-            &FixnumLt { left, right } => FixnumLt { left: find!(left), right: find!(right) },
-            &FixnumLe { left, right } => FixnumLe { left: find!(left), right: find!(right) },
-            &FixnumAnd { left, right } => FixnumAnd { left: find!(left), right: find!(right) },
-            &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) },
-            &FixnumXor { left, right } => FixnumXor { left: find!(left), right: find!(right) },
-            &IntAnd { left, right } => IntAnd { left: find!(left), right: find!(right) },
-            &IntOr { left, right } => IntOr { left: find!(left), right: find!(right) },
-            &FixnumLShift { left, right, state } => FixnumLShift { left: find!(left), right: find!(right), state },
-            &FixnumRShift { left, right } => FixnumRShift { left: find!(left), right: find!(right) },
-            &ObjToString { val, cd, state } => ObjToString {
-                val: find!(val),
-                cd,
-                state,
-            },
-            &AnyToString { val, str, state } => AnyToString {
-                val: find!(val),
-                str: find!(str),
-                state,
-            },
-            &SendDirect { recv, cd, cme, iseq, ref args, kw_bits, block, state } => SendDirect {
-                recv: find!(recv),
-                cd,
-                cme,
-                iseq,
-                args: find_vec!(args),
-                kw_bits,
-                block,
-                state,
-            },
-            &Send { recv, cd, block, ref args, state, reason } => Send {
-                recv: find!(recv),
-                cd,
-                block,
-                args: find_vec!(args),
-                state,
-                reason,
-            },
-            &SendForward { recv, cd, blockiseq, ref args, state, reason } => SendForward {
-                recv: find!(recv),
-                cd,
-                blockiseq,
-                args: find_vec!(args),
-                state,
-                reason,
-            },
-            &InvokeSuper { recv, cd, blockiseq, ref args, state, reason } => InvokeSuper {
-                recv: find!(recv),
-                cd,
-                blockiseq,
-                args: find_vec!(args),
-                state,
-                reason,
-            },
-            &InvokeSuperForward { recv, cd, blockiseq, ref args, state, reason } => InvokeSuperForward {
-                recv: find!(recv),
-                cd,
-                blockiseq,
-                args: find_vec!(args),
-                state,
-                reason,
-            },
-            &InvokeBlock { cd, ref args, state, reason } => InvokeBlock {
-                cd,
-                args: find_vec!(args),
-                state,
-                reason,
-            },
-            &InvokeBlockIfunc { cd, block_handler, ref args, state } => InvokeBlockIfunc {
-                cd,
-                block_handler: find!(block_handler),
-                args: find_vec!(args),
-                state: find!(state),
-            },
-            &InvokeProc { recv, ref args, state, kw_splat } => InvokeProc {
-                recv: find!(recv),
-                args: find_vec!(args),
-                state: find!(state),
-                kw_splat,
-            },
-            &InvokeBuiltin { bf, recv, ref args, state, leaf, return_type } => InvokeBuiltin { bf, recv: find!(recv), args: find_vec!(args), state, leaf, return_type },
-            &ArrayDup { val, state } => ArrayDup { val: find!(val), state },
-            &HashDup { val, state } => HashDup { val: find!(val), state },
-            &HashAref { hash, key, state } => HashAref { hash: find!(hash), key: find!(key), state },
-            &HashAset { hash, key, val, state } => HashAset { hash: find!(hash), key: find!(key), val: find!(val), state },
-            &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state },
-            &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) },
-            &CCall { cfunc, recv, ref args, name, owner, return_type, elidable } => CCall { cfunc, recv: find!(recv), args: find_vec!(args), name, owner, return_type, elidable },
-            &CCallWithFrame { cd, cfunc, recv, ref args, cme, name, state, return_type, elidable, block } => CCallWithFrame {
-                cd,
-                cfunc,
-                recv: find!(recv),
-                args: find_vec!(args),
-                cme,
-                name,
-                state: find!(state),
-                return_type,
-                elidable,
-                block,
-            },
-            &CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable, block } => CCallVariadic {
-                cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable, block
-            },
-            &CheckMatch { target, pattern, flag, state } => CheckMatch { target: find!(target), pattern: find!(pattern), flag, state: find!(state) },
-            &Defined { op_type, obj, pushval, v, lep_level, state } => Defined { op_type, obj, pushval, v: find!(v), lep_level, state: find!(state) },
-            &DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state },
-            &GetConstant { klass, id, allow_nil, state } => GetConstant { klass: find!(klass), id, allow_nil: find!(allow_nil), state },
-            &NewArray { ref elements, state } => NewArray { elements: find_vec!(elements), state: find!(state) },
-            &NewHash { ref elements, state } => NewHash { elements: find_vec!(elements), state: find!(state) },
-            &NewRange { low, high, flag, state } => NewRange { low: find!(low), high: find!(high), flag, state: find!(state) },
-            &NewRangeFixnum { low, high, flag, state } => NewRangeFixnum { low: find!(low), high: find!(high), flag, state: find!(state) },
-            &ArrayAref { array, index } => ArrayAref { array: find!(array), index: find!(index) },
-            &ArrayAset { array, index, val } => ArrayAset { array: find!(array), index: find!(index), val: find!(val) },
-            &ArrayPop { array, state } => ArrayPop { array: find!(array), state: find!(state) },
-            &ArrayLength { array } => ArrayLength { array: find!(array) },
-            &AdjustBounds { index, length } => AdjustBounds { index: find!(index), length: find!(length) },
-            &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) },
-            &ArrayMin { ref elements, state } => ArrayMin { elements: find_vec!(elements), state: find!(state) },
-            &ArrayInclude { ref elements, target, state } => ArrayInclude { elements: find_vec!(elements), target: find!(target), state: find!(state) },
-            &ArrayPackBuffer { ref elements, fmt, ref buffer, state } => ArrayPackBuffer { elements: find_vec!(elements), fmt: find!(fmt), buffer: (*buffer).map(|buffer| find!(buffer)), state: find!(state) },
-            &DupArrayInclude { ary, target, state } => DupArrayInclude { ary, target: find!(target), state: find!(state) },
-            &ArrayHash { ref elements, state } => ArrayHash { elements: find_vec!(elements), state },
-            &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state },
-            &GetIvar { self_val, id, ic, state } => GetIvar { self_val: find!(self_val), id, ic, state },
-            &LoadField { recv, id, offset, return_type } => LoadField { recv: find!(recv), id, offset, return_type },
-            &StoreField { recv, id, offset, val } => StoreField { recv: find!(recv), id, offset, val: find!(val) },
-            &WriteBarrier { recv, val } => WriteBarrier { recv: find!(recv), val: find!(val) },
-            &SetIvar { self_val, id, ic, val, state } => SetIvar { self_val: find!(self_val), id, ic, val: find!(val), state },
-            &GetClassVar { id, ic, state } => GetClassVar { id, ic, state },
-            &SetClassVar { id, val, ic, state } => SetClassVar { id, val: find!(val), ic, state },
-            &SetLocal { val, ep_offset, level } => SetLocal { val: find!(val), ep_offset, level },
-            &GetSpecialSymbol { symbol_type, state } => GetSpecialSymbol { symbol_type, state },
-            &GetSpecialNumber { nth, state } => GetSpecialNumber { nth, state },
-            &ToArray { val, state } => ToArray { val: find!(val), state },
-            &ToNewArray { val, state } => ToNewArray { val: find!(val), state },
-            &ArrayExtend { left, right, state } => ArrayExtend { left: find!(left), right: find!(right), state },
-            &ArrayPush { array, val, state } => ArrayPush { array: find!(array), val: find!(val), state },
-            &CheckInterrupts { state } => CheckInterrupts { state },
-            &IsA { val, class } => IsA { val: find!(val), class: find!(class) },
-        }
+        let mut result = self.insns[insn_id.0].clone();
+        result.for_each_operand_mut(&mut |operand: &mut InsnId| {
+            *operand = find!(*operand);
+        });
+        result
     }
 
     /// Update DynamicSendReason for the instruction at insn_id

From dc90c26a103ad62df73464cc1896edbcc90bd0c7 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Fri, 8 May 2026 21:04:08 +0200
Subject: [PATCH 06/12] [ruby/prism] Respect `encoding` option in `Prism.lex`
 and friends

UTF-8 is the default for source files but can be overridden via options.

https://github.com/ruby/prism/commit/355f451528
---
 prism/extension.c      |  2 +-
 test/prism/lex_test.rb | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/prism/extension.c b/prism/extension.c
index 9f9169cfff7880..27df8dac50ddff 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o
     parse_lex_data_t parse_lex_data = {
         .source = source,
         .tokens = rb_ary_new(),
-        .encoding = rb_utf8_encoding(),
+        .encoding = rb_enc_find(pm_parser_encoding_name(parser)),
         .freeze = pm_options_freeze(options),
     };
 
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 8ea7ce7e9b258f..1e06d52184b3c5 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -47,6 +47,24 @@ def test_parse_lex_file
       end
     end
 
+    def test_lex_encoding
+      tokens = Prism.lex('"わたし"', encoding: Encoding::Windows_31J).value
+      tokens.each do |t|
+        assert_equal(Encoding::Windows_31J, t[0].value.encoding)
+      end
+
+      # Shebangs must appear on the first line. For these cases, the encoding
+      # comment may appear second, but it should still change encoding.
+      tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value
+        #! /usr/bin/env ruby
+        # encoding: utf-8
+        "わたし"
+      RUBY
+      tokens.each do |t|
+        assert_equal(Encoding::UTF_8, t[0].value.encoding)
+      end
+    end
+
     if RUBY_VERSION >= "3.3"
       def test_lex_compat
         source = "foo bar"

From 07ae044b0dd4968b4ef6dd072cc0a2a851d79902 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Fri, 8 May 2026 21:10:27 +0200
Subject: [PATCH 07/12] [ruby/prism] Take the string's encoding as the initial
 encoding in the ripper translator

When no magic encoding comment is present, the initial encoding does not
default to UTF-8; the encoding of the string that contains the source code
is used instead. Most of the time that will be UTF-8, but not always.

https://github.com/ruby/prism/commit/1a273db780
---
 lib/prism/translation/ripper.rb | 5 +++--
 test/prism/ruby/ripper_test.rb  | 6 ++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index ddcec997b94efa..f179a149a1df45 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -57,7 +57,8 @@ def self.parse(src, filename = "(ripper)", lineno = 1)
       #          [[1, 13], :on_kw,     "end", END      ]]
       #
       def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
-        result = Prism.lex_compat(coerce_source(src), filepath: filename, line: lineno, version: "current")
+        coerced = coerce_source(src)
+        result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
 
         if result.failure? && raise_errors
           raise SyntaxError, result.errors.first.message
@@ -4077,7 +4078,7 @@ def visit_yield_node(node)
 
       # Lazily initialize the parse result.
       def result
-        @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true)
+        @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
       end
 
       def line_and_column_cache
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 05be087868d811..4fff630561e7d6 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -224,6 +224,12 @@ def test_tokenize
       assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))
     end
 
+    def test_encoding
+      source = '"わたし"'.encode(Encoding::Windows_31J)
+      assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))
+      assert_equal(Ripper.sexp(source), Translation::Ripper.sexp(source))
+    end
+
     def test_sexp_coercion
       string_like = Object.new
       def string_like.to_str

From 3d861274e6388f28a26496c473d605a286e6d3d2 Mon Sep 17 00:00:00 2001
From: John Hawthorn <john@hawthorn.email>
Date: Fri, 8 May 2026 10:39:48 -0700
Subject: [PATCH 08/12] Introduce RMATCH_{BEG,END,NREGS} helpers

These are internal-only helpers which can be used instead of the
RMATCH_REGS struct directly. RMATCH_REGS is just a pointer offset from
the RMatch VALUE itself, so this should not significantly affect
codegen.

The motivation for this is that it is simpler, and that it should move us
towards being able to replace the storage for RMATCH and to store the
positions embedded instead of in separately malloc'ed memory.
---
 depend            |  17 +++++++
 ext/ripper/depend |   2 +
 internal/re.h     |  19 ++++++++
 re.c              | 120 +++++++++++++++++++---------------------------
 string.c          |  59 ++++++++++-------------
 5 files changed, 113 insertions(+), 104 deletions(-)

diff --git a/depend b/depend
index b4a7882d8b30f5..a17eb16f758660 100644
--- a/depend
+++ b/depend
@@ -4878,6 +4878,7 @@ enum.$(OBJEXT): {$(VPATH)}internal/core/rclass.h
 enum.$(OBJEXT): {$(VPATH)}internal/core/rdata.h
 enum.$(OBJEXT): {$(VPATH)}internal/core/rfile.h
 enum.$(OBJEXT): {$(VPATH)}internal/core/rhash.h
+enum.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h
 enum.$(OBJEXT): {$(VPATH)}internal/core/robject.h
 enum.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h
 enum.$(OBJEXT): {$(VPATH)}internal/core/rstring.h
@@ -4968,6 +4969,8 @@ enum.$(OBJEXT): {$(VPATH)}internal/xmalloc.h
 enum.$(OBJEXT): {$(VPATH)}missing.h
 enum.$(OBJEXT): {$(VPATH)}onigmo.h
 enum.$(OBJEXT): {$(VPATH)}oniguruma.h
+enum.$(OBJEXT): {$(VPATH)}re.h
+enum.$(OBJEXT): {$(VPATH)}regex.h
 enum.$(OBJEXT): {$(VPATH)}ruby_assert.h
 enum.$(OBJEXT): {$(VPATH)}shape.h
 enum.$(OBJEXT): {$(VPATH)}st.h
@@ -8965,6 +8968,7 @@ marshal.$(OBJEXT): {$(VPATH)}internal/core/rclass.h
 marshal.$(OBJEXT): {$(VPATH)}internal/core/rdata.h
 marshal.$(OBJEXT): {$(VPATH)}internal/core/rfile.h
 marshal.$(OBJEXT): {$(VPATH)}internal/core/rhash.h
+marshal.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h
 marshal.$(OBJEXT): {$(VPATH)}internal/core/robject.h
 marshal.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h
 marshal.$(OBJEXT): {$(VPATH)}internal/core/rstring.h
@@ -9060,6 +9064,8 @@ marshal.$(OBJEXT): {$(VPATH)}missing.h
 marshal.$(OBJEXT): {$(VPATH)}node.h
 marshal.$(OBJEXT): {$(VPATH)}onigmo.h
 marshal.$(OBJEXT): {$(VPATH)}oniguruma.h
+marshal.$(OBJEXT): {$(VPATH)}re.h
+marshal.$(OBJEXT): {$(VPATH)}regex.h
 marshal.$(OBJEXT): {$(VPATH)}ruby_assert.h
 marshal.$(OBJEXT): {$(VPATH)}ruby_atomic.h
 marshal.$(OBJEXT): {$(VPATH)}rubyparser.h
@@ -10952,6 +10958,7 @@ parse.$(OBJEXT): {$(VPATH)}internal/core/rclass.h
 parse.$(OBJEXT): {$(VPATH)}internal/core/rdata.h
 parse.$(OBJEXT): {$(VPATH)}internal/core/rfile.h
 parse.$(OBJEXT): {$(VPATH)}internal/core/rhash.h
+parse.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h
 parse.$(OBJEXT): {$(VPATH)}internal/core/robject.h
 parse.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h
 parse.$(OBJEXT): {$(VPATH)}internal/core/rstring.h
@@ -11054,6 +11061,7 @@ parse.$(OBJEXT): {$(VPATH)}parser_st.h
 parse.$(OBJEXT): {$(VPATH)}probes.dmyh
 parse.$(OBJEXT): {$(VPATH)}probes.h
 parse.$(OBJEXT): {$(VPATH)}ractor.h
+parse.$(OBJEXT): {$(VPATH)}re.h
 parse.$(OBJEXT): {$(VPATH)}regenc.h
 parse.$(OBJEXT): {$(VPATH)}regex.h
 parse.$(OBJEXT): {$(VPATH)}ruby_assert.h
@@ -15946,6 +15954,7 @@ ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rclass.h
 ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rdata.h
 ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rfile.h
 ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rhash.h
+ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h
 ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/robject.h
 ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h
 ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rstring.h
@@ -16037,6 +16046,8 @@ ruby_parser.$(OBJEXT): {$(VPATH)}missing.h
 ruby_parser.$(OBJEXT): {$(VPATH)}node.h
 ruby_parser.$(OBJEXT): {$(VPATH)}onigmo.h
 ruby_parser.$(OBJEXT): {$(VPATH)}oniguruma.h
+ruby_parser.$(OBJEXT): {$(VPATH)}re.h
+ruby_parser.$(OBJEXT): {$(VPATH)}regex.h
 ruby_parser.$(OBJEXT): {$(VPATH)}ruby_assert.h
 ruby_parser.$(OBJEXT): {$(VPATH)}ruby_parser.c
 ruby_parser.$(OBJEXT): {$(VPATH)}rubyparser.h
@@ -19345,6 +19356,7 @@ variable.$(OBJEXT): {$(VPATH)}internal/core/rclass.h
 variable.$(OBJEXT): {$(VPATH)}internal/core/rdata.h
 variable.$(OBJEXT): {$(VPATH)}internal/core/rfile.h
 variable.$(OBJEXT): {$(VPATH)}internal/core/rhash.h
+variable.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h
 variable.$(OBJEXT): {$(VPATH)}internal/core/robject.h
 variable.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h
 variable.$(OBJEXT): {$(VPATH)}internal/core/rstring.h
@@ -19439,6 +19451,8 @@ variable.$(OBJEXT): {$(VPATH)}onigmo.h
 variable.$(OBJEXT): {$(VPATH)}oniguruma.h
 variable.$(OBJEXT): {$(VPATH)}ractor.h
 variable.$(OBJEXT): {$(VPATH)}ractor_core.h
+variable.$(OBJEXT): {$(VPATH)}re.h
+variable.$(OBJEXT): {$(VPATH)}regex.h
 variable.$(OBJEXT): {$(VPATH)}ruby_assert.h
 variable.$(OBJEXT): {$(VPATH)}ruby_atomic.h
 variable.$(OBJEXT): {$(VPATH)}rubyparser.h
@@ -19839,6 +19853,7 @@ vm.$(OBJEXT): {$(VPATH)}internal/core/rclass.h
 vm.$(OBJEXT): {$(VPATH)}internal/core/rdata.h
 vm.$(OBJEXT): {$(VPATH)}internal/core/rfile.h
 vm.$(OBJEXT): {$(VPATH)}internal/core/rhash.h
+vm.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h
 vm.$(OBJEXT): {$(VPATH)}internal/core/robject.h
 vm.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h
 vm.$(OBJEXT): {$(VPATH)}internal/core/rstring.h
@@ -19941,6 +19956,8 @@ vm.$(OBJEXT): {$(VPATH)}probes.h
 vm.$(OBJEXT): {$(VPATH)}probes_helper.h
 vm.$(OBJEXT): {$(VPATH)}ractor.h
 vm.$(OBJEXT): {$(VPATH)}ractor_core.h
+vm.$(OBJEXT): {$(VPATH)}re.h
+vm.$(OBJEXT): {$(VPATH)}regex.h
 vm.$(OBJEXT): {$(VPATH)}ruby_assert.h
 vm.$(OBJEXT): {$(VPATH)}ruby_atomic.h
 vm.$(OBJEXT): {$(VPATH)}rubyparser.h
diff --git a/ext/ripper/depend b/ext/ripper/depend
index 96d41c87b89ac0..db83378a1d53db 100644
--- a/ext/ripper/depend
+++ b/ext/ripper/depend
@@ -474,6 +474,7 @@ ripper.o: $(hdrdir)/ruby/internal/core/rclass.h
 ripper.o: $(hdrdir)/ruby/internal/core/rdata.h
 ripper.o: $(hdrdir)/ruby/internal/core/rfile.h
 ripper.o: $(hdrdir)/ruby/internal/core/rhash.h
+ripper.o: $(hdrdir)/ruby/internal/core/rmatch.h
 ripper.o: $(hdrdir)/ruby/internal/core/robject.h
 ripper.o: $(hdrdir)/ruby/internal/core/rregexp.h
 ripper.o: $(hdrdir)/ruby/internal/core/rstring.h
@@ -566,6 +567,7 @@ ripper.o: $(hdrdir)/ruby/missing.h
 ripper.o: $(hdrdir)/ruby/onigmo.h
 ripper.o: $(hdrdir)/ruby/oniguruma.h
 ripper.o: $(hdrdir)/ruby/ractor.h
+ripper.o: $(hdrdir)/ruby/re.h
 ripper.o: $(hdrdir)/ruby/regex.h
 ripper.o: $(hdrdir)/ruby/ruby.h
 ripper.o: $(hdrdir)/ruby/st.h
diff --git a/internal/re.h b/internal/re.h
index 2d2eba0dc1905c..52a05902adaf23 100644
--- a/internal/re.h
+++ b/internal/re.h
@@ -10,6 +10,25 @@
  */
 #include "ruby/internal/stdbool.h"     /* for bool */
 #include "ruby/ruby.h"          /* for VALUE */
+#include "ruby/re.h"            /* for struct RMatch and struct re_registers */
+
+static inline long
+RMATCH_BEG(VALUE match, int i)
+{
+    return RMATCH(match)->regs.beg[i];
+}
+
+static inline long
+RMATCH_END(VALUE match, int i)
+{
+    return RMATCH(match)->regs.end[i];
+}
+
+static inline int
+RMATCH_NREGS(VALUE match)
+{
+    return RMATCH(match)->regs.num_regs;
+}
 
 /* re.c */
 VALUE rb_reg_s_alloc(VALUE klass);
diff --git a/re.c b/re.c
index e65369424a03a0..e4f580ecc06513 100644
--- a/re.c
+++ b/re.c
@@ -1179,7 +1179,7 @@ static VALUE
 match_size(VALUE match)
 {
     match_check(match);
-    return INT2FIX(RMATCH_REGS(match)->num_regs);
+    return INT2FIX(RMATCH_NREGS(match));
 }
 
 static int name_to_backref_number(const struct re_registers *, VALUE, const char*, const char*);
@@ -1193,9 +1193,9 @@ name_to_backref_error(VALUE name)
 }
 
 static void
-backref_number_check(struct re_registers *regs, int i)
+backref_number_check(VALUE match, int i)
 {
-    if (i < 0 || regs->num_regs <= i)
+    if (i < 0 || RMATCH_NREGS(match) <= i)
         rb_raise(rb_eIndexError, "index %d out of matches", i);
 }
 
@@ -1245,12 +1245,11 @@ static VALUE
 match_offset(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return rb_assoc_new(Qnil, Qnil);
 
     update_char_offset(match);
@@ -1280,14 +1279,13 @@ static VALUE
 match_byteoffset(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return rb_assoc_new(Qnil, Qnil);
-    return rb_assoc_new(LONG2NUM(BEG(i)), LONG2NUM(END(i)));
+    return rb_assoc_new(LONG2NUM(RMATCH_BEG(match, i)), LONG2NUM(RMATCH_END(match, i)));
 }
 
 
@@ -1304,14 +1302,13 @@ static VALUE
 match_bytebegin(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return Qnil;
-    return LONG2NUM(BEG(i));
+    return LONG2NUM(RMATCH_BEG(match, i));
 }
 
 
@@ -1328,14 +1325,13 @@ static VALUE
 match_byteend(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return Qnil;
-    return LONG2NUM(END(i));
+    return LONG2NUM(RMATCH_END(match, i));
 }
 
 
@@ -1352,12 +1348,11 @@ static VALUE
 match_begin(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return Qnil;
 
     update_char_offset(match);
@@ -1378,12 +1373,11 @@ static VALUE
 match_end(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return Qnil;
 
     update_char_offset(match);
@@ -1420,11 +1414,10 @@ static VALUE
 match_nth(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    long start = BEG(i), end = END(i);
+    long start = RMATCH_BEG(match, i), end = RMATCH_END(match, i);
     if (start < 0)
         return Qnil;
 
@@ -1464,12 +1457,11 @@ static VALUE
 match_nth_length(VALUE match, VALUE n)
 {
     int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
 
     match_check(match);
-    backref_number_check(regs, i);
+    backref_number_check(match, i);
 
-    if (BEG(i) < 0)
+    if (RMATCH_BEG(match, i) < 0)
         return Qnil;
 
     update_char_offset(match);
@@ -1495,11 +1487,8 @@ rb_match_unbusy(VALUE match)
 int
 rb_match_count(VALUE match)
 {
-    struct re_registers *regs;
     if (NIL_P(match)) return -1;
-    regs = RMATCH_REGS(match);
-    if (!regs) return -1;
-    return regs->num_regs;
+    return RMATCH_NREGS(match);
 }
 
 static void
@@ -1892,18 +1881,17 @@ rb_reg_start_with_p(VALUE re, VALUE str)
 VALUE
 rb_reg_nth_defined(int nth, VALUE match)
 {
-    struct re_registers *regs;
     if (NIL_P(match)) return Qnil;
     match_check(match);
-    regs = RMATCH_REGS(match);
-    if (nth >= regs->num_regs) {
+    int num_regs = RMATCH_NREGS(match);
+    if (nth >= num_regs) {
         return Qnil;
     }
     if (nth < 0) {
-        nth += regs->num_regs;
+        nth += num_regs;
         if (nth <= 0) return Qnil;
     }
-    return RBOOL(BEG(nth) != -1);
+    return RBOOL(RMATCH_BEG(match, nth) != -1);
 }
 
 VALUE
@@ -1911,21 +1899,20 @@ rb_reg_nth_match(int nth, VALUE match)
 {
     VALUE str;
     long start, end, len;
-    struct re_registers *regs;
 
     if (NIL_P(match)) return Qnil;
     match_check(match);
-    regs = RMATCH_REGS(match);
-    if (nth >= regs->num_regs) {
+    int num_regs = RMATCH_NREGS(match);
+    if (nth >= num_regs) {
         return Qnil;
     }
     if (nth < 0) {
-        nth += regs->num_regs;
+        nth += num_regs;
         if (nth <= 0) return Qnil;
     }
-    start = BEG(nth);
+    start = RMATCH_BEG(match, nth);
     if (start == -1) return Qnil;
-    end = END(nth);
+    end = RMATCH_END(match, nth);
     len = end - start;
     str = rb_str_subseq(RMATCH(match)->str, start, len);
     return str;
@@ -1959,13 +1946,11 @@ VALUE
 rb_reg_match_pre(VALUE match)
 {
     VALUE str;
-    struct re_registers *regs;
 
     if (NIL_P(match)) return Qnil;
     match_check(match);
-    regs = RMATCH_REGS(match);
-    if (BEG(0) == -1) return Qnil;
-    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
+    if (RMATCH_BEG(match, 0) == -1) return Qnil;
+    str = rb_str_subseq(RMATCH(match)->str, 0, RMATCH_BEG(match, 0));
     return str;
 }
 
@@ -1993,14 +1978,12 @@ rb_reg_match_post(VALUE match)
 {
     VALUE str;
     long pos;
-    struct re_registers *regs;
 
     if (NIL_P(match)) return Qnil;
     match_check(match);
-    regs = RMATCH_REGS(match);
-    if (BEG(0) == -1) return Qnil;
+    if (RMATCH_BEG(match, 0) == -1) return Qnil;
     str = RMATCH(match)->str;
-    pos = END(0);
+    pos = RMATCH_END(match, 0);
     str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
     return str;
 }
@@ -2009,14 +1992,12 @@ static int
 match_last_index(VALUE match)
 {
     int i;
-    struct re_registers *regs;
 
     if (NIL_P(match)) return -1;
     match_check(match);
-    regs = RMATCH_REGS(match);
-    if (BEG(0) == -1) return -1;
+    if (RMATCH_BEG(match, 0) == -1) return -1;
 
-    for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
+    for (i = RMATCH_NREGS(match) - 1; RMATCH_BEG(match, i) == -1 && i > 0; i--)
         ;
     return i;
 }
@@ -2026,8 +2007,8 @@ rb_reg_match_last(VALUE match)
 {
     int i = match_last_index(match);
     if (i <= 0) return Qnil;
-    struct re_registers *regs = RMATCH_REGS(match);
-    return rb_str_subseq(RMATCH(match)->str, BEG(i), END(i) - BEG(i));
+    long start = RMATCH_BEG(match, i);
+    return rb_str_subseq(RMATCH(match)->str, start, RMATCH_END(match, i) - start);
 }
 
 VALUE
@@ -2065,22 +2046,22 @@ last_paren_match_getter(ID _x, VALUE *_y)
 static VALUE
 match_array(VALUE match, int start)
 {
-    struct re_registers *regs;
     VALUE ary;
     VALUE target;
     int i;
 
     match_check(match);
-    regs = RMATCH_REGS(match);
-    ary = rb_ary_new2(regs->num_regs);
+    int num_regs = RMATCH_NREGS(match);
+    ary = rb_ary_new2(num_regs);
     target = RMATCH(match)->str;
 
-    for (i=start; i<regs->num_regs; i++) {
-        if (regs->beg[i] == -1) {
+    for (i = start; i < num_regs; i++) {
+        long beg = RMATCH_BEG(match, i);
+        if (beg == -1) {
             rb_ary_push(ary, Qnil);
         }
         else {
-            VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
+            VALUE str = rb_str_subseq(target, beg, RMATCH_END(match, i) - beg);
             rb_ary_push(ary, str);
         }
     }
@@ -2165,7 +2146,7 @@ namev_to_backref_number(const struct re_registers *regs, VALUE re, VALUE name)
 static VALUE
 match_ary_subseq(VALUE match, long beg, long len, VALUE result)
 {
-    long olen = RMATCH_REGS(match)->num_regs;
+    long olen = RMATCH_NREGS(match);
     long j, end = olen < beg+len ? olen : beg+len;
     if (NIL_P(result)) result = rb_ary_new_capa(len);
     if (len == 0) return result;
@@ -2183,7 +2164,7 @@ static VALUE
 match_ary_aref(VALUE match, VALUE idx, VALUE result)
 {
     long beg, len;
-    int num_regs = RMATCH_REGS(match)->num_regs;
+    int num_regs = RMATCH_NREGS(match);
 
     /* check if idx is Range */
     switch (rb_range_beg_len(idx, &beg, &len, (long)num_regs, !NIL_P(result))) {
@@ -2261,7 +2242,7 @@ match_aref(int argc, VALUE *argv, VALUE match)
     else {
         long beg = NUM2LONG(idx);
         long len = NUM2LONG(length);
-        long num_regs = RMATCH_REGS(match)->num_regs;
+        long num_regs = RMATCH_NREGS(match);
         if (len < 0) {
             return Qnil;
         }
@@ -2601,8 +2582,7 @@ match_inspect(VALUE match)
     VALUE cname = rb_class_path(rb_obj_class(match));
     VALUE str;
     int i;
-    struct re_registers *regs = RMATCH_REGS(match);
-    int num_regs = regs->num_regs;
+    int num_regs = RMATCH_NREGS(match);
     struct backref_name_tag *names;
     VALUE names_obj = Qnil;
     VALUE regexp = RMATCH(match)->regexp;
diff --git a/string.c b/string.c
index dae7700887c4c4..f179b816e8f52d 100644
--- a/string.c
+++ b/string.c
@@ -4620,8 +4620,7 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str)
 
         if (rb_reg_search(sub, str, pos, 0) >= 0) {
             VALUE match = rb_backref_get();
-            struct re_registers *regs = RMATCH_REGS(match);
-            pos = rb_str_sublen(str, BEG(0));
+            pos = rb_str_sublen(str, RMATCH_BEG(match, 0));
             return LONG2NUM(pos);
         }
     }
@@ -4747,8 +4746,7 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
     if (RB_TYPE_P(sub, T_REGEXP)) {
         if (rb_reg_search(sub, str, pos, 0) >= 0) {
             VALUE match = rb_backref_get();
-            struct re_registers *regs = RMATCH_REGS(match);
-            pos = BEG(0);
+            pos = RMATCH_BEG(match, 0);
             return LONG2NUM(pos);
         }
     }
@@ -4879,8 +4877,7 @@ rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
 
         if (rb_reg_search(sub, str, pos, 1) >= 0) {
             VALUE match = rb_backref_get();
-            struct re_registers *regs = RMATCH_REGS(match);
-            pos = rb_str_sublen(str, BEG(0));
+            pos = rb_str_sublen(str, RMATCH_BEG(match, 0));
             return LONG2NUM(pos);
         }
     }
@@ -5037,8 +5034,7 @@ rb_str_byterindex_m(int argc, VALUE *argv, VALUE str)
     if (RB_TYPE_P(sub, T_REGEXP)) {
         if (rb_reg_search(sub, str, pos, 1) >= 0) {
             VALUE match = rb_backref_get();
-            struct re_registers *regs = RMATCH_REGS(match);
-            pos = BEG(0);
+            pos = RMATCH_BEG(match, 0);
             return LONG2NUM(pos);
         }
     }
@@ -5915,26 +5911,25 @@ rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
     VALUE match;
     long start, end, len;
     rb_encoding *enc;
-    struct re_registers *regs;
 
     if (rb_reg_search(re, str, 0, 0) < 0) {
         rb_raise(rb_eIndexError, "regexp not matched");
     }
     match = rb_backref_get();
     nth = rb_reg_backref_number(match, backref);
-    regs = RMATCH_REGS(match);
-    if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
+    int num_regs = RMATCH_NREGS(match);
+    if ((nth >= num_regs) || ((nth < 0) && (-nth >= num_regs))) {
         rb_raise(rb_eIndexError, "index %d out of regexp", nth);
     }
     if (nth < 0) {
-        nth += regs->num_regs;
+        nth += num_regs;
     }
 
-    start = BEG(nth);
+    start = RMATCH_BEG(match, nth);
     if (start == -1) {
         rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
     }
-    end = END(nth);
+    end = RMATCH_END(match, nth);
     len = end - start;
     StringValue(val);
     enc = rb_enc_check_str(str, val);
@@ -6069,14 +6064,14 @@ rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
     if (RB_TYPE_P(indx, T_REGEXP)) {
         if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
         VALUE match = rb_backref_get();
-        struct re_registers *regs = RMATCH_REGS(match);
+        int num_regs = RMATCH_NREGS(match);
         int nth = 0;
         if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
-            if ((nth += regs->num_regs) <= 0) return Qnil;
+            if ((nth += num_regs) <= 0) return Qnil;
         }
-        else if (nth >= regs->num_regs) return Qnil;
-        beg = BEG(nth);
-        len = END(nth) - beg;
+        else if (nth >= num_regs) return Qnil;
+        beg = RMATCH_BEG(match, nth);
+        len = RMATCH_END(match, nth) - beg;
         goto subseq;
     }
     else if (argc == 2) {
@@ -9337,18 +9332,16 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
         if (result) result = rb_ary_new();
         long len = RSTRING_LEN(str);
         long start = beg;
-        long idx;
+        int idx;
         int last_null = 0;
-        struct re_registers *regs;
         VALUE match = 0;
 
         for (; rb_reg_search(spat, str, start, 0) >= 0;
              (match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
             match = rb_backref_get();
             if (!result) rb_match_busy(match);
-            regs = RMATCH_REGS(match);
-            end = BEG(0);
-            if (start == end && BEG(0) == END(0)) {
+            end = RMATCH_BEG(match, 0);
+            if (start == end && RMATCH_BEG(match, 0) == RMATCH_END(match, 0)) {
                 if (!ptr) {
                     SPLIT_STR(0, 0);
                     break;
@@ -9368,13 +9361,13 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
             }
             else {
                 SPLIT_STR(beg, end-beg);
-                beg = start = END(0);
+                beg = start = RMATCH_END(match, 0);
             }
             last_null = 0;
 
-            for (idx=1; idx < regs->num_regs; idx++) {
-                if (BEG(idx) == -1) continue;
-                SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
+            for (idx = 1; idx < RMATCH_NREGS(match); idx++) {
+                if (RMATCH_BEG(match, idx) == -1) continue;
+                SPLIT_STR(RMATCH_BEG(match, idx), RMATCH_END(match, idx) - RMATCH_BEG(match, idx));
             }
             if (!NIL_P(limit) && lim <= ++i) break;
         }
@@ -11255,10 +11248,9 @@ rb_str_partition(VALUE str, VALUE sep)
             goto failed;
         }
         VALUE match = rb_backref_get();
-        struct re_registers *regs = RMATCH_REGS(match);
 
-        pos = BEG(0);
-        sep = rb_str_subseq(str, pos, END(0) - pos);
+        pos = RMATCH_BEG(match, 0);
+        sep = rb_str_subseq(str, pos, RMATCH_END(match, 0) - pos);
     }
     else {
         pos = rb_str_index(str, sep, 0);
@@ -11292,10 +11284,9 @@ rb_str_rpartition(VALUE str, VALUE sep)
             goto failed;
         }
         VALUE match = rb_backref_get();
-        struct re_registers *regs = RMATCH_REGS(match);
 
-        pos = BEG(0);
-        sep = rb_str_subseq(str, pos, END(0) - pos);
+        pos = RMATCH_BEG(match, 0);
+        sep = rb_str_subseq(str, pos, RMATCH_END(match, 0) - pos);
     }
     else {
         pos = rb_str_sublen(str, pos);

From ee19cef31e3c2e38056778103a7f878afe8d99bf Mon Sep 17 00:00:00 2001
From: John Hawthorn <john@hawthorn.email>
Date: Fri, 8 May 2026 12:38:55 -0700
Subject: [PATCH 09/12] Replace BEG/END with RMATCH_BEG/RMATCH_END

---
 re.c     | 28 +++++++++++++---------------
 string.c | 31 +++++++++++++------------------
 2 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/re.c b/re.c
index e4f580ecc06513..de46c0e7ca85b7 100644
--- a/re.c
+++ b/re.c
@@ -1004,7 +1004,6 @@ static void
 update_char_offset(VALUE match)
 {
     struct RMatch *rm = RMATCH(match);
-    struct re_registers *regs;
     int i, num_regs, num_pos;
     long c;
     char *s, *p, *q;
@@ -1015,8 +1014,7 @@ update_char_offset(VALUE match)
     if (rm->char_offset_num_allocated)
         return;
 
-    regs = &rm->regs;
-    num_regs = rm->regs.num_regs;
+    num_regs = RMATCH_NREGS(match);
 
     if (rm->char_offset_num_allocated < num_regs) {
         SIZED_REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs, rm->char_offset_num_allocated);
@@ -1026,8 +1024,8 @@ update_char_offset(VALUE match)
     enc = rb_enc_get(RMATCH(match)->str);
     if (rb_enc_mbmaxlen(enc) == 1) {
         for (i = 0; i < num_regs; i++) {
-            rm->char_offset[i].beg = BEG(i);
-            rm->char_offset[i].end = END(i);
+            rm->char_offset[i].beg = RMATCH_BEG(match, i);
+            rm->char_offset[i].end = RMATCH_END(match, i);
         }
         return;
     }
@@ -1035,10 +1033,10 @@ update_char_offset(VALUE match)
     pairs = RB_ALLOCV_N(pair_t, pairs_obj, num_regs * 2);
     num_pos = 0;
     for (i = 0; i < num_regs; i++) {
-        if (BEG(i) < 0)
+        if (RMATCH_BEG(match, i) < 0)
             continue;
-        pairs[num_pos++].byte_pos = BEG(i);
-        pairs[num_pos++].byte_pos = END(i);
+        pairs[num_pos++].byte_pos = RMATCH_BEG(match, i);
+        pairs[num_pos++].byte_pos = RMATCH_END(match, i);
     }
     qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
 
@@ -1053,17 +1051,17 @@ update_char_offset(VALUE match)
 
     for (i = 0; i < num_regs; i++) {
         pair_t key, *found;
-        if (BEG(i) < 0) {
+        if (RMATCH_BEG(match, i) < 0) {
             rm->char_offset[i].beg = -1;
             rm->char_offset[i].end = -1;
             continue;
         }
 
-        key.byte_pos = BEG(i);
+        key.byte_pos = RMATCH_BEG(match, i);
         found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
         rm->char_offset[i].beg = found->char_pos;
 
-        key.byte_pos = END(i);
+        key.byte_pos = RMATCH_END(match, i);
         found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
         rm->char_offset[i].end = found->char_pos;
     }
@@ -3637,7 +3635,7 @@ match_integer_at(int argc, VALUE *argv, VALUE match)
 
     int base = 10;
     VALUE idx;
-    long nth;
+    int nth;
 
     argc = rb_check_arity(argc, 1, 2);
     if (FIXNUM_P(idx = argv[0])) {
@@ -3651,10 +3649,10 @@ match_integer_at(int argc, VALUE *argv, VALUE match)
         rb_raise(rb_eArgError, "invalid radix %d", base);
     }
 
-    if (nth >= regs->num_regs) return Qnil;
-    if (nth < 0 && (nth += regs->num_regs) <= 0) return Qnil;
+    if (nth >= RMATCH_NREGS(match)) return Qnil;
+    if (nth < 0 && (nth += RMATCH_NREGS(match)) <= 0) return Qnil;
 
-    long start = BEG(nth), end = END(nth);
+    long start = RMATCH_BEG(match, nth), end = RMATCH_END(match, nth);
     if (start < 0) return Qnil;
     RUBY_ASSERT(start <= end, "%ld > %ld", start, end);
 
diff --git a/string.c b/string.c
index f179b816e8f52d..2d7bd4ee74401c 100644
--- a/string.c
+++ b/string.c
@@ -60,9 +60,6 @@
 # define HAVE_CRYPT_R 1
 #endif
 
-#define BEG(no) (regs->beg[(no)])
-#define END(no) (regs->end[(no)])
-
 #undef rb_str_new
 #undef rb_usascii_str_new
 #undef rb_utf8_str_new
@@ -6273,8 +6270,8 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
             match0 = pat;
         }
         else {
-            beg0 = BEG(0);
-            end0 = END(0);
+            beg0 = RMATCH_BEG(match, 0);
+            end0 = RMATCH_END(match, 0);
             if (iter) match0 = rb_reg_nth_match(0, match);
         }
 
@@ -6419,8 +6416,8 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
             match0 = pat;
         }
         else {
-            beg0 = BEG(0);
-            end0 = END(0);
+            beg0 = RMATCH_BEG(match, 0);
+            end0 = RMATCH_END(match, 0);
             if (mode == ITER) match0 = rb_reg_nth_match(0, match);
         }
 
@@ -10645,17 +10642,14 @@ scan_once(VALUE str, VALUE pat, long *start, int set_backref_str)
     VALUE result = Qnil;
     long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
     if (pos >= 0) {
-        VALUE match;
-        struct re_registers *regs;
+        VALUE match = Qnil;
         if (BUILTIN_TYPE(pat) == T_STRING) {
-            regs = NULL;
             end = pos + RSTRING_LEN(pat);
         }
         else {
             match = rb_backref_get();
-            regs = RMATCH_REGS(match);
-            pos = BEG(0);
-            end = END(0);
+            pos = RMATCH_BEG(match, 0);
+            end = RMATCH_END(match, 0);
         }
 
         if (pos == end) {
@@ -10673,16 +10667,17 @@ scan_once(VALUE str, VALUE pat, long *start, int set_backref_str)
             *start = end;
         }
 
-        if (!regs || regs->num_regs == 1) {
+        if (NIL_P(match) || RMATCH_NREGS(match) == 1) {
             result = rb_str_subseq(str, pos, end - pos);
             return result;
         }
         else {
-            result = rb_ary_new2(regs->num_regs);
-            for (int i = 1; i < regs->num_regs; i++) {
+            int num_regs = RMATCH_NREGS(match);
+            result = rb_ary_new2(num_regs);
+            for (int i = 1; i < num_regs; i++) {
                 VALUE s = Qnil;
-                if (BEG(i) >= 0) {
-                    s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
+                if (RMATCH_BEG(match, i) >= 0) {
+                    s = rb_str_subseq(str, RMATCH_BEG(match, i), RMATCH_END(match, i) - RMATCH_BEG(match, i));
                 }
 
                 rb_ary_push(result, s);

From 12bb8955263bb8cb82bdcd642556ab360cbb7b12 Mon Sep 17 00:00:00 2001
From: John Hawthorn <john@hawthorn.email>
Date: Fri, 8 May 2026 12:46:41 -0700
Subject: [PATCH 10/12] Introduce RMATCH_{BEG,END}_PTR

---
 internal/re.h | 16 ++++++++++++++--
 re.c          | 20 ++++++++------------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/internal/re.h b/internal/re.h
index 52a05902adaf23..aa1c93f64275dd 100644
--- a/internal/re.h
+++ b/internal/re.h
@@ -12,16 +12,28 @@
 #include "ruby/ruby.h"          /* for VALUE */
 #include "ruby/re.h"            /* for struct RMatch and struct re_registers */
 
+static inline OnigPosition *
+RMATCH_BEG_PTR(VALUE match)
+{
+    return RMATCH(match)->regs.beg;
+}
+
+static inline OnigPosition *
+RMATCH_END_PTR(VALUE match)
+{
+    return RMATCH(match)->regs.end;
+}
+
 static inline long
 RMATCH_BEG(VALUE match, int i)
 {
-    return RMATCH(match)->regs.beg[i];
+    return RMATCH_BEG_PTR(match)[i];
 }
 
 static inline long
 RMATCH_END(VALUE match, int i)
 {
-    return RMATCH(match)->regs.end[i];
+    return RMATCH_END_PTR(match)[i];
 }
 
 static inline int
diff --git a/re.c b/re.c
index de46c0e7ca85b7..bb6af74eb57f36 100644
--- a/re.c
+++ b/re.c
@@ -3553,16 +3553,15 @@ rb_reg_equal(VALUE re1, VALUE re2)
 static VALUE
 match_hash(VALUE match)
 {
-    const struct re_registers *regs;
     st_index_t hashval;
 
     match_check(match);
     hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
     hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
-    regs = RMATCH_REGS(match);
-    hashval = rb_hash_uint(hashval, regs->num_regs);
-    hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
-    hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
+    int num_regs = RMATCH_NREGS(match);
+    hashval = rb_hash_uint(hashval, num_regs);
+    hashval = rb_hash_uint(hashval, rb_memhash(RMATCH_BEG_PTR(match), num_regs * sizeof(OnigPosition)));
+    hashval = rb_hash_uint(hashval, rb_memhash(RMATCH_END_PTR(match), num_regs * sizeof(OnigPosition)));
     hashval = rb_hash_end(hashval);
     return ST2FIX(hashval);
 }
@@ -3579,18 +3578,15 @@ match_hash(VALUE match)
 static VALUE
 match_equal(VALUE match1, VALUE match2)
 {
-    const struct re_registers *regs1, *regs2;
-
     if (match1 == match2) return Qtrue;
     if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
     if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
     if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
     if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
-    regs1 = RMATCH_REGS(match1);
-    regs2 = RMATCH_REGS(match2);
-    if (regs1->num_regs != regs2->num_regs) return Qfalse;
-    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
-    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
+    int num_regs = RMATCH_NREGS(match1);
+    if (num_regs != RMATCH_NREGS(match2)) return Qfalse;
+    if (memcmp(RMATCH_BEG_PTR(match1), RMATCH_BEG_PTR(match2), num_regs * sizeof(OnigPosition))) return Qfalse;
+    if (memcmp(RMATCH_END_PTR(match1), RMATCH_END_PTR(match2), num_regs * sizeof(OnigPosition))) return Qfalse;
     return Qtrue;
 }
 

From e9e4647e6667743e26db037beaa6a56bc8c70f48 Mon Sep 17 00:00:00 2001
From: Aaron Patterson <tenderlove@ruby-lang.org>
Date: Fri, 8 May 2026 14:23:18 -0700
Subject: [PATCH 11/12] ZJIT: add an unreachable instruction (#16901)

Unreachable instructions terminate blocks.  We'll use this mostly for
testing as a terminator instruction (since traditional basic blocks will
require all blocks to end with a terminator).
---
 zjit/src/asm/arm64/inst/mod.rs |  2 ++
 zjit/src/asm/arm64/inst/udf.rs | 52 ++++++++++++++++++++++++++++++++++
 zjit/src/asm/arm64/mod.rs      |  6 ++++
 zjit/src/backend/arm64/mod.rs  |  3 ++
 zjit/src/backend/lir.rs        | 13 +++++++--
 zjit/src/backend/x86_64/mod.rs |  1 +
 zjit/src/codegen.rs            |  1 +
 zjit/src/hir.rs                | 16 +++++++----
 8 files changed, 87 insertions(+), 7 deletions(-)
 create mode 100644 zjit/src/asm/arm64/inst/udf.rs

diff --git a/zjit/src/asm/arm64/inst/mod.rs b/zjit/src/asm/arm64/inst/mod.rs
index bfffd914efe29a..270c784f270410 100644
--- a/zjit/src/asm/arm64/inst/mod.rs
+++ b/zjit/src/asm/arm64/inst/mod.rs
@@ -26,6 +26,7 @@ mod sbfm;
 mod shift_imm;
 mod sys_reg;
 mod test_bit;
+mod udf;
 
 pub use atomic::Atomic;
 pub use branch::Branch;
@@ -52,3 +53,4 @@ pub use sbfm::SBFM;
 pub use shift_imm::ShiftImm;
 pub use sys_reg::SysReg;
 pub use test_bit::TestBit;
+pub use udf::Udf;
diff --git a/zjit/src/asm/arm64/inst/udf.rs b/zjit/src/asm/arm64/inst/udf.rs
new file mode 100644
index 00000000000000..297d17ed628adf
--- /dev/null
+++ b/zjit/src/asm/arm64/inst/udf.rs
@@ -0,0 +1,52 @@
+/// The struct that represents an A64 permanently undefined instruction.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// |  0  0  0  0    0  0  0  0    0  0  0  0    0  0  0  0                                                         |
+/// |                                                         imm16..................................................|
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Udf {
+    /// The immediate value encoded in the instruction
+    imm16: u16
+}
+
+impl Udf {
+    /// UDF - Permanently Undefined
+    /// <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined->
+    pub fn udf(imm16: u16) -> Self {
+        Self { imm16 }
+    }
+}
+
+impl From<Udf> for u32 {
+    /// Convert an instruction into a 32-bit value.
+    fn from(inst: Udf) -> Self {
+        inst.imm16 as u32
+    }
+}
+
+impl From<Udf> for [u8; 4] {
+    /// Convert an instruction into a 4 byte array.
+    fn from(inst: Udf) -> [u8; 4] {
+        let result: u32 = inst.into();
+        result.to_le_bytes()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_udf() {
+        let result: u32 = Udf::udf(0).into();
+        assert_eq!(0x00000000, result);
+    }
+
+    #[test]
+    fn test_udf_imm() {
+        let result: u32 = Udf::udf(1).into();
+        assert_eq!(0x00000001, result);
+    }
+}
diff --git a/zjit/src/asm/arm64/mod.rs b/zjit/src/asm/arm64/mod.rs
index a360d7738b2dbf..b53f1cf6733664 100644
--- a/zjit/src/asm/arm64/mod.rs
+++ b/zjit/src/asm/arm64/mod.rs
@@ -321,6 +321,12 @@ pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) {
     cb.write_bytes(&bytes);
 }
 
+/// UDF - permanently undefined instruction
+pub fn udf(cb: &mut CodeBlock, imm16: u16) {
+    let bytes: [u8; 4] = Udf::udf(imm16).into();
+    cb.write_bytes(&bytes);
+}
+
 /// CMP - compare rn and rm, update flags
 pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
     let bytes: [u8; 4] = match (rn, rm) {
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 54c803168dc3a7..4d7aa2c9533186 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -1561,6 +1561,9 @@ impl Assembler {
                 Insn::Breakpoint => {
                     brk(cb, A64Opnd::None);
                 },
+                Insn::Abort => {
+                    udf(cb, u16::MAX);
+                },
                 Insn::CSelZ { truthy, falsy, out } |
                 Insn::CSelE { truthy, falsy, out } => {
                     csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ);
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index bb8d1e1e735b03..7335680f84fac1 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -653,6 +653,9 @@ pub enum Insn {
     #[allow(dead_code)]
     Breakpoint,
 
+    // Abort the process by executing an undefined instruction (udf on arm64, ud2 on x86_64)
+    Abort,
+
     /// Add a comment into the IR at the point that this instruction is added.
     /// It won't have any impact on that actual compiled code.
     Comment(String),
@@ -895,6 +898,7 @@ impl Insn {
             Insn::And { .. } => "And",
             Insn::BakeString(_) => "BakeString",
             Insn::Breakpoint => "Breakpoint",
+            Insn::Abort => "Abort",
             Insn::Comment(_) => "Comment",
             Insn::Cmp { .. } => "Cmp",
             Insn::CPop { .. } => "CPop",
@@ -1185,7 +1189,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
             }
 
             Insn::BakeString(_) |
-            Insn::Breakpoint |
+            Insn::Breakpoint | Insn::Abort |
             Insn::Comment(_) |
             Insn::CPop { .. } |
             Insn::PadPatchPoint |
@@ -1363,7 +1367,7 @@ impl<'a> InsnOpndMutIterator<'a> {
             }
 
             Insn::BakeString(_) |
-            Insn::Breakpoint |
+            Insn::Breakpoint | Insn::Abort |
             Insn::Comment(_) |
             Insn::CPop { .. } |
             Insn::FrameSetup { .. } |
@@ -3465,6 +3469,11 @@ impl Assembler {
         self.push_insn(Insn::Breakpoint);
     }
 
+    #[allow(dead_code)]
+    pub fn abort(&mut self) {
+        self.push_insn(Insn::Abort);
+    }
+
     /// Call a C function without PosMarkers
     pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd {
         let canary_opnd = self.set_stack_canary();
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 3904bfd71f3dcb..a3af9856dab291 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -1089,6 +1089,7 @@ impl Assembler {
                 },
 
                 Insn::Breakpoint => int3(cb),
+                Insn::Abort => ud2(cb),
 
                 Insn::CSelZ { truthy, falsy, out } => {
                     emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz);
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index b9b8b6509abfb2..097257ddf85ede 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -754,6 +754,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
         Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state)),
         &Insn::CheckInterrupts { state } => no_output!(gen_check_interrupts(jit, asm, &function.frame_state(state))),
         Insn::BreakPoint => no_output!(asm.breakpoint()),
+        Insn::Unreachable => no_output!(asm.abort()),
         &Insn::HashDup { val, state } => { gen_hash_dup(asm, opnd!(val), &function.frame_state(state)) },
         &Insn::HashAref { hash, key, state } => { gen_hash_aref(jit, asm, opnd!(hash), opnd!(key), &function.frame_state(state)) },
         &Insn::HashAset { hash, key, val, state } => { no_output!(gen_hash_aset(jit, asm, opnd!(hash), opnd!(key), opnd!(val), &function.frame_state(state))) },
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index 27bf5df42741ee..1d8358cbada039 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -1147,6 +1147,11 @@ pub enum Insn {
     CheckInterrupts { state: InsnId },
 
     BreakPoint,
+
+    /// Only use this instruction in tests where you need to end a block with
+    /// a terminator, but don't ever expect the code to be executed.  This
+    /// instruction should never be generated from iseq_to_hir.
+    Unreachable,
 }
 
 /// Macro that enumerates all operands of an Insn, dispatching to caller-provided
@@ -1165,7 +1170,7 @@ macro_rules! for_each_operand_impl {
             | Insn::LoadEC
             | Insn::GetEP { .. }
             | Insn::LoadSelf
-            | Insn::BreakPoint
+            | Insn::BreakPoint | Insn::Unreachable
             | Insn::PutSpecialObject { .. }
             | Insn::IncrCounter(_)
             | Insn::IncrCounterPtr { .. } => {}
@@ -1471,7 +1476,7 @@ impl Insn {
             | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. }
             | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. }
             | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. }
-            | Insn::CheckInterrupts { .. } | Insn::BreakPoint
+            | Insn::CheckInterrupts { .. } | Insn::BreakPoint | Insn::Unreachable
             | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. }
             | Insn::ArrayAset { .. } => false,
             _ => true,
@@ -1698,7 +1703,7 @@ impl Insn {
                     abstract_heaps::Control
                 ),
             Insn::Entries { .. } => effects::Any,
-            Insn::BreakPoint => Effect::read_write(abstract_heaps::Empty, abstract_heaps::Control),
+            Insn::BreakPoint | Insn::Unreachable => Effect::read_write(abstract_heaps::Empty, abstract_heaps::Control),
         }
     }
 
@@ -2223,6 +2228,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
             Insn::CheckInterrupts { .. } => write!(f, "CheckInterrupts"),
             Insn::IsA { val, class } => write!(f, "IsA {val}, {class}"),
             Insn::BreakPoint => write!(f, "BreakPoint"),
+            Insn::Unreachable => write!(f, "Unreachable"),
         }
     }
 }
@@ -2837,7 +2843,7 @@ impl Function {
             | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. }
             | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. }
             | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. }
-            | Insn::CheckInterrupts { .. } | Insn::BreakPoint
+            | Insn::CheckInterrupts { .. } | Insn::BreakPoint | Insn::Unreachable
             | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } =>
                 panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]),
             Insn::Const { val: Const::Value(val) } => Type::from_value(*val),
@@ -5810,7 +5816,7 @@ impl Function {
             | Insn::LoadSP
             | Insn::LoadEC
             | Insn::GetEP { .. }
-            | Insn::BreakPoint
+            | Insn::BreakPoint | Insn::Unreachable
             | Insn::LoadSelf
             | Insn::Snapshot { .. }
             | Insn::Jump { .. }

From 5faeea873ba4249c685c7f390e939560d736fea8 Mon Sep 17 00:00:00 2001
From: Peter Zhu <peter@peterzhu.ca>
Date: Fri, 8 May 2026 18:02:12 -0400
Subject: [PATCH 12/12] Set EC in rb_gc_event_hook

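The event hook may use the current EC, which is NULL when the hook runs
from a GC thread.  With USE_MODULAR_GC, set the current EC for the
duration of the hook and reset it to NULL afterwards.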
---
 gc.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/gc.c b/gc.c
index 0976d6b7b0b0cb..1d3f6fa6ed15be 100644
--- a/gc.c
+++ b/gc.c
@@ -242,7 +242,30 @@ rb_gc_event_hook(VALUE obj, rb_event_flag_t event)
     rb_execution_context_t *ec = rb_gc_get_ec();
     if (!ec->cfp) return;
 
+#if USE_MODULAR_GC
+    bool gc_thread_p = false;
+    if (!GET_EC()) {
+        gc_thread_p = true;
+
+# ifdef RB_THREAD_LOCAL_SPECIFIER
+        rb_current_ec_set(ec);
+# else
+        native_tls_set(ruby_current_ec_key, ec);
+# endif
+    }
+#endif
+
     EXEC_EVENT_HOOK(ec, event, ec->cfp->self, 0, 0, 0, obj);
+
+#if USE_MODULAR_GC
+    if (gc_thread_p) {
+# ifdef RB_THREAD_LOCAL_SPECIFIER
+        rb_current_ec_set(NULL);
+# else
+        native_tls_set(ruby_current_ec_key, NULL);
+# endif
+    }
+#endif
 }
 
 void *