diff --git a/.gdbinit b/.gdbinit index bda544c64136ff..0d585beef9eeaa 100644 --- a/.gdbinit +++ b/.gdbinit @@ -979,7 +979,7 @@ end define print_lineno set $cfp = $arg0 - set $iseq = $cfp->iseq + set $iseq = rb_get_cfp_iseq($cfp) set $pos = $cfp->pc - $iseq->body->iseq_encoded if $pos != 0 set $pos = $pos - 1 @@ -1060,7 +1060,7 @@ define print_id else set $serial = (rb_id_serial_t)$id end - if $serial && $serial <= ruby_global_symbols.last_id + if $serial && $serial < ruby_global_symbols.next_id set $idx = $serial / ID_ENTRY_UNIT set $ids = (struct RArray *)ruby_global_symbols.ids set $flags = $ids->basic.flags @@ -1083,7 +1083,7 @@ define print_id set $aryptr = $ary->as.heap.ptr set $arylen = $ary->as.heap.len end - set $result = $aryptr[($serial % ID_ENTRY_UNIT) * ID_ENTRY_SIZE + $t] + set $result = $aryptr[($serial % ID_ENTRY_UNIT) + $t] if $result != RUBY_Qnil print_string $result else @@ -1117,16 +1117,17 @@ define rb_ps_thread set $cfp = $ps_thread_th->ec->cfp set $cfpend = (rb_control_frame_t *)($ps_thread_th->ec->vm_stack + $ps_thread_th->ec->vm_stack_size)-1 while $cfp < $cfpend - if $cfp->iseq - if !((VALUE)$cfp->iseq & RUBY_IMMEDIATE_MASK) && (((imemo_ifunc << RUBY_FL_USHIFT) | RUBY_T_IMEMO)==$cfp->iseq->flags & ((RUBY_IMEMO_MASK << RUBY_FL_USHIFT) | RUBY_T_MASK)) + if $cfp->_iseq + set $iseq = rb_get_cfp_iseq($cfp) + if !((VALUE)$iseq & RUBY_IMMEDIATE_MASK) && (((imemo_ifunc << RUBY_FL_USHIFT) | RUBY_T_IMEMO)==$iseq->flags & ((RUBY_IMEMO_MASK << RUBY_FL_USHIFT) | RUBY_T_MASK)) printf "%d:ifunc ", $cfpend-$cfp set print symbol-filename on - output/a $cfp->iseq.body + output/a $iseq.body set print symbol-filename off printf "\n" else if $cfp->pc - set $location = $cfp->iseq->body->location + set $location = $iseq->body->location printf "%d:", $cfpend-$cfp print_pathobj $location.pathobj printf ":" diff --git a/benchmark/int_to_s.yml b/benchmark/int_to_s.yml new file mode 100644 index 00000000000000..000dae9612ec54 --- /dev/null +++ b/benchmark/int_to_s.yml @@ -0,0 +1,25 @@ +prelude: | + # frozen_string_literal: true + N1 = 5 + N2 = 42 + N3 = 400 + N5 = 12345 + N10 = 1_234_567_890 + N19 = 4_611_686_018_427_387_903 + NEG = -1_234_567_890 + BIG20 = 10 ** 19 + 12_345_678_901_234_567 + BIG40 = 10 ** 39 + 123_456_789_012_345 + BIG100 = 10 ** 99 + 42 +benchmark: + fix_1digit: "N1.to_s" + fix_2digit: "N2.to_s" + fix_3digit: "N3.to_s" + fix_5digit: "N5.to_s" + fix_10digit: "N10.to_s" + fix_19digit: "N19.to_s" + fix_negative: "NEG.to_s" + big_20digit: "BIG20.to_s" + big_40digit: "BIG40.to_s" + big_100digit: "BIG100.to_s" + interp_id: '"id=#{N10}"' + interp_mixed: '"a=#{N2},b=#{N5},c=#{N10}"' diff --git a/bignum.c b/bignum.c index e4af035caccedd..28924b4eb9cd09 100644 --- a/bignum.c +++ b/bignum.c @@ -64,6 +64,21 @@ static const bool debug_integer_pack = ( const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +/* Two-digit decimal lookup table. Offset 2*n holds the ASCII pair for + * n in the range 0..99. Used by both rb_fix2str in numeric.c and + * big2str_2bdigits below to emit two base-10 digits per iteration. */ +const char ruby_decimal_digit_pairs[201] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + #ifndef SIZEOF_BDIGIT_DBL # if SIZEOF_INT*2 <= SIZEOF_LONG_LONG # define SIZEOF_BDIGIT_DBL SIZEOF_LONG_LONG @@ -4811,11 +4826,34 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail return; p = buf; j = sizeof(buf); - do { - BDIGIT_DBL idx = num % b2s->base; - num /= b2s->base; - p[--j] = ruby_digitmap[idx]; - } while (num); + if (b2s->base == 10) { + /* Emit two decimal digits per iteration from ruby_decimal_digit_pairs. + * See the comment on the table in bignum.c near ruby_digitmap. */ + while (num >= 100) { + BDIGIT_DBL idx = (num % 100) * 2; + num /= 100; + j -= 2; + p[j] = ruby_decimal_digit_pairs[idx]; + p[j + 1] = ruby_decimal_digit_pairs[idx + 1]; + } + if (num >= 10) { + BDIGIT_DBL idx = num * 2; + j -= 2; + p[j] = ruby_decimal_digit_pairs[idx]; + p[j + 1] = ruby_decimal_digit_pairs[idx + 1]; + } + else { + /* num is 1..9 here (0 was handled above) */ + p[--j] = (char)('0' + num); + } + } + else { + do { + BDIGIT_DBL idx = num % b2s->base; + num /= b2s->base; + p[--j] = ruby_digitmap[idx]; + } while (num); + } len = sizeof(buf) - j; big2str_alloc(b2s, len + taillen); MEMCPY(b2s->ptr, buf + j, char, len); @@ -4823,11 +4861,39 @@ big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t tail else { p = b2s->ptr; j = b2s->hbase2_numdigits; - do { - BDIGIT_DBL idx = num % b2s->base; - num /= b2s->base; - p[--j] = ruby_digitmap[idx]; - } while (j); + if (b2s->base == 10) { + /* Non-beginning chunks must emit EXACTLY hbase2_numdigits, + * zero-padded on the left. Consume num in 2-digit groups, + * handle the odd trailing digit, then memset remaining + * positions with '0'. */ + while (num >= 100) { + BDIGIT_DBL idx = (num % 100) * 2; + num /= 100; + j -= 2; + p[j] = ruby_decimal_digit_pairs[idx]; + p[j + 1] = ruby_decimal_digit_pairs[idx + 1]; + } + if (num >= 10) { + BDIGIT_DBL idx = num * 2; + j -= 2; + p[j] = ruby_decimal_digit_pairs[idx]; + p[j + 1] = ruby_decimal_digit_pairs[idx + 1]; + } + else if (num > 0) { + p[--j] = (char)('0' + num); + } + if (j > 0) { + memset(p, '0', j); + j = 0; + } + } + else { + do { + BDIGIT_DBL idx = num % b2s->base; + num /= b2s->base; + p[--j] = ruby_digitmap[idx]; + } while (j); + } len = b2s->hbase2_numdigits; } b2s->ptr += len; diff --git a/depend b/depend index b4a7882d8b30f5..a17eb16f758660 100644 --- a/depend +++ b/depend @@ -4878,6 +4878,7 @@ enum.$(OBJEXT): {$(VPATH)}internal/core/rclass.h enum.$(OBJEXT): {$(VPATH)}internal/core/rdata.h enum.$(OBJEXT): {$(VPATH)}internal/core/rfile.h enum.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +enum.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h enum.$(OBJEXT): {$(VPATH)}internal/core/robject.h enum.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h enum.$(OBJEXT): {$(VPATH)}internal/core/rstring.h @@ -4968,6 +4969,8 @@ enum.$(OBJEXT): {$(VPATH)}internal/xmalloc.h enum.$(OBJEXT): {$(VPATH)}missing.h enum.$(OBJEXT): {$(VPATH)}onigmo.h enum.$(OBJEXT): {$(VPATH)}oniguruma.h +enum.$(OBJEXT): {$(VPATH)}re.h +enum.$(OBJEXT): {$(VPATH)}regex.h enum.$(OBJEXT): {$(VPATH)}ruby_assert.h enum.$(OBJEXT): {$(VPATH)}shape.h enum.$(OBJEXT): {$(VPATH)}st.h @@ -8965,6 +8968,7 @@ marshal.$(OBJEXT): {$(VPATH)}internal/core/rclass.h marshal.$(OBJEXT): {$(VPATH)}internal/core/rdata.h marshal.$(OBJEXT): {$(VPATH)}internal/core/rfile.h marshal.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +marshal.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h marshal.$(OBJEXT): {$(VPATH)}internal/core/robject.h marshal.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h marshal.$(OBJEXT): {$(VPATH)}internal/core/rstring.h @@ -9060,6 +9064,8 @@ marshal.$(OBJEXT): {$(VPATH)}missing.h marshal.$(OBJEXT): {$(VPATH)}node.h marshal.$(OBJEXT): {$(VPATH)}onigmo.h marshal.$(OBJEXT): {$(VPATH)}oniguruma.h +marshal.$(OBJEXT): {$(VPATH)}re.h +marshal.$(OBJEXT): {$(VPATH)}regex.h marshal.$(OBJEXT): {$(VPATH)}ruby_assert.h marshal.$(OBJEXT): {$(VPATH)}ruby_atomic.h marshal.$(OBJEXT): {$(VPATH)}rubyparser.h @@ -10952,6 +10958,7 @@ parse.$(OBJEXT): {$(VPATH)}internal/core/rclass.h parse.$(OBJEXT): {$(VPATH)}internal/core/rdata.h parse.$(OBJEXT): {$(VPATH)}internal/core/rfile.h parse.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +parse.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h parse.$(OBJEXT): {$(VPATH)}internal/core/robject.h parse.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h parse.$(OBJEXT): {$(VPATH)}internal/core/rstring.h @@ -11054,6 +11061,7 @@ parse.$(OBJEXT): {$(VPATH)}parser_st.h parse.$(OBJEXT): {$(VPATH)}probes.dmyh parse.$(OBJEXT): {$(VPATH)}probes.h parse.$(OBJEXT): {$(VPATH)}ractor.h +parse.$(OBJEXT): {$(VPATH)}re.h parse.$(OBJEXT): {$(VPATH)}regenc.h parse.$(OBJEXT): {$(VPATH)}regex.h parse.$(OBJEXT): {$(VPATH)}ruby_assert.h @@ -15946,6 +15954,7 @@ ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rclass.h ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rdata.h ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rfile.h ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/robject.h ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h ruby_parser.$(OBJEXT): {$(VPATH)}internal/core/rstring.h @@ -16037,6 +16046,8 @@ ruby_parser.$(OBJEXT): {$(VPATH)}missing.h ruby_parser.$(OBJEXT): {$(VPATH)}node.h ruby_parser.$(OBJEXT): {$(VPATH)}onigmo.h ruby_parser.$(OBJEXT): {$(VPATH)}oniguruma.h +ruby_parser.$(OBJEXT): {$(VPATH)}re.h +ruby_parser.$(OBJEXT): {$(VPATH)}regex.h ruby_parser.$(OBJEXT): {$(VPATH)}ruby_assert.h ruby_parser.$(OBJEXT): {$(VPATH)}ruby_parser.c ruby_parser.$(OBJEXT): {$(VPATH)}rubyparser.h @@ -19345,6 +19356,7 @@ variable.$(OBJEXT): {$(VPATH)}internal/core/rclass.h variable.$(OBJEXT): {$(VPATH)}internal/core/rdata.h variable.$(OBJEXT): {$(VPATH)}internal/core/rfile.h variable.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +variable.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h variable.$(OBJEXT): {$(VPATH)}internal/core/robject.h variable.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h variable.$(OBJEXT): {$(VPATH)}internal/core/rstring.h @@ -19439,6 +19451,8 @@ variable.$(OBJEXT): {$(VPATH)}onigmo.h variable.$(OBJEXT): {$(VPATH)}oniguruma.h variable.$(OBJEXT): {$(VPATH)}ractor.h variable.$(OBJEXT): {$(VPATH)}ractor_core.h +variable.$(OBJEXT): {$(VPATH)}re.h +variable.$(OBJEXT): {$(VPATH)}regex.h variable.$(OBJEXT): {$(VPATH)}ruby_assert.h variable.$(OBJEXT): {$(VPATH)}ruby_atomic.h variable.$(OBJEXT): {$(VPATH)}rubyparser.h @@ -19839,6 +19853,7 @@ vm.$(OBJEXT): {$(VPATH)}internal/core/rclass.h vm.$(OBJEXT): {$(VPATH)}internal/core/rdata.h vm.$(OBJEXT): {$(VPATH)}internal/core/rfile.h vm.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +vm.$(OBJEXT): {$(VPATH)}internal/core/rmatch.h vm.$(OBJEXT): {$(VPATH)}internal/core/robject.h vm.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h vm.$(OBJEXT): {$(VPATH)}internal/core/rstring.h @@ -19941,6 +19956,8 @@ vm.$(OBJEXT): {$(VPATH)}probes.h vm.$(OBJEXT): {$(VPATH)}probes_helper.h vm.$(OBJEXT): {$(VPATH)}ractor.h vm.$(OBJEXT): {$(VPATH)}ractor_core.h +vm.$(OBJEXT): {$(VPATH)}re.h +vm.$(OBJEXT): {$(VPATH)}regex.h vm.$(OBJEXT): {$(VPATH)}ruby_assert.h vm.$(OBJEXT): {$(VPATH)}ruby_atomic.h vm.$(OBJEXT): {$(VPATH)}rubyparser.h diff --git a/ext/ripper/depend b/ext/ripper/depend index 96d41c87b89ac0..db83378a1d53db 100644 --- a/ext/ripper/depend +++ b/ext/ripper/depend @@ -474,6 +474,7 @@ ripper.o: $(hdrdir)/ruby/internal/core/rclass.h ripper.o: $(hdrdir)/ruby/internal/core/rdata.h ripper.o: $(hdrdir)/ruby/internal/core/rfile.h ripper.o: $(hdrdir)/ruby/internal/core/rhash.h +ripper.o: $(hdrdir)/ruby/internal/core/rmatch.h ripper.o: $(hdrdir)/ruby/internal/core/robject.h ripper.o: $(hdrdir)/ruby/internal/core/rregexp.h ripper.o: $(hdrdir)/ruby/internal/core/rstring.h @@ -566,6 +567,7 @@ ripper.o: $(hdrdir)/ruby/missing.h ripper.o: $(hdrdir)/ruby/onigmo.h ripper.o: $(hdrdir)/ruby/oniguruma.h ripper.o: $(hdrdir)/ruby/ractor.h +ripper.o: $(hdrdir)/ruby/re.h ripper.o: $(hdrdir)/ruby/regex.h ripper.o: $(hdrdir)/ruby/ruby.h ripper.o: $(hdrdir)/ruby/st.h diff --git a/gc.c b/gc.c index 0976d6b7b0b0cb..1d3f6fa6ed15be 100644 --- a/gc.c +++ b/gc.c @@ -242,7 +242,30 @@ rb_gc_event_hook(VALUE obj, rb_event_flag_t event) rb_execution_context_t *ec = rb_gc_get_ec(); if (!ec->cfp) return; +#if USE_MODULAR_GC + bool gc_thread_p = false; + if (!GET_EC()) { + gc_thread_p = true; + +# ifdef RB_THREAD_LOCAL_SPECIFIER + rb_current_ec_set(ec); +# else + native_tls_set(ruby_current_ec_key, ec); +# endif + } +#endif + EXEC_EVENT_HOOK(ec, event, ec->cfp->self, 0, 0, 0, obj); + +#if USE_MODULAR_GC + if (gc_thread_p) { +# ifdef RB_THREAD_LOCAL_SPECIFIER + rb_current_ec_set(NULL); +# else + native_tls_set(ruby_current_ec_key, NULL); +# endif + } +#endif } void * diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index e4cd71925c7ae6..9b1aed4e5bd1f9 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -16,6 +16,22 @@ #include #endif +#ifndef VM_CHECK_MODE +# define VM_CHECK_MODE RUBY_DEBUG +#endif + +// From ractor_core.h +#ifndef RACTOR_CHECK_MODE +# define RACTOR_CHECK_MODE (VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE) +#endif + +#if RACTOR_CHECK_MODE +# define RVALUE_SUFFIX_SIZE sizeof(VALUE) +void rb_ractor_setup_belonging(VALUE obj); +#else +# define RVALUE_SUFFIX_SIZE 0 +#endif + struct objspace { bool measure_gc_time; bool gc_stress; @@ -557,7 +573,11 @@ void * rb_gc_impl_objspace_alloc(void) { MMTk_Builder *builder = rb_mmtk_builder_init(); - mmtk_init_binding(builder, NULL, &ruby_upcalls); + MMTk_RubyBindingOptions binding_options = { + .ractor_check_mode = RACTOR_CHECK_MODE != 0, + .suffix_size = RVALUE_SUFFIX_SIZE, + }; + mmtk_init_binding(builder, &binding_options, &ruby_upcalls); return calloc(1, sizeof(struct objspace)); } @@ -885,7 +905,8 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags mmtk_handle_user_collection_request(ractor_cache, false, false); } - alloc_size += sizeof(VALUE); + // Layout: [hidden size header (sizeof(VALUE))][payload (alloc_size)][suffix (RVALUE_SUFFIX_SIZE)] + alloc_size += sizeof(VALUE) + RVALUE_SUFFIX_SIZE; VALUE *alloc_obj = (VALUE *)rb_mmtk_alloc_fast_path(objspace, ractor_cache, alloc_size); if (!alloc_obj) { @@ -893,7 +914,7 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags } alloc_obj++; - alloc_obj[-1] = alloc_size - sizeof(VALUE); + alloc_obj[-1] = alloc_size - sizeof(VALUE) - RVALUE_SUFFIX_SIZE; alloc_obj[0] = flags; alloc_obj[1] = klass; @@ -905,6 +926,10 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags objspace->total_allocated_objects++; +#if RACTOR_CHECK_MODE + rb_ractor_setup_belonging((VALUE)alloc_obj); +#endif + return (VALUE)alloc_obj; } diff --git a/gc/mmtk/mmtk.h b/gc/mmtk/mmtk.h index ee338c87efe15e..e8f95920ddcaf5 100644 --- a/gc/mmtk/mmtk.h +++ b/gc/mmtk/mmtk.h @@ -95,7 +95,7 @@ bool mmtk_is_reachable(MMTk_ObjectReference object); MMTk_Builder *mmtk_builder_default(void); void mmtk_init_binding(MMTk_Builder *builder, - const struct MMTk_RubyBindingOptions *_binding_options, + const struct MMTk_RubyBindingOptions *binding_options, const struct MMTk_RubyUpcalls *upcalls); void mmtk_initialize_collection(MMTk_VMThread tls); diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index b9797f6fe2df6f..1519d2b6237761 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -181,7 +181,7 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder { #[no_mangle] pub unsafe extern "C" fn mmtk_init_binding( builder: *mut MMTKBuilder, - _binding_options: *const RubyBindingOptions, + binding_options: *const RubyBindingOptions, upcalls: *const RubyUpcalls, ) { crate::MUTATOR_THREAD_PANIC_HANDLER @@ -191,10 +191,7 @@ pub unsafe extern "C" fn mmtk_init_binding( crate::set_panic_hook(); let builder: Box = unsafe { Box::from_raw(builder) }; - let binding_options = RubyBindingOptions { - ractor_check_mode: false, - suffix_size: 0, - }; + let binding_options = unsafe { (*binding_options).clone() }; let mmtk_boxed = mmtk_init(&builder); let mmtk_static = Box::leak(Box::new(mmtk_boxed)); diff --git a/internal/bignum.h b/internal/bignum.h index f11fbd3a4d096a..7389a17c747e15 100644 --- a/internal/bignum.h +++ b/internal/bignum.h @@ -107,6 +107,7 @@ struct RBignum { /* bignum.c */ extern const char ruby_digitmap[]; +extern const char ruby_decimal_digit_pairs[]; double rb_big_fdiv_double(VALUE x, VALUE y); VALUE rb_big_uminus(VALUE x); VALUE rb_big_hash(VALUE); diff --git a/internal/re.h b/internal/re.h index 2d2eba0dc1905c..aa1c93f64275dd 100644 --- a/internal/re.h +++ b/internal/re.h @@ -10,6 +10,37 @@ */ #include "ruby/internal/stdbool.h" /* for bool */ #include "ruby/ruby.h" /* for VALUE */ +#include "ruby/re.h" /* for struct RMatch and struct re_registers */ + +static inline OnigPosition * +RMATCH_BEG_PTR(VALUE match) +{ + return RMATCH(match)->regs.beg; +} + +static inline OnigPosition * +RMATCH_END_PTR(VALUE match) +{ + return RMATCH(match)->regs.end; +} + +static inline long +RMATCH_BEG(VALUE match, int i) +{ + return RMATCH_BEG_PTR(match)[i]; +} + +static inline long +RMATCH_END(VALUE match, int i) +{ + return RMATCH_END_PTR(match)[i]; +} + +static inline int +RMATCH_NREGS(VALUE match) +{ + return RMATCH(match)->regs.num_regs; +} /* re.c */ VALUE rb_reg_s_alloc(VALUE klass); diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index ddcec997b94efa..f179a149a1df45 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -57,7 +57,8 @@ def self.parse(src, filename = "(ripper)", lineno = 1) # [[1, 13], :on_kw, "end", END ]] # def self.lex(src, filename = "-", lineno = 1, raise_errors: false) - result = Prism.lex_compat(coerce_source(src), filepath: filename, line: lineno, version: "current") + coerced = coerce_source(src) + result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding) if result.failure? && raise_errors raise SyntaxError, result.errors.first.message @@ -4077,7 +4078,7 @@ def visit_yield_node(node) # Lazily initialize the parse result. def result - @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true) + @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding) end def line_and_column_cache diff --git a/numeric.c b/numeric.c index 40b7bfc0f8e2b4..175bd7cfa0f730 100644 --- a/numeric.c +++ b/numeric.c @@ -4040,6 +4040,11 @@ rb_int_uminus(VALUE num) } } +/* ruby_decimal_digit_pairs is defined in bignum.c and declared in + * internal/bignum.h. See there for the rationale of the 2-digit + * lookup-table itoa optimisation; both rb_fix2str here and big2str_2bdigits + * in bignum.c consume it. */ + VALUE rb_fix2str(VALUE x, int base) { @@ -4072,9 +4077,34 @@ rb_fix2str(VALUE x, int base) else { u = val; } - do { - *--b = ruby_digitmap[(int)(u % base)]; - } while (u /= base); + if (base == 10) { + /* Emit two digits per iteration from a precomputed table. The + * compiler lowers `u % 100` and `u / 100` to a single multiply + + * shift, so each iteration costs roughly one multiply, one shift, + * and two stores. About 2x fewer iterations than the classic + * per-digit loop for multi-digit inputs. */ + while (u >= 100) { + unsigned long idx = (u % 100) * 2; + u /= 100; + b -= 2; + b[0] = ruby_decimal_digit_pairs[idx]; + b[1] = ruby_decimal_digit_pairs[idx + 1]; + } + if (u >= 10) { + unsigned long idx = u * 2; + b -= 2; + b[0] = ruby_decimal_digit_pairs[idx]; + b[1] = ruby_decimal_digit_pairs[idx + 1]; + } + else { + *--b = (char)('0' + u); + } + } + else { + do { + *--b = ruby_digitmap[(int)(u % base)]; + } while (u /= base); + } if (neg) { *--b = '-'; } diff --git a/prism/extension.c b/prism/extension.c index 9f9169cfff7880..27df8dac50ddff 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o parse_lex_data_t parse_lex_data = { .source = source, .tokens = rb_ary_new(), - .encoding = rb_utf8_encoding(), + .encoding = rb_enc_find(pm_parser_encoding_name(parser)), .freeze = pm_options_freeze(options), }; diff --git a/re.c b/re.c index e65369424a03a0..bb6af74eb57f36 100644 --- a/re.c +++ b/re.c @@ -1004,7 +1004,6 @@ static void update_char_offset(VALUE match) { struct RMatch *rm = RMATCH(match); - struct re_registers *regs; int i, num_regs, num_pos; long c; char *s, *p, *q; @@ -1015,8 +1014,7 @@ update_char_offset(VALUE match) if (rm->char_offset_num_allocated) return; - regs = &rm->regs; - num_regs = rm->regs.num_regs; + num_regs = RMATCH_NREGS(match); if (rm->char_offset_num_allocated < num_regs) { SIZED_REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs, rm->char_offset_num_allocated); @@ -1026,8 +1024,8 @@ update_char_offset(VALUE match) enc = rb_enc_get(RMATCH(match)->str); if (rb_enc_mbmaxlen(enc) == 1) { for (i = 0; i < num_regs; i++) { - rm->char_offset[i].beg = BEG(i); - rm->char_offset[i].end = END(i); + rm->char_offset[i].beg = RMATCH_BEG(match, i); + rm->char_offset[i].end = RMATCH_END(match, i); } return; } @@ -1035,10 +1033,10 @@ update_char_offset(VALUE match) pairs = RB_ALLOCV_N(pair_t, pairs_obj, num_regs * 2); num_pos = 0; for (i = 0; i < num_regs; i++) { - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) continue; - pairs[num_pos++].byte_pos = BEG(i); - pairs[num_pos++].byte_pos = END(i); + pairs[num_pos++].byte_pos = RMATCH_BEG(match, i); + pairs[num_pos++].byte_pos = RMATCH_END(match, i); } qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp); @@ -1053,17 +1051,17 @@ update_char_offset(VALUE match) for (i = 0; i < num_regs; i++) { pair_t key, *found; - if (BEG(i) < 0) { + if (RMATCH_BEG(match, i) < 0) { rm->char_offset[i].beg = -1; rm->char_offset[i].end = -1; continue; } - key.byte_pos = BEG(i); + key.byte_pos = RMATCH_BEG(match, i); found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); rm->char_offset[i].beg = found->char_pos; - key.byte_pos = END(i); + key.byte_pos = RMATCH_END(match, i); found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); rm->char_offset[i].end = found->char_pos; } @@ -1179,7 +1177,7 @@ static VALUE match_size(VALUE match) { match_check(match); - return INT2FIX(RMATCH_REGS(match)->num_regs); + return INT2FIX(RMATCH_NREGS(match)); } static int name_to_backref_number(const struct re_registers *, VALUE, const char*, const char*); @@ -1193,9 +1191,9 @@ name_to_backref_error(VALUE name) } static void -backref_number_check(struct re_registers *regs, int i) +backref_number_check(VALUE match, int i) { - if (i < 0 || regs->num_regs <= i) + if (i < 0 || RMATCH_NREGS(match) <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); } @@ -1245,12 +1243,11 @@ static VALUE match_offset(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return rb_assoc_new(Qnil, Qnil); update_char_offset(match); @@ -1280,14 +1277,13 @@ static VALUE match_byteoffset(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return rb_assoc_new(Qnil, Qnil); - return rb_assoc_new(LONG2NUM(BEG(i)), LONG2NUM(END(i))); + return rb_assoc_new(LONG2NUM(RMATCH_BEG(match, i)), LONG2NUM(RMATCH_END(match, i))); } @@ -1304,14 +1300,13 @@ static VALUE match_bytebegin(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return Qnil; - return LONG2NUM(BEG(i)); + return LONG2NUM(RMATCH_BEG(match, i)); } @@ -1328,14 +1323,13 @@ static VALUE match_byteend(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return Qnil; - return LONG2NUM(END(i)); + return LONG2NUM(RMATCH_END(match, i)); } @@ -1352,12 +1346,11 @@ static VALUE match_begin(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return Qnil; update_char_offset(match); @@ -1378,12 +1371,11 @@ static VALUE match_end(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return Qnil; update_char_offset(match); @@ -1420,11 +1412,10 @@ static VALUE match_nth(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); - backref_number_check(regs, i); + backref_number_check(match, i); - long start = BEG(i), end = END(i); + long start = RMATCH_BEG(match, i), end = RMATCH_END(match, i); if (start < 0) return Qnil; @@ -1464,12 +1455,11 @@ static VALUE match_nth_length(VALUE match, VALUE n) { int i = match_backref_number(match, n); - struct re_registers *regs = RMATCH_REGS(match); match_check(match); - backref_number_check(regs, i); + backref_number_check(match, i); - if (BEG(i) < 0) + if (RMATCH_BEG(match, i) < 0) return Qnil; update_char_offset(match); @@ -1495,11 +1485,8 @@ rb_match_unbusy(VALUE match) int rb_match_count(VALUE match) { - struct re_registers *regs; if (NIL_P(match)) return -1; - regs = RMATCH_REGS(match); - if (!regs) return -1; - return regs->num_regs; + return RMATCH_NREGS(match); } static void @@ -1892,18 +1879,17 @@ rb_reg_start_with_p(VALUE re, VALUE str) VALUE rb_reg_nth_defined(int nth, VALUE match) { - struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); - regs = RMATCH_REGS(match); - if (nth >= regs->num_regs) { + int num_regs = RMATCH_NREGS(match); + if (nth >= num_regs) { return Qnil; } if (nth < 0) { - nth += regs->num_regs; + nth += num_regs; if (nth <= 0) return Qnil; } - return RBOOL(BEG(nth) != -1); + return RBOOL(RMATCH_BEG(match, nth) != -1); } VALUE @@ -1911,21 +1897,20 @@ rb_reg_nth_match(int nth, VALUE match) { VALUE str; long start, end, len; - struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); - regs = RMATCH_REGS(match); - if (nth >= regs->num_regs) { + int num_regs = RMATCH_NREGS(match); + if (nth >= num_regs) { return Qnil; } if (nth < 0) { - nth += regs->num_regs; + nth += num_regs; if (nth <= 0) return Qnil; } - start = BEG(nth); + start = RMATCH_BEG(match, nth); if (start == -1) return Qnil; - end = END(nth); + end = RMATCH_END(match, nth); len = end - start; str = rb_str_subseq(RMATCH(match)->str, start, len); return str; @@ -1959,13 +1944,11 @@ VALUE rb_reg_match_pre(VALUE match) { VALUE str; - struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); - regs = RMATCH_REGS(match); - if (BEG(0) == -1) return Qnil; - str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0)); + if (RMATCH_BEG(match, 0) == -1) return Qnil; + str = rb_str_subseq(RMATCH(match)->str, 0, RMATCH_BEG(match, 0)); return str; } @@ -1993,14 +1976,12 @@ rb_reg_match_post(VALUE match) { VALUE str; long pos; - struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); - regs = RMATCH_REGS(match); - if (BEG(0) == -1) return Qnil; + if (RMATCH_BEG(match, 0) == -1) return Qnil; str = RMATCH(match)->str; - pos = END(0); + pos = RMATCH_END(match, 0); str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos); return str; } @@ -2009,14 +1990,12 @@ static int match_last_index(VALUE match) { int i; - struct re_registers *regs; if (NIL_P(match)) return -1; match_check(match); - regs = RMATCH_REGS(match); - if (BEG(0) == -1) return -1; + if (RMATCH_BEG(match, 0) == -1) return -1; - for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--) + for (i = RMATCH_NREGS(match) - 1; RMATCH_BEG(match, i) == -1 && i > 0; i--) ; return i; } @@ -2026,8 +2005,8 @@ rb_reg_match_last(VALUE match) { int i = match_last_index(match); if (i <= 0) return Qnil; - struct re_registers *regs = RMATCH_REGS(match); - return rb_str_subseq(RMATCH(match)->str, BEG(i), END(i) - BEG(i)); + long start = RMATCH_BEG(match, i); + return rb_str_subseq(RMATCH(match)->str, start, RMATCH_END(match, i) - start); } VALUE @@ -2065,22 +2044,22 @@ last_paren_match_getter(ID _x, VALUE *_y) static VALUE match_array(VALUE match, int start) { - struct re_registers *regs; VALUE ary; VALUE target; int i; match_check(match); - regs = RMATCH_REGS(match); - ary = rb_ary_new2(regs->num_regs); + int num_regs = RMATCH_NREGS(match); + ary = rb_ary_new2(num_regs); target = RMATCH(match)->str; - for (i=start; inum_regs; i++) { - if (regs->beg[i] == -1) { + for (i = start; i < num_regs; i++) { + long beg = RMATCH_BEG(match, i); + if (beg == -1) { rb_ary_push(ary, Qnil); } else { - VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]); + VALUE str = rb_str_subseq(target, beg, RMATCH_END(match, i) - beg); rb_ary_push(ary, str); } } @@ -2165,7 +2144,7 @@ namev_to_backref_number(const struct re_registers *regs, VALUE re, VALUE name) static VALUE match_ary_subseq(VALUE match, long beg, long len, VALUE result) { - long olen = RMATCH_REGS(match)->num_regs; + long olen = RMATCH_NREGS(match); long j, end = olen < beg+len ? olen : beg+len; if (NIL_P(result)) result = rb_ary_new_capa(len); if (len == 0) return result; @@ -2183,7 +2162,7 @@ static VALUE match_ary_aref(VALUE match, VALUE idx, VALUE result) { long beg, len; - int num_regs = RMATCH_REGS(match)->num_regs; + int num_regs = RMATCH_NREGS(match); /* check if idx is Range */ switch (rb_range_beg_len(idx, &beg, &len, (long)num_regs, !NIL_P(result))) { @@ -2261,7 +2240,7 @@ match_aref(int argc, VALUE *argv, VALUE match) else { long beg = NUM2LONG(idx); long len = NUM2LONG(length); - long num_regs = RMATCH_REGS(match)->num_regs; + long num_regs = RMATCH_NREGS(match); if (len < 0) { return Qnil; } @@ -2601,8 +2580,7 @@ match_inspect(VALUE match) VALUE cname = rb_class_path(rb_obj_class(match)); VALUE str; int i; - struct re_registers *regs = RMATCH_REGS(match); - int num_regs = regs->num_regs; + int num_regs = RMATCH_NREGS(match); struct backref_name_tag *names; VALUE names_obj = Qnil; VALUE regexp = RMATCH(match)->regexp; @@ -3575,16 +3553,15 @@ rb_reg_equal(VALUE re1, VALUE re2) static VALUE match_hash(VALUE match) { - const struct re_registers *regs; st_index_t hashval; match_check(match); hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str)); hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match))); - regs = RMATCH_REGS(match); - hashval = rb_hash_uint(hashval, regs->num_regs); - hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg))); - hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end))); + int num_regs = RMATCH_NREGS(match); + hashval = rb_hash_uint(hashval, num_regs); + hashval = rb_hash_uint(hashval, rb_memhash(RMATCH_BEG_PTR(match), num_regs * sizeof(OnigPosition))); + hashval = rb_hash_uint(hashval, rb_memhash(RMATCH_END_PTR(match), num_regs * sizeof(OnigPosition))); hashval = rb_hash_end(hashval); return ST2FIX(hashval); } @@ -3601,18 +3578,15 @@ match_hash(VALUE match) static VALUE match_equal(VALUE match1, VALUE match2) { - const struct re_registers *regs1, *regs2; - if (match1 == match2) return Qtrue; if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse; if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse; if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse; if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse; - regs1 = RMATCH_REGS(match1); - regs2 = RMATCH_REGS(match2); - if (regs1->num_regs != regs2->num_regs) return Qfalse; - if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse; - if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse; + int num_regs = RMATCH_NREGS(match1); + if (num_regs != RMATCH_NREGS(match2)) return Qfalse; + if (memcmp(RMATCH_BEG_PTR(match1), RMATCH_BEG_PTR(match2), num_regs * sizeof(OnigPosition))) return Qfalse; + if (memcmp(RMATCH_END_PTR(match1), RMATCH_END_PTR(match2), num_regs * sizeof(OnigPosition))) return Qfalse; return Qtrue; } @@ -3657,7 +3631,7 @@ match_integer_at(int argc, VALUE *argv, VALUE match) int base = 10; VALUE idx; - long nth; + int nth; argc = rb_check_arity(argc, 1, 2); if (FIXNUM_P(idx = argv[0])) { @@ -3671,10 +3645,10 @@ match_integer_at(int argc, VALUE *argv, VALUE match) rb_raise(rb_eArgError, "invalid radix %d", base); } - if (nth >= regs->num_regs) return Qnil; - if (nth < 0 && (nth += regs->num_regs) <= 0) return Qnil; + if (nth >= RMATCH_NREGS(match)) return Qnil; + if (nth < 0 && (nth += RMATCH_NREGS(match)) <= 0) return Qnil; - long start = BEG(nth), end = END(nth); + long start = RMATCH_BEG(match, nth), end = RMATCH_END(match, nth); if (start < 0) return Qnil; RUBY_ASSERT(start <= end, "%ld > %ld", start, end); diff --git a/string.c b/string.c index dae7700887c4c4..2d7bd4ee74401c 100644 --- a/string.c +++ b/string.c @@ -60,9 +60,6 @@ # define HAVE_CRYPT_R 1 #endif -#define BEG(no) (regs->beg[(no)]) -#define END(no) (regs->end[(no)]) - #undef rb_str_new #undef rb_usascii_str_new #undef rb_utf8_str_new @@ -4620,8 +4617,7 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str) if (rb_reg_search(sub, str, pos, 0) >= 0) { VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); - pos = rb_str_sublen(str, BEG(0)); + pos = rb_str_sublen(str, RMATCH_BEG(match, 0)); return LONG2NUM(pos); } } @@ -4747,8 +4743,7 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str) if (RB_TYPE_P(sub, T_REGEXP)) { if (rb_reg_search(sub, str, pos, 0) >= 0) { VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); - pos = BEG(0); + pos = RMATCH_BEG(match, 0); return LONG2NUM(pos); } } @@ -4879,8 +4874,7 @@ rb_str_rindex_m(int argc, VALUE *argv, VALUE str) if (rb_reg_search(sub, str, pos, 1) >= 0) { VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); - pos = rb_str_sublen(str, BEG(0)); + pos = rb_str_sublen(str, RMATCH_BEG(match, 0)); return LONG2NUM(pos); } } @@ -5037,8 +5031,7 @@ rb_str_byterindex_m(int argc, VALUE *argv, VALUE str) if (RB_TYPE_P(sub, T_REGEXP)) { if (rb_reg_search(sub, str, pos, 1) >= 0) { VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); - pos = BEG(0); + pos = RMATCH_BEG(match, 0); return LONG2NUM(pos); } } @@ -5915,26 +5908,25 @@ rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val) VALUE match; long start, end, len; rb_encoding *enc; - struct re_registers *regs; if (rb_reg_search(re, str, 0, 0) < 0) { rb_raise(rb_eIndexError, "regexp not matched"); } match = rb_backref_get(); nth = rb_reg_backref_number(match, backref); - regs = RMATCH_REGS(match); - if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) { + int num_regs = RMATCH_NREGS(match); + if ((nth >= num_regs) || ((nth < 0) && (-nth >= num_regs))) { rb_raise(rb_eIndexError, "index %d out of regexp", nth); } if (nth < 0) { - nth += regs->num_regs; + nth += num_regs; } - start = BEG(nth); + start = RMATCH_BEG(match, nth); if (start == -1) { rb_raise(rb_eIndexError, "regexp group %d not matched", nth); } - end = END(nth); + end = RMATCH_END(match, nth); len = end - start; StringValue(val); enc = rb_enc_check_str(str, val); @@ -6069,14 +6061,14 @@ rb_str_slice_bang(int argc, VALUE *argv, VALUE str) if (RB_TYPE_P(indx, T_REGEXP)) { if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil; VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); + int num_regs = RMATCH_NREGS(match); int nth = 0; if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) { - if ((nth += regs->num_regs) <= 0) return Qnil; + if ((nth += num_regs) <= 0) return Qnil; } - else if (nth >= regs->num_regs) return Qnil; - beg = BEG(nth); - len = END(nth) - beg; + else if (nth >= num_regs) return Qnil; + beg = RMATCH_BEG(match, nth); + len = RMATCH_END(match, nth) - beg; goto subseq; } else if (argc == 2) { @@ -6278,8 +6270,8 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) match0 = pat; } else { - beg0 = BEG(0); - end0 = END(0); + beg0 = RMATCH_BEG(match, 0); + end0 = RMATCH_END(match, 0); if (iter) match0 = rb_reg_nth_match(0, match); } @@ -6424,8 +6416,8 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) match0 = pat; } else { - beg0 = BEG(0); - end0 = END(0); + beg0 = RMATCH_BEG(match, 0); + end0 = RMATCH_END(match, 0); if (mode == ITER) match0 = rb_reg_nth_match(0, match); } @@ -9337,18 +9329,16 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) if (result) result = rb_ary_new(); long len = RSTRING_LEN(str); long start = beg; - long idx; + int idx; int last_null = 0; - struct re_registers *regs; VALUE match = 0; for (; rb_reg_search(spat, str, start, 0) >= 0; (match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) { match = rb_backref_get(); if (!result) rb_match_busy(match); - regs = RMATCH_REGS(match); - end = BEG(0); - if (start == end && BEG(0) == END(0)) { + end = RMATCH_BEG(match, 0); + if (start == end && RMATCH_BEG(match, 0) == RMATCH_END(match, 0)) { if (!ptr) { SPLIT_STR(0, 0); break; @@ -9368,13 +9358,13 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) } else { SPLIT_STR(beg, end-beg); - beg = start = END(0); + beg = start = RMATCH_END(match, 0); } last_null = 0; - for (idx=1; idx < regs->num_regs; idx++) { - if (BEG(idx) == -1) continue; - SPLIT_STR(BEG(idx), END(idx)-BEG(idx)); + for (idx = 1; idx < RMATCH_NREGS(match); idx++) { + if (RMATCH_BEG(match, idx) == -1) continue; + SPLIT_STR(RMATCH_BEG(match, idx), RMATCH_END(match, idx) - RMATCH_BEG(match, idx)); } if (!NIL_P(limit) && lim <= ++i) break; } @@ -10652,17 +10642,14 @@ scan_once(VALUE str, VALUE pat, long *start, int set_backref_str) VALUE result = Qnil; long end, pos = rb_pat_search(pat, str, *start, set_backref_str); if (pos >= 0) { - VALUE match; - struct re_registers *regs; + VALUE match = Qnil; if (BUILTIN_TYPE(pat) == T_STRING) { - regs = NULL; end = pos + RSTRING_LEN(pat); } else { match = rb_backref_get(); - regs = RMATCH_REGS(match); - pos = BEG(0); - end = END(0); + pos = RMATCH_BEG(match, 0); + end = RMATCH_END(match, 0); } if (pos == end) { @@ -10680,16 +10667,17 @@ scan_once(VALUE str, VALUE pat, long *start, int set_backref_str) *start = end; } - if (!regs || regs->num_regs == 1) { + if (NIL_P(match) || RMATCH_NREGS(match) == 1) { result = rb_str_subseq(str, pos, end - pos); return result; } else { - result = rb_ary_new2(regs->num_regs); - for (int i = 1; i < regs->num_regs; i++) { + int num_regs = RMATCH_NREGS(match); + result = rb_ary_new2(num_regs); + for (int i = 1; i < num_regs; i++) { VALUE s = Qnil; - if (BEG(i) >= 0) { - s = rb_str_subseq(str, BEG(i), END(i)-BEG(i)); + if (RMATCH_BEG(match, i) >= 0) { + s = rb_str_subseq(str, RMATCH_BEG(match, i), RMATCH_END(match, i) - RMATCH_BEG(match, i)); } rb_ary_push(result, s); @@ -11255,10 +11243,9 @@ rb_str_partition(VALUE str, VALUE sep) goto failed; } VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); - pos = BEG(0); - sep = rb_str_subseq(str, pos, END(0) - pos); + pos = RMATCH_BEG(match, 0); + sep = rb_str_subseq(str, pos, RMATCH_END(match, 0) - pos); } else { pos = rb_str_index(str, sep, 0); @@ -11292,10 +11279,9 @@ rb_str_rpartition(VALUE str, VALUE sep) goto failed; } VALUE match = rb_backref_get(); - struct re_registers *regs = RMATCH_REGS(match); - pos = BEG(0); - sep = rb_str_subseq(str, pos, END(0) - pos); + pos = RMATCH_BEG(match, 0); + sep = rb_str_subseq(str, pos, RMATCH_END(match, 0) - pos); } else { pos = rb_str_sublen(str, pos); diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index 8ea7ce7e9b258f..1e06d52184b3c5 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -47,6 +47,24 @@ def test_parse_lex_file end end + def test_lex_encoding + tokens = Prism.lex('"わたし"', encoding: Encoding::Windows_31J).value + tokens.each do |t| + assert_equal(Encoding::Windows_31J, t[0].value.encoding) + end + + # Shebangs must appear on the first line. For these cases, the encoding + # comment may appear second, but it should still change encoding. + tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value + #! /usr/bin/env ruby + # encoding: utf-8 + "わたし" + RUBY + tokens.each do |t| + assert_equal(Encoding::UTF_8, t[0].value.encoding) + end + end + if RUBY_VERSION >= "3.3" def test_lex_compat source = "foo bar" diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 05be087868d811..4fff630561e7d6 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -224,6 +224,12 @@ def test_tokenize assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source)) end + def test_encoding + source = '"わたし"'.encode(Encoding::Windows_31J) + assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source)) + assert_equal(Ripper.sexp(source), Translation::Ripper.sexp(source)) + end + def test_sexp_coercion string_like = Object.new def string_like.to_str diff --git a/vm_core.h b/vm_core.h index 89f80b52c75a37..1e3dcfe04f21ac 100644 --- a/vm_core.h +++ b/vm_core.h @@ -920,7 +920,7 @@ struct rb_block { typedef struct rb_control_frame_struct { const VALUE *pc; // cfp[0] VALUE *sp; // cfp[1] - const rb_iseq_t *_iseq; // cfp[2] -- use rb_cfp_iseq(cfp) to read + const rb_iseq_t *_iseq; // cfp[2] -- use CFP_ISEQ(cfp) to read VALUE self; // cfp[3] / block[0] const VALUE *ep; // cfp[4] / block[1] const void *block_code; // cfp[5] / block[2] -- iseq, ifunc, or forwarded block handler diff --git a/vm_insnhelper.h b/vm_insnhelper.h index 88c387ee152afa..2d83fb5897a376 100644 --- a/vm_insnhelper.h +++ b/vm_insnhelper.h @@ -116,7 +116,7 @@ enum vm_regan_acttype { // instruction sequence C struct // Uses cfp->_iseq directly because the interpreter always has a valid _iseq // field (it's written on exit from JIT code). Code in vm_insnhelper.c that -// may be called as a ZJIT fallback should use rb_cfp_iseq() instead. +// may be called as a ZJIT fallback should use CFP_ISEQ() instead. #define GET_ISEQ() (GET_CFP()->_iseq) /**********************************************************/ diff --git a/zjit/src/asm/arm64/inst/mod.rs b/zjit/src/asm/arm64/inst/mod.rs index bfffd914efe29a..270c784f270410 100644 --- a/zjit/src/asm/arm64/inst/mod.rs +++ b/zjit/src/asm/arm64/inst/mod.rs @@ -26,6 +26,7 @@ mod sbfm; mod shift_imm; mod sys_reg; mod test_bit; +mod udf; pub use atomic::Atomic; pub use branch::Branch; @@ -52,3 +53,4 @@ pub use sbfm::SBFM; pub use shift_imm::ShiftImm; pub use sys_reg::SysReg; pub use test_bit::TestBit; +pub use udf::Udf; diff --git a/zjit/src/asm/arm64/inst/udf.rs b/zjit/src/asm/arm64/inst/udf.rs new file mode 100644 index 00000000000000..297d17ed628adf --- /dev/null +++ b/zjit/src/asm/arm64/inst/udf.rs @@ -0,0 +1,52 @@ +/// The struct that represents an A64 permanently undefined instruction. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | +/// | imm16..................................................| +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Udf { + /// The immediate value encoded in the instruction + imm16: u16 +} + +impl Udf { + /// UDF - Permanently Undefined + /// + pub fn udf(imm16: u16) -> Self { + Self { imm16 } + } +} + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Udf) -> Self { + inst.imm16 as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Udf) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_udf() { + let result: u32 = Udf::udf(0).into(); + assert_eq!(0x00000000, result); + } + + #[test] + fn test_udf_imm() { + let result: u32 = Udf::udf(1).into(); + assert_eq!(0x00000001, result); + } +} diff --git a/zjit/src/asm/arm64/mod.rs b/zjit/src/asm/arm64/mod.rs index a360d7738b2dbf..b53f1cf6733664 100644 --- a/zjit/src/asm/arm64/mod.rs +++ b/zjit/src/asm/arm64/mod.rs @@ -321,6 +321,12 @@ pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { cb.write_bytes(&bytes); } +/// UDF - permanently undefined instruction +pub fn udf(cb: &mut CodeBlock, imm16: u16) { + let bytes: [u8; 4] = Udf::udf(imm16).into(); + cb.write_bytes(&bytes); +} + /// CMP - compare rn and rm, update flags pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rn, rm) { diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 54c803168dc3a7..4d7aa2c9533186 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1561,6 +1561,9 @@ impl Assembler { Insn::Breakpoint => { brk(cb, A64Opnd::None); }, + Insn::Abort => { + udf(cb, u16::MAX); + }, Insn::CSelZ { truthy, falsy, out } | Insn::CSelE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index bb8d1e1e735b03..7335680f84fac1 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -653,6 +653,9 @@ pub enum Insn { #[allow(dead_code)] Breakpoint, + // Abort the process + Abort, + /// Add a comment into the IR at the point that this instruction is added. /// It won't have any impact on that actual compiled code. Comment(String), @@ -895,6 +898,7 @@ impl Insn { Insn::And { .. } => "And", Insn::BakeString(_) => "BakeString", Insn::Breakpoint => "Breakpoint", + Insn::Abort => "Abort", Insn::Comment(_) => "Comment", Insn::Cmp { .. } => "Cmp", Insn::CPop { .. } => "CPop", @@ -1185,7 +1189,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> { } Insn::BakeString(_) | - Insn::Breakpoint | + Insn::Breakpoint | Insn::Abort | Insn::Comment(_) | Insn::CPop { .. } | Insn::PadPatchPoint | @@ -1363,7 +1367,7 @@ impl<'a> InsnOpndMutIterator<'a> { } Insn::BakeString(_) | - Insn::Breakpoint | + Insn::Breakpoint | Insn::Abort | Insn::Comment(_) | Insn::CPop { .. } | Insn::FrameSetup { .. } | @@ -3465,6 +3469,11 @@ impl Assembler { self.push_insn(Insn::Breakpoint); } + #[allow(dead_code)] + pub fn abort(&mut self) { + self.push_insn(Insn::Abort); + } + /// Call a C function without PosMarkers pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let canary_opnd = self.set_stack_canary(); diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 3904bfd71f3dcb..a3af9856dab291 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -1089,6 +1089,7 @@ impl Assembler { }, Insn::Breakpoint => int3(cb), + Insn::Abort => ud2(cb), Insn::CSelZ { truthy, falsy, out } => { emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index b9b8b6509abfb2..097257ddf85ede 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -754,6 +754,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state)), &Insn::CheckInterrupts { state } => no_output!(gen_check_interrupts(jit, asm, &function.frame_state(state))), Insn::BreakPoint => no_output!(asm.breakpoint()), + Insn::Unreachable => no_output!(asm.abort()), &Insn::HashDup { val, state } => { gen_hash_dup(asm, opnd!(val), &function.frame_state(state)) }, &Insn::HashAref { hash, key, state } => { gen_hash_aref(jit, asm, opnd!(hash), opnd!(key), &function.frame_state(state)) }, &Insn::HashAset { hash, key, val, state } => { no_output!(gen_hash_aset(jit, asm, opnd!(hash), opnd!(key), opnd!(val), &function.frame_state(state))) }, diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 4d006af1ab2c96..1d8358cbada039 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1147,6 +1147,11 @@ pub enum Insn { CheckInterrupts { state: InsnId }, BreakPoint, + + /// Only use this instruction in tests where you need to end a block with + /// a terminator, but don't ever expect the code to be executed. This + /// instruction should never be generated from iseq_to_hir + Unreachable, } /// Macro that enumerates all operands of an Insn, dispatching to caller-provided @@ -1165,7 +1170,7 @@ macro_rules! for_each_operand_impl { | Insn::LoadEC | Insn::GetEP { .. } | Insn::LoadSelf - | Insn::BreakPoint + | Insn::BreakPoint | Insn::Unreachable | Insn::PutSpecialObject { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } => {} @@ -1471,7 +1476,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::BreakPoint + | Insn::CheckInterrupts { .. } | Insn::BreakPoint | Insn::Unreachable | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => false, _ => true, @@ -1698,7 +1703,7 @@ impl Insn { abstract_heaps::Control ), Insn::Entries { .. } => effects::Any, - Insn::BreakPoint => Effect::read_write(abstract_heaps::Empty, abstract_heaps::Control), + Insn::BreakPoint | Insn::Unreachable => Effect::read_write(abstract_heaps::Empty, abstract_heaps::Control), } } @@ -2223,6 +2228,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::CheckInterrupts { .. } => write!(f, "CheckInterrupts"), Insn::IsA { val, class } => write!(f, "IsA {val}, {class}"), Insn::BreakPoint => write!(f, "BreakPoint"), + Insn::Unreachable => write!(f, "Unreachable"), } } } @@ -2783,243 +2789,12 @@ impl Function { } }; } - macro_rules! find_vec { - ( $x:expr ) => { - { - $x.iter().map(|arg| find!(*arg)).collect() - } - }; - } - macro_rules! find_branch_edge { - ( $edge:ident ) => { - { - BranchEdge { - target: $edge.target, - args: find_vec!($edge.args), - } - } - }; - } let insn_id = find!(insn_id); - use Insn::*; - match &self.insns[insn_id.0] { - result@(Const {..} - | Param - | LoadArg {..} - | Entries {..} - | GetConstantPath {..} - | PatchPoint {..} - | PutSpecialObject {..} - | GetGlobal {..} - | SideExit {..} - | EntryPoint {..} - | LoadPC - | LoadSP - | LoadEC - | GetEP {..} - | LoadSelf - | BreakPoint - | IncrCounterPtr {..} - | IncrCounter(_)) => result.clone(), - &Snapshot { state: FrameState { iseq, insn_idx, pc, ref stack, ref locals } } => - Snapshot { - state: FrameState { - iseq, - insn_idx, - pc, - stack: find_vec!(stack), - locals: find_vec!(locals), - } - }, - &Return { val } => Return { val: find!(val) }, - &FixnumBitCheck { val, index } => FixnumBitCheck { val: find!(val), index }, - &Throw { throw_state, val, state } => Throw { throw_state, val: find!(val), state }, - &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, - &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, - &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, - &StringGetbyte { string, index } => StringGetbyte { string: find!(string), index: find!(index) }, - &StringSetbyteFixnum { string, index, value } => StringSetbyteFixnum { string: find!(string), index: find!(index), value: find!(value) }, - &StringAppend { recv, other, state } => StringAppend { recv: find!(recv), other: find!(other), state: find!(state) }, - &StringAppendCodepoint { recv, other, state } => StringAppendCodepoint { recv: find!(recv), other: find!(other), state: find!(state) }, - &StringEqual { left, right } => StringEqual { left: find!(left), right: find!(right) }, - &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, - &Test { val } => Test { val: find!(val) }, - &IsNil { val } => IsNil { val: find!(val) }, - &IsMethodCfunc { val, cd, cfunc, state } => IsMethodCfunc { val: find!(val), cd, cfunc, state }, - &IsBitEqual { left, right } => IsBitEqual { left: find!(left), right: find!(right) }, - &IsBitNotEqual { left, right } => IsBitNotEqual { left: find!(left), right: find!(right) }, - &BoxBool { val } => BoxBool { val: find!(val) }, - &BoxFixnum { val, state } => BoxFixnum { val: find!(val), state: find!(state) }, - &UnboxFixnum { val } => UnboxFixnum { val: find!(val) }, - &FixnumAref { recv, index } => FixnumAref { recv: find!(recv), index: find!(index) }, - Jump(target) => Jump(find_branch_edge!(target)), - &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, - &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, - &RefineType { val, new_type } => RefineType { val: find!(val), new_type }, - &HasType { val, expected } => HasType { val: find!(val), expected }, - &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, - &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, - &GuardBitEquals { val, expected, reason, state, recompile } => GuardBitEquals { val: find!(val), expected, reason, state, recompile }, - &GuardAnyBitSet { val, mask, mask_name, reason, state } => GuardAnyBitSet { val: find!(val), mask, mask_name, reason, state }, - &GuardNoBitsSet { val, mask, mask_name, reason, state } => GuardNoBitsSet { val: find!(val), mask, mask_name, reason, state }, - &GuardGreaterEq { left, right, reason, state } => GuardGreaterEq { left: find!(left), right: find!(right), reason, state }, - &GuardLess { left, right, state } => GuardLess { left: find!(left), right: find!(right), state }, - &IsBlockGiven { lep } => IsBlockGiven { lep: find!(lep) }, - &IsBlockParamModified { flags } => IsBlockParamModified { flags: find!(flags) }, - &GetBlockParam { level, ep_offset, state } => GetBlockParam { level, ep_offset, state: find!(state) }, - &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, - &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, - &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, - &FixnumDiv { left, right, state } => FixnumDiv { left: find!(left), right: find!(right), state }, - &FixnumMod { left, right, state } => FixnumMod { left: find!(left), right: find!(right), state }, - &FloatAdd { recv, other, state } => FloatAdd { recv: find!(recv), other: find!(other), state }, - &FloatSub { recv, other, state } => FloatSub { recv: find!(recv), other: find!(other), state }, - &FloatMul { recv, other, state } => FloatMul { recv: find!(recv), other: find!(other), state }, - &FloatDiv { recv, other, state } => FloatDiv { recv: find!(recv), other: find!(other), state }, - &FloatToInt { recv, state } => FloatToInt { recv: find!(recv), state }, - &FixnumNeq { left, right } => FixnumNeq { left: find!(left), right: find!(right) }, - &FixnumEq { left, right } => FixnumEq { left: find!(left), right: find!(right) }, - &FixnumGt { left, right } => FixnumGt { left: find!(left), right: find!(right) }, - &FixnumGe { left, right } => FixnumGe { left: find!(left), right: find!(right) }, - &FixnumLt { left, right } => FixnumLt { left: find!(left), right: find!(right) }, - &FixnumLe { left, right } => FixnumLe { left: find!(left), right: find!(right) }, - &FixnumAnd { left, right } => FixnumAnd { left: find!(left), right: find!(right) }, - &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) }, - &FixnumXor { left, right } => FixnumXor { left: find!(left), right: find!(right) }, - &IntAnd { left, right } => IntAnd { left: find!(left), right: find!(right) }, - &IntOr { left, right } => IntOr { left: find!(left), right: find!(right) }, - &FixnumLShift { left, right, state } => FixnumLShift { left: find!(left), right: find!(right), state }, - &FixnumRShift { left, right } => FixnumRShift { left: find!(left), right: find!(right) }, - &ObjToString { val, cd, state } => ObjToString { - val: find!(val), - cd, - state, - }, - &AnyToString { val, str, state } => AnyToString { - val: find!(val), - str: find!(str), - state, - }, - &SendDirect { recv, cd, cme, iseq, ref args, kw_bits, block, state } => SendDirect { - recv: find!(recv), - cd, - cme, - iseq, - args: find_vec!(args), - kw_bits, - block, - state, - }, - &Send { recv, cd, block, ref args, state, reason } => Send { - recv: find!(recv), - cd, - block, - args: find_vec!(args), - state, - reason, - }, - &SendForward { recv, cd, blockiseq, ref args, state, reason } => SendForward { - recv: find!(recv), - cd, - blockiseq, - args: find_vec!(args), - state, - reason, - }, - &InvokeSuper { recv, cd, blockiseq, ref args, state, reason } => InvokeSuper { - recv: find!(recv), - cd, - blockiseq, - args: find_vec!(args), - state, - reason, - }, - &InvokeSuperForward { recv, cd, blockiseq, ref args, state, reason } => InvokeSuperForward { - recv: find!(recv), - cd, - blockiseq, - args: find_vec!(args), - state, - reason, - }, - &InvokeBlock { cd, ref args, state, reason } => InvokeBlock { - cd, - args: find_vec!(args), - state, - reason, - }, - &InvokeBlockIfunc { cd, block_handler, ref args, state } => InvokeBlockIfunc { - cd, - block_handler: find!(block_handler), - args: find_vec!(args), - state: find!(state), - }, - &InvokeProc { recv, ref args, state, kw_splat } => InvokeProc { - recv: find!(recv), - args: find_vec!(args), - state: find!(state), - kw_splat, - }, - &InvokeBuiltin { bf, recv, ref args, state, leaf, return_type } => InvokeBuiltin { bf, recv: find!(recv), args: find_vec!(args), state, leaf, return_type }, - &ArrayDup { val, state } => ArrayDup { val: find!(val), state }, - &HashDup { val, state } => HashDup { val: find!(val), state }, - &HashAref { hash, key, state } => HashAref { hash: find!(hash), key: find!(key), state }, - &HashAset { hash, key, val, state } => HashAset { hash: find!(hash), key: find!(key), val: find!(val), state }, - &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state }, - &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) }, - &CCall { cfunc, recv, ref args, name, owner, return_type, elidable } => CCall { cfunc, recv: find!(recv), args: find_vec!(args), name, owner, return_type, elidable }, - &CCallWithFrame { cd, cfunc, recv, ref args, cme, name, state, return_type, elidable, block } => CCallWithFrame { - cd, - cfunc, - recv: find!(recv), - args: find_vec!(args), - cme, - name, - state: find!(state), - return_type, - elidable, - block, - }, - &CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable, block } => CCallVariadic { - cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable, block - }, - &CheckMatch { target, pattern, flag, state } => CheckMatch { target: find!(target), pattern: find!(pattern), flag, state: find!(state) }, - &Defined { op_type, obj, pushval, v, lep_level, state } => Defined { op_type, obj, pushval, v: find!(v), lep_level, state: find!(state) }, - &DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state }, - &GetConstant { klass, id, allow_nil, state } => GetConstant { klass: find!(klass), id, allow_nil: find!(allow_nil), state }, - &NewArray { ref elements, state } => NewArray { elements: find_vec!(elements), state: find!(state) }, - &NewHash { ref elements, state } => NewHash { elements: find_vec!(elements), state: find!(state) }, - &NewRange { low, high, flag, state } => NewRange { low: find!(low), high: find!(high), flag, state: find!(state) }, - &NewRangeFixnum { low, high, flag, state } => NewRangeFixnum { low: find!(low), high: find!(high), flag, state: find!(state) }, - &ArrayAref { array, index } => ArrayAref { array: find!(array), index: find!(index) }, - &ArrayAset { array, index, val } => ArrayAset { array: find!(array), index: find!(index), val: find!(val) }, - &ArrayPop { array, state } => ArrayPop { array: find!(array), state: find!(state) }, - &ArrayLength { array } => ArrayLength { array: find!(array) }, - &AdjustBounds { index, length } => AdjustBounds { index: find!(index), length: find!(length) }, - &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) }, - &ArrayMin { ref elements, state } => ArrayMin { elements: find_vec!(elements), state: find!(state) }, - &ArrayInclude { ref elements, target, state } => ArrayInclude { elements: find_vec!(elements), target: find!(target), state: find!(state) }, - &ArrayPackBuffer { ref elements, fmt, ref buffer, state } => ArrayPackBuffer { elements: find_vec!(elements), fmt: find!(fmt), buffer: (*buffer).map(|buffer| find!(buffer)), state: find!(state) }, - &DupArrayInclude { ary, target, state } => DupArrayInclude { ary, target: find!(target), state: find!(state) }, - &ArrayHash { ref elements, state } => ArrayHash { elements: find_vec!(elements), state }, - &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state }, - &GetIvar { self_val, id, ic, state } => GetIvar { self_val: find!(self_val), id, ic, state }, - &LoadField { recv, id, offset, return_type } => LoadField { recv: find!(recv), id, offset, return_type }, - &StoreField { recv, id, offset, val } => StoreField { recv: find!(recv), id, offset, val: find!(val) }, - &WriteBarrier { recv, val } => WriteBarrier { recv: find!(recv), val: find!(val) }, - &SetIvar { self_val, id, ic, val, state } => SetIvar { self_val: find!(self_val), id, ic, val: find!(val), state }, - &GetClassVar { id, ic, state } => GetClassVar { id, ic, state }, - &SetClassVar { id, val, ic, state } => SetClassVar { id, val: find!(val), ic, state }, - &SetLocal { val, ep_offset, level } => SetLocal { val: find!(val), ep_offset, level }, - &GetSpecialSymbol { symbol_type, state } => GetSpecialSymbol { symbol_type, state }, - &GetSpecialNumber { nth, state } => GetSpecialNumber { nth, state }, - &ToArray { val, state } => ToArray { val: find!(val), state }, - &ToNewArray { val, state } => ToNewArray { val: find!(val), state }, - &ArrayExtend { left, right, state } => ArrayExtend { left: find!(left), right: find!(right), state }, - &ArrayPush { array, val, state } => ArrayPush { array: find!(array), val: find!(val), state }, - &CheckInterrupts { state } => CheckInterrupts { state }, - &IsA { val, class } => IsA { val: find!(val), class: find!(class) }, - } + let mut result = self.insns[insn_id.0].clone(); + result.for_each_operand_mut(&mut |operand: &mut InsnId| { + *operand = find!(*operand); + }); + result } /// Update DynamicSendReason for the instruction at insn_id @@ -3068,7 +2843,7 @@ impl Function { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::BreakPoint + | Insn::CheckInterrupts { .. } | Insn::BreakPoint | Insn::Unreachable | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), @@ -6041,7 +5816,7 @@ impl Function { | Insn::LoadSP | Insn::LoadEC | Insn::GetEP { .. } - | Insn::BreakPoint + | Insn::BreakPoint | Insn::Unreachable | Insn::LoadSelf | Insn::Snapshot { .. } | Insn::Jump { .. }