ruby · kddnewton · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/ext/prism/extension.c b/ext/prism/extension.c
@@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o
     parse_lex_data_t parse_lex_data = {
         .source = source,
         .tokens = rb_ary_new(),
-        .encoding = rb_utf8_encoding(),
+        .encoding = rb_enc_find(pm_parser_encoding_name(parser)),
         .freeze = pm_options_freeze(options),
     };
 

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
@@ -57,7 +57,8 @@ def self.parse(src, filename = "(ripper)", lineno = 1)
       #          [[1, 13], :on_kw,     "end", END      ]]
       #
       def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
-        result = Prism.lex_compat(coerce_source(src), filepath: filename, line: lineno, version: "current")
+        coerced = coerce_source(src)
+        result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
 
         if result.failure? && raise_errors
           raise SyntaxError, result.errors.first.message
@@ -4077,7 +4078,7 @@ def visit_yield_node(node)
 
       # Lazily initialize the parse result.
       def result
-        @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true)
+        @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
       end
 
       def line_and_column_cache

diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
@@ -47,6 +47,24 @@ def test_parse_lex_file
       end
     end
 
+    def test_lex_encoding
+      # With no magic comment in the source, every token value should carry
+      # the encoding that was passed in through the encoding option.
+      source = '"わたし"'.encode(Encoding::Windows_31J)
+      Prism.lex(source, encoding: Encoding::Windows_31J).value.each do |t|
+        assert_equal(Encoding::Windows_31J, t[0].value.encoding)
+      end
+
+      # Shebangs must appear on the first line. For these cases, the encoding
+      # comment may appear second, but it should still change encoding.
+      tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value
+        #! /usr/bin/env ruby
+        # encoding: utf-8
+        "わたし"
+      RUBY
+      tokens.each { |t| assert_equal(Encoding::UTF_8, t[0].value.encoding) }
+    end
+
     if RUBY_VERSION >= "3.3"
       def test_lex_compat
         source = "foo bar"

diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
@@ -224,6 +224,12 @@ def test_tokenize
       assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))
     end
 
+    def test_encoding
+      # A Windows-31J source must produce the same output as Ripper itself.
+      sjis = '"わたし"'.encode(Encoding::Windows_31J)
+      %i[tokenize sexp].each { |api| assert_equal(Ripper.send(api, sjis), Translation::Ripper.send(api, sjis)) }
+    end
+
     def test_sexp_coercion
       string_like = Object.new
       def string_like.to_str