From 2a29c8e02b7dc45096ca623f966c12b11628261c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 29 May 2024 10:12:51 -0400 Subject: [PATCH] Tests overhaul --- lib/prism/ffi.rb | 25 +- lib/prism/translation/ruby_parser.rb | 4 +- test/prism/{ => api}/command_line_test.rb | 10 +- test/prism/api/dump_test.rb | 56 ++ test/prism/{ => api}/parse_comments_test.rb | 14 +- test/prism/{ => api}/parse_stream_test.rb | 11 +- test/prism/api/parse_success_test.rb | 16 + test/prism/api/parse_test.rb | 66 ++ test/prism/bom_test.rb | 2 +- test/prism/encoding/encodings_test.rb | 101 +++ .../regular_expression_encoding_test.rb | 131 ++++ test/prism/encoding/string_encoding_test.rb | 136 +++++ test/prism/encoding/symbol_encoding_test.rb | 108 ++++ test/prism/encoding_test.rb | 577 ------------------ test/prism/errors_test.rb | 3 +- test/prism/fixtures_test.rb | 21 + test/prism/fuzzer_test.rb | 10 +- test/prism/heredoc_dedent_test.rb | 133 +++- test/prism/lex_test.rb | 90 +++ test/prism/library_symbols_test.rb | 2 - test/prism/locals_test.rb | 66 +- test/prism/magic_comment_test.rb | 121 +++- test/prism/newline_offsets_test.rb | 22 + test/prism/newline_test.rb | 27 +- test/prism/parse_test.rb | 371 ----------- test/prism/parser_test.rb | 186 ------ test/prism/regexp_test.rb | 6 +- .../{ => result}/attribute_write_test.rb | 10 +- test/prism/{ => result}/comments_test.rb | 2 +- .../{ => result}/constant_path_node_test.rb | 16 +- test/prism/result/equality_test.rb | 22 + test/prism/result/heredoc_test.rb | 19 + test/prism/{ => result}/index_write_test.rb | 2 +- test/prism/result/integer_base_flags_test.rb | 33 + test/prism/{ => result}/integer_parse_test.rb | 4 +- test/prism/result/numeric_value_test.rb | 21 + test/prism/result/overlap_test.rb | 43 ++ .../{ => result}/redundant_return_test.rb | 2 +- .../result/regular_expression_options_test.rb | 25 + .../source_location_test.rb} | 6 +- .../prism/{ => result}/static_inspect_test.rb | 2 +- .../{ => result}/static_literals_test.rb | 2 
+- test/prism/{ => result}/warnings_test.rb | 3 +- test/prism/{ => ruby}/compiler_test.rb | 2 +- .../prism/{ => ruby}/desugar_compiler_test.rb | 2 +- test/prism/{ => ruby}/dispatcher_test.rb | 2 +- test/prism/ruby/location_test.rb | 173 ++++++ .../{ => ruby}/parameters_signature_test.rb | 20 +- test/prism/ruby/parser_test.rb | 288 +++++++++ test/prism/{ => ruby}/pattern_test.rb | 2 +- test/prism/{ => ruby}/reflection_test.rb | 2 +- test/prism/{ => ruby}/ripper_test.rb | 33 +- test/prism/ruby/ruby_parser_test.rb | 127 ++++ test/prism/ruby/tunnel_test.rb | 26 + test/prism/ruby_api_test.rb | 307 ---------- test/prism/ruby_parser_test.rb | 135 ---- test/prism/snapshots_test.rb | 73 +++ test/prism/snippets_test.rb | 42 ++ test/prism/test_helper.rb | 213 ++++++- test/prism/unescape_test.rb | 4 +- 60 files changed, 2201 insertions(+), 1777 deletions(-) rename test/prism/{ => api}/command_line_test.rb (91%) create mode 100644 test/prism/api/dump_test.rb rename test/prism/{ => api}/parse_comments_test.rb (54%) rename test/prism/{ => api}/parse_stream_test.rb (83%) create mode 100644 test/prism/api/parse_success_test.rb create mode 100644 test/prism/api/parse_test.rb create mode 100644 test/prism/encoding/encodings_test.rb create mode 100644 test/prism/encoding/regular_expression_encoding_test.rb create mode 100644 test/prism/encoding/string_encoding_test.rb create mode 100644 test/prism/encoding/symbol_encoding_test.rb delete mode 100644 test/prism/encoding_test.rb create mode 100644 test/prism/fixtures_test.rb create mode 100644 test/prism/lex_test.rb create mode 100644 test/prism/newline_offsets_test.rb delete mode 100644 test/prism/parse_test.rb delete mode 100644 test/prism/parser_test.rb rename test/prism/{ => result}/attribute_write_test.rb (86%) rename test/prism/{ => result}/comments_test.rb (99%) rename test/prism/{ => result}/constant_path_node_test.rb (78%) create mode 100644 test/prism/result/equality_test.rb create mode 100644 test/prism/result/heredoc_test.rb 
rename test/prism/{ => result}/index_write_test.rb (98%) create mode 100644 test/prism/result/integer_base_flags_test.rb rename test/prism/{ => result}/integer_parse_test.rb (90%) create mode 100644 test/prism/result/numeric_value_test.rb create mode 100644 test/prism/result/overlap_test.rb rename test/prism/{ => result}/redundant_return_test.rb (98%) create mode 100644 test/prism/result/regular_expression_options_test.rb rename test/prism/{location_test.rb => result/source_location_test.rb} (99%) rename test/prism/{ => result}/static_inspect_test.rb (98%) rename test/prism/{ => result}/static_literals_test.rb (98%) rename test/prism/{ => result}/warnings_test.rb (99%) rename test/prism/{ => ruby}/compiler_test.rb (94%) rename test/prism/{ => ruby}/desugar_compiler_test.rb (99%) rename test/prism/{ => ruby}/dispatcher_test.rb (97%) create mode 100644 test/prism/ruby/location_test.rb rename test/prism/{ => ruby}/parameters_signature_test.rb (85%) create mode 100644 test/prism/ruby/parser_test.rb rename test/prism/{ => ruby}/pattern_test.rb (98%) rename test/prism/{ => ruby}/reflection_test.rb (93%) rename test/prism/{ => ruby}/ripper_test.rb (66%) create mode 100644 test/prism/ruby/ruby_parser_test.rb create mode 100644 test/prism/ruby/tunnel_test.rb delete mode 100644 test/prism/ruby_api_test.rb delete mode 100644 test/prism/ruby_parser_test.rb create mode 100644 test/prism/snapshots_test.rb create mode 100644 test/prism/snippets_test.rb diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index df7e183310c..6b48af43ccd 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -200,8 +200,8 @@ def self.with_file(filepath) class << self # Mirror the Prism.dump API by using the serialization API. 
- def dump(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) } + def dump(source, **options) + LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. @@ -302,6 +302,27 @@ def parse_file_failure?(filepath, **options) !parse_file_success?(filepath, **options) end + # Mirror the Prism.profile API by using the serialization API. + def profile(source, **options) + LibRubyParser::PrismString.with_string(source) do |string| + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + nil + end + end + end + + # Mirror the Prism.profile_file API by using the serialization API. + def profile_file(filepath, **options) + LibRubyParser::PrismString.with_file(filepath) do |string| + LibRubyParser::PrismBuffer.with do |buffer| + options[:filepath] = filepath + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + nil + end + end + end + private def dump_common(string, options) # :nodoc: diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index ec458a3b639..38690c54b3e 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -485,9 +485,9 @@ def visit_constant_path_or_write_node(node) def visit_constant_path_target_node(node) inner = if node.parent.nil? 
- s(node, :colon3, node.child.name) + s(node, :colon3, node.name) else - s(node, :colon2, visit(node.parent), node.child.name) + s(node, :colon2, visit(node.parent), node.name) end s(node, :const, inner) diff --git a/test/prism/command_line_test.rb b/test/prism/api/command_line_test.rb similarity index 91% rename from test/prism/command_line_test.rb rename to test/prism/api/command_line_test.rb index 4b04c36f3aa..a313845ead7 100644 --- a/test/prism/command_line_test.rb +++ b/test/prism/api/command_line_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class CommandLineTest < TestCase @@ -67,7 +67,7 @@ def test_command_line_e end def test_command_line_x_implicit - result = Prism.parse(<<~RUBY) + result = Prism.parse_statement(<<~RUBY) #!/bin/bash exit 1 @@ -75,18 +75,18 @@ def test_command_line_x_implicit 1 RUBY - assert_kind_of IntegerNode, result.value.statements.body.first + assert_kind_of IntegerNode, result end def test_command_line_x_explicit - result = Prism.parse(<<~RUBY, command_line: "x") + result = Prism.parse_statement(<<~RUBY, command_line: "x") exit 1 #!/usr/bin/env ruby 1 RUBY - assert_kind_of IntegerNode, result.value.statements.body.first + assert_kind_of IntegerNode, result end def test_command_line_x_implicit_fail diff --git a/test/prism/api/dump_test.rb b/test/prism/api/dump_test.rb new file mode 100644 index 00000000000..941088e1592 --- /dev/null +++ b/test/prism/api/dump_test.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +return if ENV["PRISM_BUILD_MINIMAL"] + +require_relative "../test_helper" + +module Prism + class DumpTest < TestCase + Fixture.each do |fixture| + define_method(fixture.test_name) { assert_dump(fixture) } + end + + def test_dump + filepath = __FILE__ + source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) + + assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value + assert_equal 
Prism.dump(source, filepath: filepath), Prism.dump_file(filepath) + + serialized = Prism.dump(source, filepath: filepath) + ast1 = Prism.load(source, serialized).value + ast2 = Prism.parse(source, filepath: filepath).value + ast3 = Prism.parse_file(filepath).value + + assert_equal_nodes ast1, ast2 + assert_equal_nodes ast2, ast3 + end + + def test_dump_file + assert_nothing_raised do + Prism.dump_file(__FILE__) + end + + error = assert_raise Errno::ENOENT do + Prism.dump_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + assert_raise TypeError do + Prism.dump_file(nil) + end + end + + private + + def assert_dump(fixture) + source = fixture.read + + result = Prism.parse(source, filepath: fixture.path) + dumped = Prism.dump(source, filepath: fixture.path) + + assert_equal_nodes(result.value, Prism.load(source, dumped).value) + end + end +end diff --git a/test/prism/parse_comments_test.rb b/test/prism/api/parse_comments_test.rb similarity index 54% rename from test/prism/parse_comments_test.rb rename to test/prism/api/parse_comments_test.rb index 30086e31552..4dbcca1827f 100644 --- a/test/prism/parse_comments_test.rb +++ b/test/prism/api/parse_comments_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class ParseCommentsTest < TestCase @@ -17,5 +17,17 @@ def test_parse_file_comments assert_kind_of Array, comments assert_equal 1, comments.length end + + def test_parse_file_comments_error + error = assert_raise Errno::ENOENT do + Prism.parse_file_comments("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + assert_raise TypeError do + Prism.parse_file_comments(nil) + end + end end end diff --git a/test/prism/parse_stream_test.rb b/test/prism/api/parse_stream_test.rb similarity index 83% rename from test/prism/parse_stream_test.rb rename to test/prism/api/parse_stream_test.rb index 
9e6347b92b0..0edee74cc22 100644 --- a/test/prism/parse_stream_test.rb +++ b/test/prism/api/parse_stream_test.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" -require "stringio" +require_relative "../test_helper" module Prism class ParseStreamTest < TestCase @@ -10,7 +9,7 @@ def test_single_line result = Prism.parse_stream(io) assert result.success? - assert_kind_of Prism::CallNode, result.value.statements.body.first + assert_kind_of Prism::CallNode, result.statement end def test_multi_line @@ -18,8 +17,8 @@ def test_multi_line result = Prism.parse_stream(io) assert result.success? - assert_kind_of Prism::CallNode, result.value.statements.body.first - assert_kind_of Prism::CallNode, result.value.statements.body.last + assert_kind_of Prism::CallNode, result.statement + assert_kind_of Prism::CallNode, result.value.statements.body.last end def test_multi_read @@ -27,7 +26,7 @@ def test_multi_read result = Prism.parse_stream(io) assert result.success? - assert_kind_of Prism::CallNode, result.value.statements.body.first + assert_kind_of Prism::CallNode, result.statement end def test___END__ diff --git a/test/prism/api/parse_success_test.rb b/test/prism/api/parse_success_test.rb new file mode 100644 index 00000000000..2caaa5136e2 --- /dev/null +++ b/test/prism/api/parse_success_test.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class ParseSuccessTest < TestCase + def test_parse_success? + assert Prism.parse_success?("1") + refute Prism.parse_success?("<>") + end + + def test_parse_file_success? 
+ assert Prism.parse_file_success?(__FILE__) + end + end +end diff --git a/test/prism/api/parse_test.rb b/test/prism/api/parse_test.rb new file mode 100644 index 00000000000..864d38461ad --- /dev/null +++ b/test/prism/api/parse_test.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class ParseTest < TestCase + def test_parse_empty_string + result = Prism.parse("") + assert_equal [], result.value.statements.body + end + + def test_parse_takes_file_path + filepath = "filepath.rb" + result = Prism.parse("def foo; __FILE__; end", filepath: filepath) + + assert_equal filepath, find_source_file_node(result.value).filepath + end + + def test_parse_takes_line + line = 4 + result = Prism.parse("def foo\n __FILE__\nend", line: line) + + assert_equal line, result.value.location.start_line + assert_equal line + 1, find_source_file_node(result.value).location.start_line + + result = Prism.parse_lex("def foo\n __FILE__\nend", line: line) + assert_equal line, result.value.first.location.start_line + end + + def test_parse_takes_negative_lines + line = -2 + result = Prism.parse("def foo\n __FILE__\nend", line: line) + + assert_equal line, result.value.location.start_line + assert_equal line + 1, find_source_file_node(result.value).location.start_line + + result = Prism.parse_lex("def foo\n __FILE__\nend", line: line) + assert_equal line, result.value.first.location.start_line + end + + def test_parse_file + node = Prism.parse_file(__FILE__).value + assert_kind_of ProgramNode, node + + error = assert_raise Errno::ENOENT do + Prism.parse_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + assert_raise TypeError do + Prism.parse_file(nil) + end + end + + private + + def find_source_file_node(program) + queue = [program] + while (node = queue.shift) + return node if node.is_a?(SourceFileNode) + queue.concat(node.compact_child_nodes) + end + end + end +end diff --git 
a/test/prism/bom_test.rb b/test/prism/bom_test.rb index 1525caf458d..890bc4b36c3 100644 --- a/test/prism/bom_test.rb +++ b/test/prism/bom_test.rb @@ -2,7 +2,7 @@ # Don't bother checking this on these engines, this is such a specific Ripper # test. -return if RUBY_ENGINE == "jruby" || RUBY_ENGINE == "truffleruby" +return if RUBY_ENGINE != "ruby" require_relative "test_helper" diff --git a/test/prism/encoding/encodings_test.rb b/test/prism/encoding/encodings_test.rb new file mode 100644 index 00000000000..4ad2b465cc1 --- /dev/null +++ b/test/prism/encoding/encodings_test.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +return if RUBY_ENGINE != "ruby" + +require_relative "../test_helper" + +module Prism + class EncodingsTest < TestCase + class ConstantContext < BasicObject + def self.const_missing(const) + const + end + end + + class IdentifierContext < BasicObject + def method_missing(name, *) + name + end + end + + # These test that we're correctly parsing codepoints for each alias of each + # encoding that prism supports. + each_encoding do |encoding, range| + (encoding.names - %w[external internal filesystem locale]).each do |name| + define_method(:"test_encoding_#{name}") do + assert_encoding(encoding, name, range) + end + end + end + + private + + def assert_encoding_constant(name, character) + source = "# encoding: #{name}\n#{character}" + expected = ConstantContext.new.instance_eval(source) + + result = Prism.parse(source) + assert result.success? + + actual = result.value.statements.body.last + assert_kind_of ConstantReadNode, actual + assert_equal expected, actual.name + end + + def assert_encoding_identifier(name, character) + source = "# encoding: #{name}\n#{character}" + expected = IdentifierContext.new.instance_eval(source) + + result = Prism.parse(source) + assert result.success? 
+ + actual = result.value.statements.body.last + assert_kind_of CallNode, actual + assert_equal expected, actual.name + end + + # Check that we can properly parse every codepoint in the given encoding. + def assert_encoding(encoding, name, range) + # I'm not entirely sure, but I believe these codepoints are incorrect in + # their parsing in CRuby. They all report as matching `[[:lower:]]` but + # then they are parsed as constants. This is because CRuby determines if + # an identifier is a constant or not by case folding it down to lowercase + # and checking if there is a difference. And even though they report + # themselves as lowercase, their case fold is different. I have reported + # this bug upstream. + case encoding + when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8 + range = range.to_a - [ + 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, + 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, + 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, + 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc, + ] + when Encoding::Windows_1253 + range = range.to_a - [0xb5] + end + + range.each do |codepoint| + character = codepoint.chr(encoding) + + if character.match?(/[[:alpha:]]/) + if character.match?(/[[:upper:]]/) + assert_encoding_constant(name, character) + else + assert_encoding_identifier(name, character) + end + elsif character.match?(/[[:alnum:]]/) + assert_encoding_identifier(name, "_#{character}") + else + next if ["/", "{"].include?(character) + + source = "# encoding: #{name}\n/(?##{character})/\n" + assert Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully." 
+ end + rescue RangeError + source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}" + assert Prism.parse_failure?(source) + end + end + end +end diff --git a/test/prism/encoding/regular_expression_encoding_test.rb b/test/prism/encoding/regular_expression_encoding_test.rb new file mode 100644 index 00000000000..5d062fe59a2 --- /dev/null +++ b/test/prism/encoding/regular_expression_encoding_test.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +return unless defined?(RubyVM::InstructionSequence) +return if RubyVM::InstructionSequence.compile("").to_a[4][:parser] == :prism + +require_relative "../test_helper" + +module Prism + class RegularExpressionEncodingTest < TestCase + each_encoding do |encoding, _| + define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do + assert_regular_expression_encoding_flags(encoding, ["/a/", "/ą/", "//"]) + end + + escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"] + escapes = escapes.concat(escapes.product(escapes).map(&:join)) + + define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do + assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" }) + end + + ["n", "u", "e", "s"].each do |modifier| + define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do + regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ] + + assert_regular_expression_encoding_flags( + encoding, + regexp_sources.map { |r| "/#{r}/#{modifier}" } + ) + end + end + end + + private + + def assert_regular_expression_encoding_flags(encoding, regexps) + regexps.each do |regexp| + regexp_modifier_used = regexp.end_with?("/u") || regexp.end_with?("/e") || regexp.end_with?("/s") || regexp.end_with?("/n") + source = "# encoding: #{encoding.name}\n#{regexp}" + + encoding_errors = ["invalid multibyte char", "escaped non ASCII 
character in UTF-8 regexp", "differs from source encoding"] + skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"] + + # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104 + unless regexp_modifier_used + skipped_errors += encoding_errors + encoding_errors.clear + end + + expected = + begin + eval(source).encoding + rescue SyntaxError => error + if encoding_errors.find { |e| error.message.include?(e) } + error.message.split("\n").map { |m| m[/: (.+?)$/, 1] } + elsif skipped_errors.find { |e| error.message.include?(e) } + next + else + raise + end + end + + actual = + Prism.parse(source).then do |result| + if result.success? + regexp = result.statement + + actual_encoding = if regexp.forced_utf8_encoding? + Encoding::UTF_8 + elsif regexp.forced_binary_encoding? + Encoding::ASCII_8BIT + elsif regexp.forced_us_ascii_encoding? + Encoding::US_ASCII + elsif regexp.ascii_8bit? + Encoding::ASCII_8BIT + elsif regexp.utf_8? + Encoding::UTF_8 + elsif regexp.euc_jp? + Encoding::EUC_JP + elsif regexp.windows_31j? + Encoding::Windows_31J + else + encoding + end + + if regexp.utf_8? && actual_encoding != Encoding::UTF_8 + raise "expected regexp encoding to be UTF-8 due to '/u' modifier, but got #{actual_encoding.name}" + elsif regexp.ascii_8bit? && (actual_encoding != Encoding::ASCII_8BIT && actual_encoding != Encoding::US_ASCII) + raise "expected regexp encoding to be ASCII-8BIT or US-ASCII due to '/n' modifier, but got #{actual_encoding.name}" + elsif regexp.euc_jp? && actual_encoding != Encoding::EUC_JP + raise "expected regexp encoding to be EUC-JP due to '/e' modifier, but got #{actual_encoding.name}" + elsif regexp.windows_31j? 
&& actual_encoding != Encoding::Windows_31J + raise "expected regexp encoding to be Windows-31J due to '/s' modifier, but got #{actual_encoding.name}" + end + + if regexp.utf_8? && regexp.forced_utf8_encoding? + raise "the forced_utf8 flag should not be set when the UTF-8 modifier (/u) is used" + elsif regexp.ascii_8bit? && regexp.forced_binary_encoding? + raise "the forced_ascii_8bit flag should not be set when the ASCII-8BIT modifier (/n) is used" + end + + actual_encoding + else + errors = result.errors.map(&:message) + + if errors.last&.include?("UTF-8 mixed within") + nil + else + errors + end + end + end + + # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages. + # This class of error message is tricky. The part not being compared is a representation of the regexp. + # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented. + # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses + # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct. 
+ if expected.is_a?(Array) && actual.is_a?(Array) + if expected.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") && + actual.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") + expected.last.clear + actual.last.clear + end + end + + assert_equal expected, actual + end + end + end +end diff --git a/test/prism/encoding/string_encoding_test.rb b/test/prism/encoding/string_encoding_test.rb new file mode 100644 index 00000000000..6f9d86df3be --- /dev/null +++ b/test/prism/encoding/string_encoding_test.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class StringEncodingTest < TestCase + each_encoding do |encoding, _| + define_method(:"test_#{encoding.name}") do + assert_encoding(encoding) + end + end + + def test_coding + actual = Prism.parse_statement("# coding: utf-8\n'string'").unescaped.encoding + assert_equal Encoding::UTF_8, actual + end + + def test_coding_with_whitespace + actual = Prism.parse_statement("# coding \t \r \v : \t \v \r ascii-8bit \n'string'").unescaped.encoding + assert_equal Encoding::ASCII_8BIT, actual + end + + def test_emacs_style + actual = Prism.parse_statement("# -*- coding: utf-8 -*-\n'string'").unescaped.encoding + assert_equal Encoding::UTF_8, actual + end + + def test_utf_8_unix + actual = Prism.parse_statement("# coding: utf-8-unix\n'string'").unescaped.encoding + assert_equal Encoding::UTF_8, actual + end + + def test_utf_8_dos + actual = Prism.parse_statement("# coding: utf-8-dos\n'string'").unescaped.encoding + assert_equal Encoding::UTF_8, actual + end + + def test_utf_8_mac + actual = Prism.parse_statement("# coding: utf-8-mac\n'string'").unescaped.encoding + assert_equal Encoding::UTF_8, actual + end + + def test_utf_8_star + actual = Prism.parse_statement("# coding: utf-8-*\n'string'").unescaped.encoding + assert_equal Encoding::UTF_8, actual + end + + def test_first_lexed_token + encoding = 
Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding + assert_equal Encoding::ASCII_8BIT, encoding + end + + if !ENV["PRISM_BUILD_MINIMAL"] + # This test may be a little confusing. Basically when we use our strpbrk, + # it takes into account the encoding of the file. + def test_strpbrk_multibyte + result = Prism.parse(<<~RUBY) + # encoding: Shift_JIS + %w[\x81\x5c] + RUBY + + assert(result.errors.empty?) + assert_equal( + (+"\x81\x5c").force_encoding(Encoding::Shift_JIS), + result.statement.elements.first.unescaped + ) + end + + def test_slice_encoding + slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice + assert_equal (+"ア").force_encoding(Encoding::SHIFT_JIS), slice + assert_equal Encoding::SHIFT_JIS, slice.encoding + end + + def test_multibyte_escapes + [ + ["'", "'"], + ["\"", "\""], + ["`", "`"], + ["/", "/"], + ["<<'HERE'\n", "\nHERE"], + ["<<-HERE\n", "\nHERE"] + ].each do |opening, closing| + assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n") + end + end + end + + private + + def assert_encoding(encoding) + escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"] + escapes = escapes.concat(escapes.product(escapes).map(&:join)) + + escapes.each do |escaped| + source = "# encoding: #{encoding.name}\n\"#{escaped}\"" + + expected = + begin + eval(source).encoding + rescue SyntaxError => error + if error.message.include?("UTF-8 mixed within") + error.message[/UTF-8 mixed within .+? source/] + else + raise + end + end + + actual = + Prism.parse(source).then do |result| + if result.success? + string = result.statement + + if string.forced_utf8_encoding? + Encoding::UTF_8 + elsif string.forced_binary_encoding? 
+ Encoding::ASCII_8BIT + else + encoding + end + else + error = result.errors.first + + if error.message.include?("mixed") + error.message + else + raise error.message + end + end + end + + assert_equal expected, actual + end + end + end +end diff --git a/test/prism/encoding/symbol_encoding_test.rb b/test/prism/encoding/symbol_encoding_test.rb new file mode 100644 index 00000000000..20c998a58bb --- /dev/null +++ b/test/prism/encoding/symbol_encoding_test.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +return if RUBY_ENGINE != "ruby" + +require_relative "../test_helper" + +module Prism + class SymbolEncodingTest < TestCase + each_encoding do |encoding, _| + define_method(:"test_symbols_#{encoding.name}") do + assert_symbols(encoding) + end + + define_method(:"test_escapes_#{encoding.name}") do + assert_escapes(encoding) + end + end + + private + + def expected_encoding(source) + eval(source).encoding + end + + def actual_encoding(source, encoding) + result = Prism.parse(source) + + if result.success? + symbol = result.statement + + if symbol.forced_utf8_encoding? + Encoding::UTF_8 + elsif symbol.forced_binary_encoding? + Encoding::ASCII_8BIT + elsif symbol.forced_us_ascii_encoding? 
+ Encoding::US_ASCII + else + encoding + end + else + raise SyntaxError.new(result.errors.map(&:message).join("\n")) + end + end + + def assert_symbols(encoding) + [:a, :ą, :+].each do |symbol| + source = "# encoding: #{encoding.name}\n#{symbol.inspect}" + + expected = + begin + expected_encoding(source) + rescue SyntaxError => error + if error.message.include?("invalid multibyte") + "invalid multibyte" + else + raise + end + end + + actual = + begin + actual_encoding(source, encoding) + rescue SyntaxError => error + if error.message.include?("invalid multibyte") + "invalid multibyte" + else + raise + end + end + + assert_equal expected, actual + end + end + + def assert_escapes(encoding) + escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"] + escapes = escapes.concat(escapes.product(escapes).map(&:join)) + + escapes.each do |escaped| + source = "# encoding: #{encoding.name}\n:\"#{escaped}\"" + + expected = + begin + expected_encoding(source) + rescue SyntaxError => error + if error.message.include?("UTF-8 mixed within") + error.message[/UTF-8 mixed within .+? 
source/] + else + raise + end + end + + actual = + begin + actual_encoding(source, encoding) + rescue SyntaxError => error + if error.message.include?("mixed") + error.message.split("\n", 2).first + else + raise + end + end + + assert_equal expected, actual + end + end + end +end diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb deleted file mode 100644 index 2aee473ddf9..00000000000 --- a/test/prism/encoding_test.rb +++ /dev/null @@ -1,577 +0,0 @@ -# frozen_string_literal: true - -return if RUBY_ENGINE != "ruby" - -require_relative "test_helper" - -module Prism - class EncodingTest < TestCase - codepoints_1byte = 0...0x100 - encodings = { - Encoding::ASCII_8BIT => codepoints_1byte, - Encoding::US_ASCII => codepoints_1byte - } - - if !ENV["PRISM_BUILD_MINIMAL"] - encodings[Encoding::Windows_1253] = codepoints_1byte - end - - # By default we don't test every codepoint in these encodings because it - # takes a very long time. - if ENV["PRISM_TEST_ALL_ENCODINGS"] - codepoints_2bytes = 0...0x10000 - codepoints_unicode = (0...0x110000) - - codepoints_eucjp = [ - *(0...0x10000), - *(0...0x10000).map { |bytes| bytes | 0x8F0000 } - ] - - codepoints_emacs_mule = [ - *(0...0x80), - *((0x81...0x90).flat_map { |byte1| (0x90...0x100).map { |byte2| byte1 << 8 | byte2 } }), - *((0x90...0x9C).flat_map { |byte1| (0xA0...0x100).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| byte1 << 16 | byte2 << 8 | byte3 } } }), - *((0xF0...0xF5).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| (0xA0...0x100).flat_map { |byte4| 0x9C << 24 | byte3 << 16 | byte3 << 8 | byte4 } } }), - ] - - codepoints_gb18030 = [ - *(0...0x80), - *((0x81..0xFE).flat_map { |byte1| (0x40...0x100).map { |byte2| byte1 << 8 | byte2 } }), - *((0x81..0xFE).flat_map { |byte1| (0x30...0x40).flat_map { |byte2| (0x81..0xFE).flat_map { |byte3| (0x2F...0x41).map { |byte4| byte1 << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } }), - ] - - codepoints_euc_tw = [ - *(0..0x7F), - *(0xA1..0xFF).flat_map { 
|byte1| (0xA1..0xFF).map { |byte2| (byte1 << 8) | byte2 } }, - *(0xA1..0xB0).flat_map { |byte2| (0xA1..0xFF).flat_map { |byte3| (0xA1..0xFF).flat_map { |byte4| 0x8E << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } - ] - - encodings.merge!( - Encoding::CP850 => codepoints_1byte, - Encoding::CP852 => codepoints_1byte, - Encoding::CP855 => codepoints_1byte, - Encoding::GB1988 => codepoints_1byte, - Encoding::IBM437 => codepoints_1byte, - Encoding::IBM720 => codepoints_1byte, - Encoding::IBM737 => codepoints_1byte, - Encoding::IBM775 => codepoints_1byte, - Encoding::IBM852 => codepoints_1byte, - Encoding::IBM855 => codepoints_1byte, - Encoding::IBM857 => codepoints_1byte, - Encoding::IBM860 => codepoints_1byte, - Encoding::IBM861 => codepoints_1byte, - Encoding::IBM862 => codepoints_1byte, - Encoding::IBM863 => codepoints_1byte, - Encoding::IBM864 => codepoints_1byte, - Encoding::IBM865 => codepoints_1byte, - Encoding::IBM866 => codepoints_1byte, - Encoding::IBM869 => codepoints_1byte, - Encoding::ISO_8859_1 => codepoints_1byte, - Encoding::ISO_8859_2 => codepoints_1byte, - Encoding::ISO_8859_3 => codepoints_1byte, - Encoding::ISO_8859_4 => codepoints_1byte, - Encoding::ISO_8859_5 => codepoints_1byte, - Encoding::ISO_8859_6 => codepoints_1byte, - Encoding::ISO_8859_7 => codepoints_1byte, - Encoding::ISO_8859_8 => codepoints_1byte, - Encoding::ISO_8859_9 => codepoints_1byte, - Encoding::ISO_8859_10 => codepoints_1byte, - Encoding::ISO_8859_11 => codepoints_1byte, - Encoding::ISO_8859_13 => codepoints_1byte, - Encoding::ISO_8859_14 => codepoints_1byte, - Encoding::ISO_8859_15 => codepoints_1byte, - Encoding::ISO_8859_16 => codepoints_1byte, - Encoding::KOI8_R => codepoints_1byte, - Encoding::KOI8_U => codepoints_1byte, - Encoding::MACCENTEURO => codepoints_1byte, - Encoding::MACCROATIAN => codepoints_1byte, - Encoding::MACCYRILLIC => codepoints_1byte, - Encoding::MACGREEK => codepoints_1byte, - Encoding::MACICELAND => codepoints_1byte, - Encoding::MACROMAN => 
codepoints_1byte, - Encoding::MACROMANIA => codepoints_1byte, - Encoding::MACTHAI => codepoints_1byte, - Encoding::MACTURKISH => codepoints_1byte, - Encoding::MACUKRAINE => codepoints_1byte, - Encoding::TIS_620 => codepoints_1byte, - Encoding::Windows_1250 => codepoints_1byte, - Encoding::Windows_1251 => codepoints_1byte, - Encoding::Windows_1252 => codepoints_1byte, - Encoding::Windows_1254 => codepoints_1byte, - Encoding::Windows_1255 => codepoints_1byte, - Encoding::Windows_1256 => codepoints_1byte, - Encoding::Windows_1257 => codepoints_1byte, - Encoding::Windows_1258 => codepoints_1byte, - Encoding::Windows_874 => codepoints_1byte, - Encoding::Big5 => codepoints_2bytes, - Encoding::Big5_HKSCS => codepoints_2bytes, - Encoding::Big5_UAO => codepoints_2bytes, - Encoding::CP949 => codepoints_2bytes, - Encoding::CP950 => codepoints_2bytes, - Encoding::CP951 => codepoints_2bytes, - Encoding::EUC_KR => codepoints_2bytes, - Encoding::GBK => codepoints_2bytes, - Encoding::GB12345 => codepoints_2bytes, - Encoding::GB2312 => codepoints_2bytes, - Encoding::MACJAPANESE => codepoints_2bytes, - Encoding::Shift_JIS => codepoints_2bytes, - Encoding::SJIS_DoCoMo => codepoints_2bytes, - Encoding::SJIS_KDDI => codepoints_2bytes, - Encoding::SJIS_SoftBank => codepoints_2bytes, - Encoding::Windows_31J => codepoints_2bytes, - Encoding::UTF_8 => codepoints_unicode, - Encoding::UTF8_MAC => codepoints_unicode, - Encoding::UTF8_DoCoMo => codepoints_unicode, - Encoding::UTF8_KDDI => codepoints_unicode, - Encoding::UTF8_SoftBank => codepoints_unicode, - Encoding::CESU_8 => codepoints_unicode, - Encoding::CP51932 => codepoints_eucjp, - Encoding::EUC_JP => codepoints_eucjp, - Encoding::EUCJP_MS => codepoints_eucjp, - Encoding::EUC_JIS_2004 => codepoints_eucjp, - Encoding::EMACS_MULE => codepoints_emacs_mule, - Encoding::STATELESS_ISO_2022_JP => codepoints_emacs_mule, - Encoding::STATELESS_ISO_2022_JP_KDDI => codepoints_emacs_mule, - Encoding::GB18030 => codepoints_gb18030, - 
Encoding::EUC_TW => codepoints_euc_tw - ) - end - - # These test that we're correctly parsing codepoints for each alias of each - # encoding that prism supports. - encodings.each do |encoding, range| - (encoding.names - %w[external internal filesystem locale]).each do |name| - define_method(:"test_encoding_#{name}") do - assert_encoding(encoding, name, range) - end - end - end - - # These test that we're correctly setting the flags on strings for each - # encoding that prism supports. - escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"] - escapes = escapes.concat(escapes.product(escapes).map(&:join)) - symbols = [:a, :ą, :+] - regexps = [/a/, /ą/, //] - - encodings.each_key do |encoding| - define_method(:"test_encoding_flags_#{encoding.name}") do - assert_encoding_flags(encoding, escapes) - end - - define_method(:"test_symbol_encoding_flags_#{encoding.name}") do - assert_symbol_encoding_flags(encoding, symbols) - end - - define_method(:"test_symbol_character_escape_encoding_flags_#{encoding.name}") do - assert_symbol_character_escape_encoding_flags(encoding, escapes) - end - - define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do - assert_regular_expression_encoding_flags(encoding, regexps.map(&:inspect)) - end - - define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do - assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" }) - end - end - - encoding_modifiers = { ascii_8bit: "n", utf_8: "u", euc_jp: "e", windows_31j: "s" } - regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ] - - encoding_modifiers.each_value do |modifier| - encodings.each_key do |encoding| - define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do - assert_regular_expression_encoding_flags( - encoding, - regexp_sources.product(encoding_modifiers.values).map { |r, modifier| 
"/#{r}/#{modifier}" } - ) - end - end - end - - def test_coding - result = Prism.parse("# coding: utf-8\n'string'") - actual = result.value.statements.body.first.unescaped.encoding - assert_equal Encoding.find("utf-8"), actual - end - - def test_coding_with_whitespace - result = Prism.parse("# coding \t \r \v : \t \v \r ascii-8bit \n'string'") - actual = result.value.statements.body.first.unescaped.encoding - assert_equal Encoding.find("ascii-8bit"), actual - end - - def test_emacs_style - result = Prism.parse("# -*- coding: utf-8 -*-\n'string'") - actual = result.value.statements.body.first.unescaped.encoding - assert_equal Encoding.find("utf-8"), actual - end - - def test_utf_8_variations - %w[ - utf-8-unix - utf-8-dos - utf-8-mac - utf-8-* - ].each do |encoding| - result = Prism.parse("# coding: #{encoding}\n'string'") - actual = result.value.statements.body.first.unescaped.encoding - assert_equal Encoding.find("utf-8"), actual - end - end - - def test_first_lexed_token - encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding - assert_equal Encoding.find("ascii-8bit"), encoding - end - - if !ENV["PRISM_BUILD_MINIMAL"] - # This test may be a little confusing. Basically when we use our strpbrk, - # it takes into account the encoding of the file. - def test_strpbrk_multibyte - result = Prism.parse(<<~RUBY) - # encoding: Shift_JIS - %w[\x81\x5c] - RUBY - - assert(result.errors.empty?) 
- assert_equal( - (+"\x81\x5c").force_encoding(Encoding::Shift_JIS), - result.value.statements.body.first.elements.first.unescaped - ) - end - - def test_slice_encoding - slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice - assert_equal (+"ア").force_encoding(Encoding::SHIFT_JIS), slice - assert_equal Encoding::SHIFT_JIS, slice.encoding - end - - def test_multibyte_escapes - [ - ["'", "'"], - ["\"", "\""], - ["`", "`"], - ["/", "/"], - ["<<'HERE'\n", "\nHERE"], - ["<<-HERE\n", "\nHERE"] - ].each do |opening, closing| - assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n") - end - end - end - - private - - class ConstantContext < BasicObject - def self.const_missing(const) - const - end - end - - def constant_context - ConstantContext.new - end - - class IdentifierContext < BasicObject - def method_missing(name, *) - name - end - end - - def identifier_context - IdentifierContext.new - end - - def assert_encoding_constant(name, character) - source = "# encoding: #{name}\n#{character}" - expected = constant_context.instance_eval(source) - - result = Prism.parse(source) - assert result.success? - - actual = result.value.statements.body.last - assert_kind_of ConstantReadNode, actual - assert_equal expected, actual.name - end - - def assert_encoding_identifier(name, character) - source = "# encoding: #{name}\n#{character}" - expected = identifier_context.instance_eval(source) - - result = Prism.parse(source) - assert result.success? - - actual = result.value.statements.body.last - assert_kind_of CallNode, actual - assert_equal expected, actual.name - end - - # Check that we can properly parse every codepoint in the given encoding. - def assert_encoding(encoding, name, range) - # I'm not entirely sure, but I believe these codepoints are incorrect in - # their parsing in CRuby. They all report as matching `[[:lower:]]` but - # then they are parsed as constants. 
This is because CRuby determines if - # an identifier is a constant or not by case folding it down to lowercase - # and checking if there is a difference. And even though they report - # themselves as lowercase, their case fold is different. I have reported - # this bug upstream. - case encoding - when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8 - range = range.to_a - [ - 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, - 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, - 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, - 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc, - ] - when Encoding::Windows_1253 - range = range.to_a - [0xb5] - end - - range.each do |codepoint| - character = codepoint.chr(encoding) - - if character.match?(/[[:alpha:]]/) - if character.match?(/[[:upper:]]/) - assert_encoding_constant(name, character) - else - assert_encoding_identifier(name, character) - end - elsif character.match?(/[[:alnum:]]/) - assert_encoding_identifier(name, "_#{character}") - else - next if ["/", "{"].include?(character) - - source = "# encoding: #{name}\n/(?##{character})/\n" - assert Prism.parse(source).success?, "Expected #{source.inspect} to parse successfully." - end - rescue RangeError - source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}" - refute Prism.parse(source).success? - end - end - - def assert_encoding_flags(encoding, escapes) - escapes.each do |escaped| - source = "# encoding: #{encoding.name}\n\"#{escaped}\"" - - expected = - begin - eval(source).encoding - rescue SyntaxError => error - if error.message.include?("UTF-8 mixed within") - error.message[/: (.+?)\n/, 1] - else - raise - end - end - - actual = - Prism.parse(source).then do |result| - if result.success? - string = result.value.statements.body.first - - if string.forced_utf8_encoding? - Encoding::UTF_8 - elsif string.forced_binary_encoding? 
- Encoding::ASCII_8BIT - else - encoding - end - else - error = result.errors.first - - if error.message.include?("mixed") - error.message - else - raise error.message - end - end - end - - assert_equal expected, actual - end - end - - # Test Symbol literals without any interpolation or escape sequences. - def assert_symbol_encoding_flags(encoding, symbols) - symbols.each do |symbol| - source = "# encoding: #{encoding.name}\n#{symbol.inspect}" - - expected = - begin - eval(source).encoding - rescue SyntaxError => error - unless error.message.include?("invalid multibyte char") - raise - end - end - - actual = - Prism.parse(source).then do |result| - if result.success? - symbol = result.value.statements.body.first - - if symbol.forced_utf8_encoding? - Encoding::UTF_8 - elsif symbol.forced_binary_encoding? - Encoding::ASCII_8BIT - elsif symbol.forced_us_ascii_encoding? - Encoding::US_ASCII - else - encoding - end - else - error = result.errors.last - - unless error.message.include?("invalid symbol") - raise error.message - end - end - end - - assert_equal expected, actual - end - end - - def assert_symbol_character_escape_encoding_flags(encoding, escapes) - escapes.each do |escaped| - source = "# encoding: #{encoding.name}\n:\"#{escaped}\"" - - expected = - begin - eval(source).encoding - rescue SyntaxError => error - if error.message.include?("UTF-8 mixed within") - error.message[/: (.+?)\n/, 1] - else - raise - end - end - - actual = - Prism.parse(source).then do |result| - if result.success? - symbol = result.value.statements.body.first - - if symbol.forced_utf8_encoding? - Encoding::UTF_8 - elsif symbol.forced_binary_encoding? - Encoding::ASCII_8BIT - elsif symbol.forced_us_ascii_encoding? 
- Encoding::US_ASCII - else - encoding - end - else - error = result.errors.first - - if error.message.include?("mixed") - error.message - else - raise error.message - end - end - end - - assert_equal expected, actual - end - end - - def assert_regular_expression_encoding_flags(encoding, regexps) - regexps.each do |regexp| - regexp_modifier_used = regexp.end_with?("/u") || regexp.end_with?("/e") || regexp.end_with?("/s") || regexp.end_with?("/n") - source = "# encoding: #{encoding.name}\n#{regexp}" - - encoding_errors = ["invalid multibyte char", "escaped non ASCII character in UTF-8 regexp", "differs from source encoding"] - skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"] - - # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104 - unless regexp_modifier_used - skipped_errors += encoding_errors - encoding_errors.clear - end - - expected = - begin - eval(source).encoding - rescue SyntaxError => error - if encoding_errors.find { |e| error.message.include?(e) } - error.message.split("\n").map { |m| m[/: (.+?)$/, 1] } - elsif skipped_errors.find { |e| error.message.include?(e) } - next - else - raise - end - end - - actual = - Prism.parse(source).then do |result| - if result.success? - regexp = result.value.statements.body.first - - actual_encoding = if regexp.forced_utf8_encoding? - Encoding::UTF_8 - elsif regexp.forced_binary_encoding? - Encoding::ASCII_8BIT - elsif regexp.forced_us_ascii_encoding? - Encoding::US_ASCII - elsif regexp.ascii_8bit? - Encoding::ASCII_8BIT - elsif regexp.utf_8? - Encoding::UTF_8 - elsif regexp.euc_jp? - Encoding::EUC_JP - elsif regexp.windows_31j? - Encoding::Windows_31J - else - encoding - end - - if regexp.utf_8? 
&& actual_encoding != Encoding::UTF_8 - raise "expected regexp encoding to be UTF-8 due to '/u' modifier, but got #{actual_encoding.name}" - elsif regexp.ascii_8bit? && (actual_encoding != Encoding::ASCII_8BIT && actual_encoding != Encoding::US_ASCII) - raise "expected regexp encoding to be ASCII-8BIT or US-ASCII due to '/n' modifier, but got #{actual_encoding.name}" - elsif regexp.euc_jp? && actual_encoding != Encoding::EUC_JP - raise "expected regexp encoding to be EUC-JP due to '/e' modifier, but got #{actual_encoding.name}" - elsif regexp.windows_31j? && actual_encoding != Encoding::Windows_31J - raise "expected regexp encoding to be Windows-31J due to '/s' modifier, but got #{actual_encoding.name}" - end - - if regexp.utf_8? && regexp.forced_utf8_encoding? - raise "the forced_utf8 flag should not be set when the UTF-8 modifier (/u) is used" - elsif regexp.ascii_8bit? && regexp.forced_binary_encoding? - raise "the forced_ascii_8bit flag should not be set when the UTF-8 modifier (/u) is used" - end - - actual_encoding - else - errors = result.errors.map(&:message) - - if errors.last&.include?("UTF-8 mixed within") - nil - else - errors - end - end - end - - # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages. - # This class of error message is tricky. The part not being compared is a representation of the regexp. - # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented. - # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses - # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct. 
- if expected.is_a?(Array) && actual.is_a?(Array) - if expected.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") && - actual.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") - expected.last.clear - actual.last.clear - end - end - - assert_equal expected, actual - end - end - end -end diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index 5f4acb01201..8848ea58899 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -1246,8 +1246,7 @@ def test_content_after_unterminated_heredoc end def test_invalid_message_name - result = Prism.parse("+.@foo,+=foo") - assert_equal :"", result.value.statements.body.first.write_name + assert_equal :"", Prism.parse_statement("+.@foo,+=foo").write_name end def test_invalid_operator_write_fcall diff --git a/test/prism/fixtures_test.rb b/test/prism/fixtures_test.rb new file mode 100644 index 00000000000..7225b4ac66c --- /dev/null +++ b/test/prism/fixtures_test.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +return if RUBY_VERSION < "3.2.0" + +require_relative "test_helper" + +module Prism + class FixturesTest < TestCase + except = [] + + # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace + # characters in the heredoc start. + # Example: <<~' EOF' or <<-' EOF' + # https://bugs.ruby-lang.org/issues/19539 + except << "heredocs_leading_whitespace.txt" if RUBY_VERSION < "3.3.0" + + Fixture.each(except: except) do |fixture| + define_method(fixture.test_name) { assert_valid_syntax(fixture.read) } + end + end +end diff --git a/test/prism/fuzzer_test.rb b/test/prism/fuzzer_test.rb index 511210e7ee1..4927478bdc2 100644 --- a/test/prism/fuzzer_test.rb +++ b/test/prism/fuzzer_test.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -return if ENV["PRISM_BUILD_MINIMAL"] - require_relative "test_helper" module Prism @@ -9,7 +7,7 @@ module Prism # invalid memory access. 
class FuzzerTest < TestCase def self.snippet(name, source) - define_method(:"test_fuzzer_#{name}") { Prism.dump(source) } + define_method(:"test_fuzzer_#{name}") { Prism.profile(source) } end snippet "incomplete global variable", "$" @@ -39,29 +37,31 @@ def self.snippet(name, source) snippet "escaped unicode at end of file 8", '"\\u33' snippet "escaped unicode at end of file 9", '"\\u333' snippet "float suffix at end of file", "1e" + snippet "parameter name that is zero length", "a { |b;" snippet "statements node with multiple heredocs", <<~EOF for <= "3.2.0") + +require_relative "test_helper" + +module Prism + class LexTest < TestCase + except = [ + # It seems like there are some oddities with nested heredocs and ripper. + # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838. + "seattlerb/heredoc_nested.txt", + "whitequark/dedenting_heredoc.txt", + # Ripper seems to have a bug that the regex portions before and after + # the heredoc are combined into a single token. See + # https://bugs.ruby-lang.org/issues/19838. + "spanning_heredoc.txt", + "spanning_heredoc_newlines.txt" + ] + + if RUBY_VERSION < "3.3.0" + # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if + # we're on an earlier version. + except << "seattlerb/pct_w_heredoc_interp_nested.txt" + + # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace + # characters in the heredoc start. 
+ # Example: <<~' EOF' or <<-' EOF' + # https://bugs.ruby-lang.org/issues/19539 + except << "heredocs_leading_whitespace.txt" + end + + Fixture.each(except: except) do |fixture| + define_method(fixture.test_name) { assert_lex(fixture) } + end + + def test_lex_file + assert_nothing_raised do + Prism.lex_file(__FILE__) + end + + error = assert_raise Errno::ENOENT do + Prism.lex_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + assert_raise TypeError do + Prism.lex_file(nil) + end + end + + def test_parse_lex + node, tokens = Prism.parse_lex("def foo; end").value + + assert_kind_of ProgramNode, node + assert_equal 5, tokens.length + end + + def test_parse_lex_file + node, tokens = Prism.parse_lex_file(__FILE__).value + + assert_kind_of ProgramNode, node + refute_empty tokens + + error = assert_raise Errno::ENOENT do + Prism.parse_lex_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + assert_raise TypeError do + Prism.parse_lex_file(nil) + end + end + + private + + def assert_lex(fixture) + source = fixture.read + + result = Prism.lex_compat(source) + assert_equal [], result.errors + + Prism.lex_ripper(source).zip(result.value).each do |(ripper, prism)| + assert_equal ripper, prism + end + end + end +end diff --git a/test/prism/library_symbols_test.rb b/test/prism/library_symbols_test.rb index b10a367c183..44f225478bd 100644 --- a/test/prism/library_symbols_test.rb +++ b/test/prism/library_symbols_test.rb @@ -3,8 +3,6 @@ require_relative "test_helper" return if RUBY_PLATFORM !~ /linux/ - -# TODO: determine why these symbols are incorrect on ppc64le return if RUBY_PLATFORM =~ /powerpc64le/ module Prism diff --git a/test/prism/locals_test.rb b/test/prism/locals_test.rb index 0e57a9a80c9..27fdfc90ef4 100644 --- a/test/prism/locals_test.rb +++ b/test/prism/locals_test.rb @@ -17,14 +17,14 @@ module Prism class LocalsTest < TestCase - base = File.join(__dir__, 
"fixtures") - Dir["**/*.txt", base: base].each do |relative| + except = [ # Skip this fixture because it has a different number of locals because # CRuby is eliminating dead code. - next if relative == "whitequark/ruby_bug_10653.txt" + "whitequark/ruby_bug_10653.txt" + ] - filepath = File.join(base, relative) - define_method("test_#{relative}") { assert_locals(filepath) } + Fixture.each(except: except) do |fixture| + define_method(fixture.test_name) { assert_locals(fixture) } end def setup @@ -38,8 +38,8 @@ def teardown private - def assert_locals(filepath) - source = File.read(filepath) + def assert_locals(fixture) + source = fixture.read expected = cruby_locals(source) actual = prism_locals(source) @@ -47,14 +47,6 @@ def assert_locals(filepath) assert_equal(expected, actual) end - def ignore_warnings - previous_verbosity = $VERBOSE - $VERBOSE = nil - yield - ensure - $VERBOSE = previous_verbosity - end - # A wrapper around a RubyVM::InstructionSequence that provides a more # convenient interface for accessing parts of the iseq. class ISeq @@ -104,35 +96,29 @@ def each_child # For the given source, compiles with CRuby and returns a list of all of the # sets of local variables that were encountered. def cruby_locals(source) - verbose, $VERBOSE = $VERBOSE, nil - - begin - locals = [] #: Array[Array[Symbol | Integer]] - stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)] - - while (iseq = stack.pop) - names = [*iseq.local_table] - names.map!.with_index do |name, index| - # When an anonymous local variable is present in the iseq's local - # table, it is represented as the stack offset from the top. - # However, when these are dumped to binary and read back in, they - # are replaced with the symbol :#arg_rest. To consistently handle - # this, we replace them here with their index. 
- if name == :"#arg_rest" - names.length - index + 1 - else - name - end + locals = [] #: Array[Array[Symbol | Integer]] + stack = [ISeq.new(ignore_warnings { RubyVM::InstructionSequence.compile(source) }.to_a)] + + while (iseq = stack.pop) + names = [*iseq.local_table] + names.map!.with_index do |name, index| + # When an anonymous local variable is present in the iseq's local + # table, it is represented as the stack offset from the top. + # However, when these are dumped to binary and read back in, they + # are replaced with the symbol :#arg_rest. To consistently handle + # this, we replace them here with their index. + if name == :"#arg_rest" + names.length - index + 1 + else + name end - - locals << names - iseq.each_child { |child| stack << child } end - locals - ensure - $VERBOSE = verbose + locals << names + iseq.each_child { |child| stack << child } end + + locals end # For the given source, parses with prism and returns a list of all of the diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb index 9e2e92af927..14653fb0f86 100644 --- a/test/prism/magic_comment_test.rb +++ b/test/prism/magic_comment_test.rb @@ -2,32 +2,109 @@ require_relative "test_helper" -return if RUBY_ENGINE != "ruby" - module Prism class MagicCommentTest < TestCase - examples = [ - "# encoding: ascii", - "# coding: ascii", - "# eNcOdInG: ascii", - "# CoDiNg: ascii", - "# \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v", - "# -*- encoding: ascii -*-", - "# -*- coding: ascii -*-", - "# -*- eNcOdInG: ascii -*-", - "# -*- CoDiNg: ascii -*-", - "# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-", - "# -*- foo: bar; encoding: ascii -*-", - "# coding \t \r \v : \t \v \r ascii-8bit", - "# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3" - ] - - examples.each.with_index(1) do |example, index| - define_method(:"test_magic_comment_#{index}") do - expected = RubyVM::InstructionSequence.compile(%Q{#{example}\n""}).eval.encoding - actual = 
Prism.parse(example).encoding + if RUBY_ENGINE == "ruby" + class MagicCommentRipper < Ripper + attr_reader :magic_comments + + def initialize(*) + super + @magic_comments = [] + end + + def on_magic_comment(key, value) + @magic_comments << [key, value] + super + end + end + + Fixture.each do |fixture| + define_method(fixture.test_name) { assert_magic_comments(fixture) } + end + end + + def test_encoding + assert_magic_encoding(Encoding::US_ASCII, "# encoding: ascii") + end + + def test_coding + assert_magic_encoding(Encoding::US_ASCII, "# coding: ascii") + end + + def test_eNcOdInG + assert_magic_encoding(Encoding::US_ASCII, "# eNcOdInG: ascii") + end + + def test_CoDiNg + assert_magic_encoding(Encoding::US_ASCII, "# CoDiNg: ascii") + end + + def test_encoding_whitespace + assert_magic_encoding(Encoding::US_ASCII, "# \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v") + end + + def test_emacs_encoding + assert_magic_encoding(Encoding::US_ASCII, "# -*- encoding: ascii -*-") + end + + def test_emacs_coding + assert_magic_encoding(Encoding::US_ASCII, "# -*- coding: ascii -*-") + end + + def test_emacs_eNcOdInG + assert_magic_encoding(Encoding::US_ASCII, "# -*- eNcOdInG: ascii -*-") + end + + def test_emacs_CoDiNg + assert_magic_encoding(Encoding::US_ASCII, "# -*- CoDiNg: ascii -*-") + end + + def test_emacs_whitespace + assert_magic_encoding(Encoding::US_ASCII, "# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-") + end + + def test_emacs_multiple + assert_magic_encoding(Encoding::US_ASCII, "# -*- foo: bar; encoding: ascii -*-") + end + + def test_coding_whitespace + assert_magic_encoding(Encoding::ASCII_8BIT, "# coding \t \r \v : \t \v \r ascii-8bit") + end + + def test_vim + assert_magic_encoding(Encoding::Windows_31J, "# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3") + end + + private + + def assert_magic_encoding(expected, line) + source = %Q{#{line}\n""} + actual = Prism.parse(source).encoding + + # Compare against our expectation. 
+ assert_equal expected, actual + + # Compare against Ruby's expectation. + if defined?(RubyVM::InstructionSequence) + expected = RubyVM::InstructionSequence.compile(source).eval.encoding assert_equal expected, actual end end + + def assert_magic_comments(fixture) + source = fixture.read + + # Check that we get the correct number of magic comments when lexing with + # ripper. + expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments + actual = Prism.parse(source).magic_comments + + assert_equal expected.length, actual.length + expected.zip(actual).each do |(expected_key, expected_value), magic_comment| + assert_equal expected_key, magic_comment.key + assert_equal expected_value, magic_comment.value + end + end end end diff --git a/test/prism/newline_offsets_test.rb b/test/prism/newline_offsets_test.rb new file mode 100644 index 00000000000..99b808b1df6 --- /dev/null +++ b/test/prism/newline_offsets_test.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module Prism + class NewlineOffsetsTest < TestCase + Fixture.each do |fixture| + define_method(fixture.test_name) { assert_newline_offsets(fixture) } + end + + private + + def assert_newline_offsets(fixture) + source = fixture.read + + expected = [0] + source.b.scan("\n") { expected << $~.offset(0)[0] + 1 } + + assert_equal expected, Prism.parse(source).source.offsets + end + end +end diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb index 75593d34bf1..03d7df4c97f 100644 --- a/test/prism/newline_test.rb +++ b/test/prism/newline_test.rb @@ -6,20 +6,23 @@ module Prism class NewlineTest < TestCase - base = File.expand_path("../", __FILE__) - filepaths = Dir["*.rb", base: base] - %w[ - encoding_test.rb + skips = %w[ errors_test.rb locals_test.rb - parser_test.rb regexp_test.rb - static_literals_test.rb + test_helper.rb unescape_test.rb - warnings_test.rb + encoding/regular_expression_encoding_test.rb + encoding/string_encoding_test.rb + 
result/static_literals_test.rb + result/warnings_test.rb + ruby/parser_test.rb + ruby/ruby_parser_test.rb ] - filepaths.each do |relative| - define_method("test_newline_flags_#{relative}") do + base = __dir__ + (Dir["{,api/,encoding/,result/,ruby/}*.rb", base: base] - skips).each do |relative| + define_method(:"test_#{relative}") do assert_newlines(base, relative) end end @@ -65,14 +68,6 @@ def assert_newlines(base, relative) assert_equal expected, actual end - def ignore_warnings - previous_verbosity = $VERBOSE - $VERBOSE = nil - yield - ensure - $VERBOSE = previous_verbosity - end - def rubyvm_lines(source) queue = [ignore_warnings { RubyVM::InstructionSequence.compile(source) }] lines = [] diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb deleted file mode 100644 index 5c66caebb96..00000000000 --- a/test/prism/parse_test.rb +++ /dev/null @@ -1,371 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module Prism - class ParseTest < TestCase - # A subclass of Ripper that extracts out magic comments. - class MagicCommentRipper < Ripper - attr_reader :magic_comments - - def initialize(*) - super - @magic_comments = [] - end - - def on_magic_comment(key, value) - @magic_comments << [key, value] - super - end - end - - # When we pretty-print the trees to compare against the snapshots, we want to - # be certain that we print with the same external encoding. This is because - # methods like Symbol#inspect take into account external encoding and it could - # change how the snapshot is generated. On machines with certain settings - # (like LANG=C or -Eascii-8bit) this could have been changed. So here we're - # going to force it to be UTF-8 to keep the snapshots consistent. 
- def setup - @previous_default_external = Encoding.default_external - ignore_warnings { Encoding.default_external = Encoding::UTF_8 } - end - - def teardown - ignore_warnings { Encoding.default_external = @previous_default_external } - end - - def test_empty_string - result = Prism.parse("") - assert_equal [], result.value.statements.body - end - - def test_parse_takes_file_path - filepath = "filepath.rb" - result = Prism.parse("def foo; __FILE__; end", filepath: filepath) - - assert_equal filepath, find_source_file_node(result.value).filepath - end - - def test_parse_takes_line - line = 4 - result = Prism.parse("def foo\n __FILE__\nend", line: line) - - assert_equal line, result.value.location.start_line - assert_equal line + 1, find_source_file_node(result.value).location.start_line - - result = Prism.parse_lex("def foo\n __FILE__\nend", line: line) - assert_equal line, result.value.first.location.start_line - end - - def test_parse_takes_negative_lines - line = -2 - result = Prism.parse("def foo\n __FILE__\nend", line: line) - - assert_equal line, result.value.location.start_line - assert_equal line + 1, find_source_file_node(result.value).location.start_line - - result = Prism.parse_lex("def foo\n __FILE__\nend", line: line) - assert_equal line, result.value.first.location.start_line - end - - def test_parse_lex - node, tokens = Prism.parse_lex("def foo; end").value - - assert_kind_of ProgramNode, node - assert_equal 5, tokens.length - end - - if !ENV["PRISM_BUILD_MINIMAL"] - def test_dump_file - assert_nothing_raised do - Prism.dump_file(__FILE__) - end - - error = assert_raise Errno::ENOENT do - Prism.dump_file("idontexist.rb") - end - - assert_equal "No such file or directory - idontexist.rb", error.message - - assert_raise TypeError do - Prism.dump_file(nil) - end - end - end - - def test_lex_file - assert_nothing_raised do - Prism.lex_file(__FILE__) - end - - error = assert_raise Errno::ENOENT do - Prism.lex_file("idontexist.rb") - end - - assert_equal 
"No such file or directory - idontexist.rb", error.message - - assert_raise TypeError do - Prism.lex_file(nil) - end - end - - def test_parse_lex_file - node, tokens = Prism.parse_lex_file(__FILE__).value - - assert_kind_of ProgramNode, node - refute_empty tokens - - error = assert_raise Errno::ENOENT do - Prism.parse_lex_file("idontexist.rb") - end - - assert_equal "No such file or directory - idontexist.rb", error.message - - assert_raise TypeError do - Prism.parse_lex_file(nil) - end - end - - def test_parse_file - node = Prism.parse_file(__FILE__).value - assert_kind_of ProgramNode, node - - error = assert_raise Errno::ENOENT do - Prism.parse_file("idontexist.rb") - end - - assert_equal "No such file or directory - idontexist.rb", error.message - - assert_raise TypeError do - Prism.parse_file(nil) - end - end - - def test_parse_file_success - assert_predicate Prism.parse_file_comments(__FILE__), :any? - - error = assert_raise Errno::ENOENT do - Prism.parse_file_comments("idontexist.rb") - end - - assert_equal "No such file or directory - idontexist.rb", error.message - - assert_raise TypeError do - Prism.parse_file_comments(nil) - end - end - - def test_parse_file_comments - assert_predicate Prism.parse_file_comments(__FILE__), :any? - - error = assert_raise Errno::ENOENT do - Prism.parse_file_comments("idontexist.rb") - end - - assert_equal "No such file or directory - idontexist.rb", error.message - - assert_raise TypeError do - Prism.parse_file_comments(nil) - end - end - - # To accurately compare against Ripper, we need to make sure that we're - # running on CRuby 3.2+. - ripper_enabled = RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0" - - # The FOCUS environment variable allows you to specify one particular fixture - # to test, instead of all of them. - base = File.join(__dir__, "fixtures") - relatives = ENV["FOCUS"] ? 
[ENV["FOCUS"]] : Dir["**/*.txt", base: base] - - relatives.each do |relative| - # These fail on TruffleRuby due to a difference in Symbol#inspect: :测试 vs :"测试" - next if RUBY_ENGINE == "truffleruby" and %w[emoji_method_calls.txt seattlerb/bug202.txt seattlerb/magic_encoding_comment.txt].include?(relative) - - filepath = File.join(base, relative) - snapshot = File.expand_path(File.join("snapshots", relative), __dir__) - - directory = File.dirname(snapshot) - FileUtils.mkdir_p(directory) unless File.directory?(directory) - - ripper_should_match = ripper_enabled - check_valid_syntax = RUBY_VERSION >= "3.2.0" - - case relative - when "seattlerb/pct_w_heredoc_interp_nested.txt" - # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if - # we're on an earlier version. - ripper_should_match = false if RUBY_VERSION < "3.3.0" - when "seattlerb/heredoc_nested.txt", "whitequark/dedenting_heredoc.txt" - # It seems like there are some oddities with nested heredocs and ripper. - # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838. - ripper_should_match = false - when "spanning_heredoc.txt", "spanning_heredoc_newlines.txt" - # Ripper seems to have a bug that the regex portions before and after - # the heredoc are combined into a single token. See - # https://bugs.ruby-lang.org/issues/19838. - ripper_should_match = false - when "heredocs_leading_whitespace.txt" - # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace - # characters in the heredoc start. - # Example: <<~' EOF' or <<-' EOF' - # https://bugs.ruby-lang.org/issues/19539 - if RUBY_VERSION < "3.3.0" - ripper_should_match = false - check_valid_syntax = false - end - end - - define_method "test_filepath_#{relative}" do - # First, read the source from the filepath. Use binmode to avoid - # converting CRLF on Windows, and explicitly set the external encoding - # to UTF-8 to override the binmode default. 
- source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) - - # Make sure that the given source is valid syntax, otherwise we have an - # invalid fixture. - assert_valid_syntax(source) if check_valid_syntax - - # Next, assert that there were no errors during parsing. - result = Prism.parse(source, filepath: relative) - assert_empty result.errors - - # Next, pretty print the source. - printed = PP.pp(result.value, +"", 79) - - if File.exist?(snapshot) - saved = File.read(snapshot) - - # If the snapshot file exists, but the printed value does not match the - # snapshot, then update the snapshot file. - if printed != saved - File.write(snapshot, printed) - warn("Updated snapshot at #{snapshot}.") - end - - # If the snapshot file exists, then assert that the printed value - # matches the snapshot. - assert_equal(saved, printed) - else - # If the snapshot file does not yet exist, then write it out now. - File.write(snapshot, printed) - warn("Created snapshot at #{snapshot}.") - end - - if !ENV["PRISM_BUILD_MINIMAL"] - # Next, assert that the value can be serialized and deserialized - # without changing the shape of the tree. - assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value) - end - - # Next, check that the location ranges of each node in the tree are a - # superset of their respective child nodes. - assert_non_overlapping_locations(result.value) - - # Next, assert that the newlines are in the expected places. - expected_newlines = [0] - source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 } - assert_equal expected_newlines, Prism.parse(source).source.offsets - - if ripper_should_match - # Finally, assert that we can lex the source and get the same tokens as - # Ripper. 
- lex_result = Prism.lex_compat(source) - assert_equal [], lex_result.errors - tokens = lex_result.value - - begin - Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)| - assert_equal ripper, prism - end - rescue SyntaxError - raise ArgumentError, "Test file has invalid syntax #{filepath}" - end - - # Next, check that we get the correct number of magic comments when - # lexing with ripper. - expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments - actual = result.magic_comments - - assert_equal expected.length, actual.length - expected.zip(actual).each do |(expected_key, expected_value), magic_comment| - assert_equal expected_key, magic_comment.key - assert_equal expected_value, magic_comment.value - end - end - end - end - - Dir["*.txt", base: base].each do |relative| - next if relative == "newline_terminated.txt" || relative == "spanning_heredoc_newlines.txt" - - # We test every snippet (separated by \n\n) in isolation - # to ensure the parser does not try to read bytes further than the end of each snippet - define_method "test_individual_snippets_#{relative}" do - filepath = File.join(base, relative) - - # First, read the source from the filepath. Use binmode to avoid converting CRLF on Windows, - # and explicitly set the external encoding to UTF-8 to override the binmode default. - file_contents = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) - - file_contents.split(/(?<=\S)\n\n(?=\S)/).each do |snippet| - snippet = snippet.rstrip - result = Prism.parse(snippet, filepath: relative) - assert_empty result.errors - - if !ENV["PRISM_BUILD_MINIMAL"] - assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value) - end - end - end - end - - private - - # Check that the location ranges of each node in the tree are a superset of - # their respective child nodes. 
- def assert_non_overlapping_locations(node) - queue = [node] - - while (current = queue.shift) - # We only want to compare parent/child location overlap in the case that - # we are not looking at a heredoc. That's because heredoc locations are - # special in that they only use the declaration of the heredoc. - compare = !(current.is_a?(StringNode) || - current.is_a?(XStringNode) || - current.is_a?(InterpolatedStringNode) || - current.is_a?(InterpolatedXStringNode)) || - !current.opening&.start_with?("<<") - - current.child_nodes.each do |child| - # child_nodes can return nil values, so we need to skip those. - next unless child - - # Now that we know we have a child node, add that to the queue. - queue << child - - if compare - assert_operator current.location.start_offset, :<=, child.location.start_offset - assert_operator current.location.end_offset, :>=, child.location.end_offset - end - end - end - end - - def find_source_file_node(program) - queue = [program] - while (node = queue.shift) - return node if node.is_a?(SourceFileNode) - queue.concat(node.compact_child_nodes) - end - end - - def ignore_warnings - previous_verbosity = $VERBOSE - $VERBOSE = nil - yield - ensure - $VERBOSE = previous_verbosity - end - end -end diff --git a/test/prism/parser_test.rb b/test/prism/parser_test.rb deleted file mode 100644 index 79b65cf75b4..00000000000 --- a/test/prism/parser_test.rb +++ /dev/null @@ -1,186 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -begin - verbose, $VERBOSE = $VERBOSE, nil - require "parser/ruby33" - require "prism/translation/parser33" -rescue LoadError - # In CRuby's CI, we're not going to test against the parser gem because we - # don't want to have to install it. So in this case we'll just skip this test. - return -ensure - $VERBOSE = verbose -end - -# First, opt in to every AST feature. 
-Parser::Builders::Default.modernize - -# Modify the source map == check so that it doesn't check against the node -# itself so we don't get into a recursive loop. -Parser::Source::Map.prepend( - Module.new { - def ==(other) - self.class == other.class && - (instance_variables - %i[@node]).map do |ivar| - instance_variable_get(ivar) == other.instance_variable_get(ivar) - end.reduce(:&) - end - } -) - -# Next, ensure that we're comparing the nodes and also comparing the source -# ranges so that we're getting all of the necessary information. -Parser::AST::Node.prepend( - Module.new { - def ==(other) - super && (location == other.location) - end - } -) - -module Prism - class ParserTest < TestCase - base = File.join(__dir__, "fixtures") - - # These files are erroring because of the parser gem being wrong. - skip_incorrect = [ - "embdoc_no_newline_at_end.txt" - ] - - # These files are either failing to parse or failing to translate, so we'll - # skip them for now. - skip_all = skip_incorrect | [ - "dash_heredocs.txt", - "dos_endings.txt", - "heredocs_with_ignored_newlines.txt", - "regex.txt", - "regex_char_width.txt", - "spanning_heredoc.txt", - "spanning_heredoc_newlines.txt", - "unescaping.txt" - ] - - # Not sure why these files are failing on JRuby, but skipping them for now. - if RUBY_ENGINE == "jruby" - skip_all.push("emoji_method_calls.txt", "symbols.txt") - end - - # These files are failing to translate their lexer output into the lexer - # output expected by the parser gem, so we'll skip them for now. 
- skip_tokens = [ - "comments.txt", - "heredoc_with_comment.txt", - "indented_file_end.txt", - "methods.txt", - "strings.txt", - "tilde_heredocs.txt", - "xstring_with_backslash.txt" - ] - - Dir["*.txt", base: base].each do |name| - next if skip_all.include?(name) - - define_method("test_#{name}") do - assert_equal_parses(File.join(base, name), compare_tokens: !skip_tokens.include?(name)) - end - end - - private - - def assert_equal_parses(filepath, compare_tokens: true) - buffer = Parser::Source::Buffer.new(filepath, 1) - buffer.source = File.read(filepath) - - parser = Parser::Ruby33.new - parser.diagnostics.consumer = ->(*) {} - parser.diagnostics.all_errors_are_fatal = true - - expected_ast, expected_comments, expected_tokens = - begin - parser.tokenize(buffer) - rescue ArgumentError, Parser::SyntaxError - return - end - - actual_ast, actual_comments, actual_tokens = - Prism::Translation::Parser33.new.tokenize(buffer) - - assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) } - assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens - assert_equal_comments(expected_comments, actual_comments) - end - - def assert_equal_asts_message(expected_ast, actual_ast) - queue = [[expected_ast, actual_ast]] - - while (left, right = queue.shift) - if left.type != right.type - return "expected: #{left.type}\nactual: #{right.type}" - end - - if left.location != right.location - return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}" - end - - if left.type == :str && left.children[0] != right.children[0] - return "expected: #{left.inspect}\nactual: #{right.inspect}" - end - - left.children.zip(right.children).each do |left_child, right_child| - queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node) - end - end - - "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}" - end - - def assert_equal_tokens(expected_tokens, actual_tokens) 
- if expected_tokens != actual_tokens - expected_index = 0 - actual_index = 0 - - while expected_index < expected_tokens.length - expected_token = expected_tokens[expected_index] - actual_token = actual_tokens[actual_index] - - expected_index += 1 - actual_index += 1 - - # The parser gem always has a space before a string end in list - # literals, but we don't. So we'll skip over the space. - if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END - expected_index += 1 - next - end - - # There are a lot of tokens that have very specific meaning according - # to the context of the parser. We don't expose that information in - # prism, so we need to normalize these tokens a bit. - case actual_token[0] - when :kDO - actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0]) - when :tLPAREN - actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2 - when :tPOW - actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR - end - - # Now we can assert that the tokens are actually equal. 
- assert_equal expected_token, actual_token, -> { - "expected: #{expected_token.inspect}\n" \ - "actual: #{actual_token.inspect}" - } - end - end - end - - def assert_equal_comments(expected_comments, actual_comments) - assert_equal expected_comments, actual_comments, -> { - "expected: #{expected_comments.inspect}\n" \ - "actual: #{actual_comments.inspect}" - } - end - end -end diff --git a/test/prism/regexp_test.rb b/test/prism/regexp_test.rb index 35be217f793..297020fc72e 100644 --- a/test/prism/regexp_test.rb +++ b/test/prism/regexp_test.rb @@ -223,12 +223,12 @@ def test_flag_combined def test_last_encoding_option_wins regex = "/foo/nu" - option = Prism.parse(regex).value.statements.body.first.options + option = Prism.parse_statement(regex).options assert_equal Regexp::FIXEDENCODING, option regex = "/foo/un" - option = Prism.parse(regex).value.statements.body.first.options + option = Prism.parse_statement(regex).options assert_equal Regexp::NOENCODING, option end @@ -246,7 +246,7 @@ def named_captures(source) def options(flags) options = ["/foo/#{flags}", "/foo\#{1}/#{flags}"].map do |source| - Prism.parse(source).value.statements.body.first.options + Prism.parse_statement(source).options end # Check that we get the same set of options from both regular expressions diff --git a/test/prism/attribute_write_test.rb b/test/prism/result/attribute_write_test.rb similarity index 86% rename from test/prism/attribute_write_test.rb rename to test/prism/result/attribute_write_test.rb index bd83d72da35..8f2e3527380 100644 --- a/test/prism/attribute_write_test.rb +++ b/test/prism/result/attribute_write_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class AttributeWriteTest < TestCase @@ -41,18 +41,14 @@ def test_comparison_operators private - def parse(source) - Prism.parse(source).value.statements.body.first - end - def assert_attribute_write(source) - call = parse(source) + call = 
Prism.parse_statement(source) assert(call.attribute_write?) assert_equal(1, eval(source)) end def refute_attribute_write(source) - call = parse(source) + call = Prism.parse_statement(source) refute(call.attribute_write?) refute_equal(1, eval(source)) end diff --git a/test/prism/comments_test.rb b/test/prism/result/comments_test.rb similarity index 99% rename from test/prism/comments_test.rb rename to test/prism/result/comments_test.rb index 952d03239c3..178623a75ff 100644 --- a/test/prism/comments_test.rb +++ b/test/prism/result/comments_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class CommentsTest < TestCase diff --git a/test/prism/constant_path_node_test.rb b/test/prism/result/constant_path_node_test.rb similarity index 78% rename from test/prism/constant_path_node_test.rb rename to test/prism/result/constant_path_node_test.rb index dffb55c0ffe..75925600ca7 100644 --- a/test/prism/constant_path_node_test.rb +++ b/test/prism/result/constant_path_node_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class ConstantPathNodeTest < TestCase @@ -11,7 +11,7 @@ def test_full_name_for_constant_path Qux RUBY - constant_path = Prism.parse(source).value.statements.body.first + constant_path = Prism.parse_statement(source) assert_equal("Foo::Bar::Baz::Qux", constant_path.full_name) end @@ -22,7 +22,7 @@ def test_full_name_for_constant_path_with_self Qux RUBY - constant_path = Prism.parse(source).value.statements.body.first + constant_path = Prism.parse_statement(source) assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do constant_path.full_name end @@ -35,7 +35,7 @@ def test_full_name_for_constant_path_with_variable Qux RUBY - constant_path = Prism.parse(source).value.statements.body.first + constant_path = Prism.parse_statement(source) 
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do constant_path.full_name @@ -49,7 +49,7 @@ def test_full_name_for_constant_path_target Qux, Something = [1, 2] RUBY - node = Prism.parse(source).value.statements.body.first + node = Prism.parse_statement(source) assert_equal("Foo::Bar::Baz::Qux", node.lefts.first.full_name) end @@ -60,7 +60,7 @@ def test_full_name_for_constant_path_with_stovetop_start Qux, Something = [1, 2] RUBY - node = Prism.parse(source).value.statements.body.first + node = Prism.parse_statement(source) assert_equal("::Foo::Bar::Baz::Qux", node.lefts.first.full_name) end @@ -69,7 +69,7 @@ def test_full_name_for_constant_path_target_with_non_constant_parent self::Foo, Bar = [1, 2] RUBY - constant_target = Prism.parse(source).value.statements.body.first + constant_target = Prism.parse_statement(source) dynamic, static = constant_target.lefts assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do @@ -84,7 +84,7 @@ def test_full_name_for_constant_read_node Bar RUBY - constant = Prism.parse(source).value.statements.body.first + constant = Prism.parse_statement(source) assert_equal("Bar", constant.full_name) end end diff --git a/test/prism/result/equality_test.rb b/test/prism/result/equality_test.rb new file mode 100644 index 00000000000..4f6e665a88c --- /dev/null +++ b/test/prism/result/equality_test.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class EqualityTest < TestCase + def test_equality + assert_operator Prism.parse_statement("1"), :===, Prism.parse_statement("1") + assert_operator Prism.parse("1").value, :===, Prism.parse("1").value + + complex_source = "class Something; @var = something.else { _1 }; end" + assert_operator Prism.parse_statement(complex_source), :===, Prism.parse_statement(complex_source) + + refute_operator Prism.parse_statement("1"), :===, Prism.parse_statement("2") + refute_operator Prism.parse_statement("1"), :===, 
Prism.parse_statement("0x1") + + complex_source_1 = "class Something; @var = something.else { _1 }; end" + complex_source_2 = "class Something; @var = something.else { _2 }; end" + refute_operator Prism.parse_statement(complex_source_1), :===, Prism.parse_statement(complex_source_2) + end + end +end diff --git a/test/prism/result/heredoc_test.rb b/test/prism/result/heredoc_test.rb new file mode 100644 index 00000000000..7913c04a88e --- /dev/null +++ b/test/prism/result/heredoc_test.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class HeredocTest < TestCase + def test_heredoc? + refute Prism.parse_statement("\"foo\"").heredoc? + refute Prism.parse_statement("\"foo \#{1}\"").heredoc? + refute Prism.parse_statement("`foo`").heredoc? + refute Prism.parse_statement("`foo \#{1}`").heredoc? + + assert Prism.parse_statement("<<~HERE\nfoo\nHERE\n").heredoc? + assert Prism.parse_statement("<<~HERE\nfoo \#{1}\nHERE\n").heredoc? + assert Prism.parse_statement("<<~`HERE`\nfoo\nHERE\n").heredoc? + assert Prism.parse_statement("<<~`HERE`\nfoo \#{1}\nHERE\n").heredoc? 
+ end + end +end diff --git a/test/prism/index_write_test.rb b/test/prism/result/index_write_test.rb similarity index 98% rename from test/prism/index_write_test.rb rename to test/prism/result/index_write_test.rb index cf90eb082fe..0d5383b6012 100644 --- a/test/prism/index_write_test.rb +++ b/test/prism/result/index_write_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class IndexWriteTest < TestCase diff --git a/test/prism/result/integer_base_flags_test.rb b/test/prism/result/integer_base_flags_test.rb new file mode 100644 index 00000000000..ef15fb437c0 --- /dev/null +++ b/test/prism/result/integer_base_flags_test.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class IntegerBaseFlagsTest < TestCase + # Through some bit hackery, we want to allow consumers to use the integer + # base flags as the base itself. It has a nice property that the current + # alignment provides them in the correct order. So here we test that our + # assumption holds so that it doesn't change out from under us. + # + # In C, this would look something like: + # + # ((flags & ~DECIMAL) << 1) || 10 + # + # We have to do some other work in Ruby because 0 is truthy and ~ on an + # integer doesn't have a fixed width. + def test_flags + assert_equal 2, base("0b1") + assert_equal 8, base("0o1") + assert_equal 10, base("0d1") + assert_equal 16, base("0x1") + end + + private + + def base(source) + node = Prism.parse_statement(source) + value = (node.send(:flags) & (0b1111 - IntegerBaseFlags::DECIMAL)) << 1 + value == 0 ? 
10 : value + end + end +end diff --git a/test/prism/integer_parse_test.rb b/test/prism/result/integer_parse_test.rb similarity index 90% rename from test/prism/integer_parse_test.rb rename to test/prism/result/integer_parse_test.rb index 11aee174c54..7b5ce98bb6a 100644 --- a/test/prism/integer_parse_test.rb +++ b/test/prism/result/integer_parse_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class IntegerParseTest < TestCase @@ -35,7 +35,7 @@ def test_integer_parse private def assert_integer_parse(expected, source = expected.to_s) - assert_equal expected, Prism.parse(source).value.statements.body.first.value + assert_equal expected, Prism.parse_statement(source).value end end end diff --git a/test/prism/result/numeric_value_test.rb b/test/prism/result/numeric_value_test.rb new file mode 100644 index 00000000000..5c89230a1fe --- /dev/null +++ b/test/prism/result/numeric_value_test.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class NumericValueTest < TestCase + def test_numeric_value + assert_equal 123, Prism.parse_statement("123").value + assert_equal 3.14, Prism.parse_statement("3.14").value + assert_equal 42i, Prism.parse_statement("42i").value + assert_equal 42.1ri, Prism.parse_statement("42.1ri").value + assert_equal 3.14i, Prism.parse_statement("3.14i").value + assert_equal 42r, Prism.parse_statement("42r").value + assert_equal 0.5r, Prism.parse_statement("0.5r").value + assert_equal 42ri, Prism.parse_statement("42ri").value + assert_equal 0.5ri, Prism.parse_statement("0.5ri").value + assert_equal 0xFFr, Prism.parse_statement("0xFFr").value + assert_equal 0xFFri, Prism.parse_statement("0xFFri").value + end + end +end diff --git a/test/prism/result/overlap_test.rb b/test/prism/result/overlap_test.rb new file mode 100644 index 00000000000..155bc870d36 --- /dev/null +++ b/test/prism/result/overlap_test.rb @@ -0,0 +1,43 
@@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class OverlapTest < TestCase + Fixture.each do |fixture| + define_method(fixture.test_name) { assert_overlap(fixture) } + end + + private + + # Check that the location ranges of each node in the tree are a superset of + # their respective child nodes. + def assert_overlap(fixture) + queue = [Prism.parse_file(fixture.full_path).value] + + while (current = queue.shift) + # We only want to compare parent/child location overlap in the case that + # we are not looking at a heredoc. That's because heredoc locations are + # special in that they only use the declaration of the heredoc. + compare = !(current.is_a?(StringNode) || + current.is_a?(XStringNode) || + current.is_a?(InterpolatedStringNode) || + current.is_a?(InterpolatedXStringNode)) || + !current.opening&.start_with?("<<") + + current.child_nodes.each do |child| + # child_nodes can return nil values, so we need to skip those. + next unless child + + # Now that we know we have a child node, add that to the queue. 
+ queue << child + + if compare + assert_operator current.location.start_offset, :<=, child.location.start_offset + assert_operator current.location.end_offset, :>=, child.location.end_offset + end + end + end + end + end +end diff --git a/test/prism/redundant_return_test.rb b/test/prism/result/redundant_return_test.rb similarity index 98% rename from test/prism/redundant_return_test.rb rename to test/prism/result/redundant_return_test.rb index c6681692452..3b20aeba00f 100644 --- a/test/prism/redundant_return_test.rb +++ b/test/prism/result/redundant_return_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class RedundantReturnTest < TestCase diff --git a/test/prism/result/regular_expression_options_test.rb b/test/prism/result/regular_expression_options_test.rb new file mode 100644 index 00000000000..ff6e20526fd --- /dev/null +++ b/test/prism/result/regular_expression_options_test.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class RegularExpressionOptionsTest < TestCase + def test_options + assert_equal "", Prism.parse_statement("__FILE__").filepath + assert_equal "foo.rb", Prism.parse_statement("__FILE__", filepath: "foo.rb").filepath + + assert_equal 1, Prism.parse_statement("foo").location.start_line + assert_equal 10, Prism.parse_statement("foo", line: 10).location.start_line + + refute Prism.parse_statement("\"foo\"").frozen? + assert Prism.parse_statement("\"foo\"", frozen_string_literal: true).frozen? + refute Prism.parse_statement("\"foo\"", frozen_string_literal: false).frozen? 
+ + assert_kind_of CallNode, Prism.parse_statement("foo") + assert_kind_of LocalVariableReadNode, Prism.parse_statement("foo", scopes: [[:foo]]) + assert_equal 1, Prism.parse_statement("foo", scopes: [[:foo], []]).depth + + assert_equal [:foo], Prism.parse("foo", scopes: [[:foo]]).value.locals + end + end +end diff --git a/test/prism/location_test.rb b/test/prism/result/source_location_test.rb similarity index 99% rename from test/prism/location_test.rb rename to test/prism/result/source_location_test.rb index 256e5b41e44..ca74b36e6f8 100644 --- a/test/prism/location_test.rb +++ b/test/prism/result/source_location_test.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism - class LocationTest < TestCase + class SourceLocationTest < TestCase def test_AliasGlobalVariableNode assert_location(AliasGlobalVariableNode, "alias $foo $bar") end @@ -921,7 +921,7 @@ def test_YieldNode def test_all_tested expected = Prism.constants.grep(/.Node$/).sort - %i[MissingNode ProgramNode] - actual = LocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort + actual = SourceLocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort assert_equal expected, actual end diff --git a/test/prism/static_inspect_test.rb b/test/prism/result/static_inspect_test.rb similarity index 98% rename from test/prism/static_inspect_test.rb rename to test/prism/result/static_inspect_test.rb index cc8ed28c950..cf8cef3298a 100644 --- a/test/prism/static_inspect_test.rb +++ b/test/prism/result/static_inspect_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class StaticInspectTest < TestCase diff --git a/test/prism/static_literals_test.rb b/test/prism/result/static_literals_test.rb similarity index 98% rename from test/prism/static_literals_test.rb rename to 
test/prism/result/static_literals_test.rb index 31c802bf90d..dcfc692897c 100644 --- a/test/prism/static_literals_test.rb +++ b/test/prism/result/static_literals_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class StaticLiteralsTest < TestCase diff --git a/test/prism/warnings_test.rb b/test/prism/result/warnings_test.rb similarity index 99% rename from test/prism/warnings_test.rb rename to test/prism/result/warnings_test.rb index 7ad704918a0..ea062d42215 100644 --- a/test/prism/warnings_test.rb +++ b/test/prism/result/warnings_test.rb @@ -2,8 +2,7 @@ return if RUBY_VERSION < "3.1" -require_relative "test_helper" -require "stringio" +require_relative "../test_helper" module Prism class WarningsTest < TestCase diff --git a/test/prism/compiler_test.rb b/test/prism/ruby/compiler_test.rb similarity index 94% rename from test/prism/compiler_test.rb rename to test/prism/ruby/compiler_test.rb index 9a326eb8d61..35ccfd59507 100644 --- a/test/prism/compiler_test.rb +++ b/test/prism/ruby/compiler_test.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # typed: ignore -require_relative "test_helper" +require_relative "../test_helper" module Prism class CompilerTest < TestCase diff --git a/test/prism/desugar_compiler_test.rb b/test/prism/ruby/desugar_compiler_test.rb similarity index 99% rename from test/prism/desugar_compiler_test.rb rename to test/prism/ruby/desugar_compiler_test.rb index 1a1d580d2dc..fe9a25e030c 100644 --- a/test/prism/desugar_compiler_test.rb +++ b/test/prism/ruby/desugar_compiler_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class DesugarCompilerTest < TestCase diff --git a/test/prism/dispatcher_test.rb b/test/prism/ruby/dispatcher_test.rb similarity index 97% rename from test/prism/dispatcher_test.rb rename to test/prism/ruby/dispatcher_test.rb index 0d8a6d35e90..1b6d7f4117e 100644 --- 
a/test/prism/dispatcher_test.rb +++ b/test/prism/ruby/dispatcher_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class DispatcherTest < TestCase diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb new file mode 100644 index 00000000000..fc80a5b875d --- /dev/null +++ b/test/prism/ruby/location_test.rb @@ -0,0 +1,173 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class LocationTest < TestCase + def test_join + call = Prism.parse_statement("1234 + 567") + receiver = call.receiver + argument = call.arguments.arguments.first + + joined = receiver.location.join(argument.location) + assert_equal 0, joined.start_offset + assert_equal 10, joined.length + + assert_raise(RuntimeError, "Incompatible locations") do + argument.location.join(receiver.location) + end + + other_argument = Prism.parse_statement("1234 + 567").arguments.arguments.first + + assert_raise(RuntimeError, "Incompatible sources") do + other_argument.location.join(receiver.location) + end + + assert_raise(RuntimeError, "Incompatible sources") do + receiver.location.join(other_argument.location) + end + end + + def test_character_offsets + program = Prism.parse("😀 + 😀\n😍 ||= 😍").value + + # first 😀 + location = program.statements.body.first.receiver.location + assert_equal 0, location.start_character_offset + assert_equal 1, location.end_character_offset + assert_equal 0, location.start_character_column + assert_equal 1, location.end_character_column + + # second 😀 + location = program.statements.body.first.arguments.arguments.first.location + assert_equal 4, location.start_character_offset + assert_equal 5, location.end_character_offset + assert_equal 4, location.start_character_column + assert_equal 5, location.end_character_column + + # first 😍 + location = program.statements.body.last.name_loc + assert_equal 6, location.start_character_offset + 
assert_equal 7, location.end_character_offset + assert_equal 0, location.start_character_column + assert_equal 1, location.end_character_column + + # second 😍 + location = program.statements.body.last.value.location + assert_equal 12, location.start_character_offset + assert_equal 13, location.end_character_offset + assert_equal 6, location.start_character_column + assert_equal 7, location.end_character_column + end + + def test_code_units + program = Prism.parse("😀 + 😀\n😍 ||= 😍").value + + # first 😀 + location = program.statements.body.first.receiver.location + + assert_equal 0, location.start_code_units_offset(Encoding::UTF_8) + assert_equal 0, location.start_code_units_offset(Encoding::UTF_16LE) + assert_equal 0, location.start_code_units_offset(Encoding::UTF_32LE) + + assert_equal 1, location.end_code_units_offset(Encoding::UTF_8) + assert_equal 2, location.end_code_units_offset(Encoding::UTF_16LE) + assert_equal 1, location.end_code_units_offset(Encoding::UTF_32LE) + + assert_equal 0, location.start_code_units_column(Encoding::UTF_8) + assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE) + assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE) + + assert_equal 1, location.end_code_units_column(Encoding::UTF_8) + assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE) + assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE) + + # second 😀 + location = program.statements.body.first.arguments.arguments.first.location + + assert_equal 4, location.start_code_units_offset(Encoding::UTF_8) + assert_equal 5, location.start_code_units_offset(Encoding::UTF_16LE) + assert_equal 4, location.start_code_units_offset(Encoding::UTF_32LE) + + assert_equal 5, location.end_code_units_offset(Encoding::UTF_8) + assert_equal 7, location.end_code_units_offset(Encoding::UTF_16LE) + assert_equal 5, location.end_code_units_offset(Encoding::UTF_32LE) + + assert_equal 4, location.start_code_units_column(Encoding::UTF_8) + assert_equal 
5, location.start_code_units_column(Encoding::UTF_16LE) + assert_equal 4, location.start_code_units_column(Encoding::UTF_32LE) + + assert_equal 5, location.end_code_units_column(Encoding::UTF_8) + assert_equal 7, location.end_code_units_column(Encoding::UTF_16LE) + assert_equal 5, location.end_code_units_column(Encoding::UTF_32LE) + + # first 😍 + location = program.statements.body.last.name_loc + + assert_equal 6, location.start_code_units_offset(Encoding::UTF_8) + assert_equal 8, location.start_code_units_offset(Encoding::UTF_16LE) + assert_equal 6, location.start_code_units_offset(Encoding::UTF_32LE) + + assert_equal 7, location.end_code_units_offset(Encoding::UTF_8) + assert_equal 10, location.end_code_units_offset(Encoding::UTF_16LE) + assert_equal 7, location.end_code_units_offset(Encoding::UTF_32LE) + + assert_equal 0, location.start_code_units_column(Encoding::UTF_8) + assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE) + assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE) + + assert_equal 1, location.end_code_units_column(Encoding::UTF_8) + assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE) + assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE) + + # second 😍 + location = program.statements.body.last.value.location + + assert_equal 12, location.start_code_units_offset(Encoding::UTF_8) + assert_equal 15, location.start_code_units_offset(Encoding::UTF_16LE) + assert_equal 12, location.start_code_units_offset(Encoding::UTF_32LE) + + assert_equal 13, location.end_code_units_offset(Encoding::UTF_8) + assert_equal 17, location.end_code_units_offset(Encoding::UTF_16LE) + assert_equal 13, location.end_code_units_offset(Encoding::UTF_32LE) + + assert_equal 6, location.start_code_units_column(Encoding::UTF_8) + assert_equal 7, location.start_code_units_column(Encoding::UTF_16LE) + assert_equal 6, location.start_code_units_column(Encoding::UTF_32LE) + + assert_equal 7, 
location.end_code_units_column(Encoding::UTF_8) + assert_equal 9, location.end_code_units_column(Encoding::UTF_16LE) + assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE) + end + + def test_chop + location = Prism.parse("foo").value.location + + assert_equal "fo", location.chop.slice + assert_equal "", location.chop.chop.chop.slice + + # Check that we don't go negative. + 10.times { location = location.chop } + assert_equal "", location.slice + end + + def test_slice_lines + method = Prism.parse_statement("\nprivate def foo\nend\n").arguments.arguments.first + + assert_equal "private def foo\nend\n", method.slice_lines + end + + def test_adjoin + program = Prism.parse("foo.bar = 1").value + + location = program.statements.body.first.message_loc + adjoined = location.adjoin("=") + + assert_kind_of Location, adjoined + refute_equal location, adjoined + + assert_equal 4, adjoined.start_offset + assert_equal 9, adjoined.end_offset + end + end +end diff --git a/test/prism/parameters_signature_test.rb b/test/prism/ruby/parameters_signature_test.rb similarity index 85% rename from test/prism/parameters_signature_test.rb rename to test/prism/ruby/parameters_signature_test.rb index 0eed8d993d5..9256bcc0703 100644 --- a/test/prism/parameters_signature_test.rb +++ b/test/prism/ruby/parameters_signature_test.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -require_relative "test_helper" - return if RUBY_VERSION < "3.2" +require_relative "../test_helper" + module Prism class ParametersSignatureTest < TestCase def test_req @@ -56,7 +56,6 @@ def test_keyrest_anonymous def test_key_ordering omit("TruffleRuby returns keys in order they were declared") if RUBY_ENGINE == "truffleruby" - assert_parameters([[:keyreq, :a], [:keyreq, :b], [:key, :c], [:key, :d]], "a:, c: 1, b:, d: 2") end @@ -75,14 +74,13 @@ def test_forwarding private def assert_parameters(expected, source) - eval("def self.m(#{source}); end") - - begin - assert_equal(expected, method(:m).parameters) - 
assert_equal(expected, signature(source)) - ensure - singleton_class.undef_method(:m) - end + # Compare against our expectation. + assert_equal(expected, signature(source)) + + # Compare against Ruby's expectation. + object = Object.new + eval("def object.m(#{source}); end") + assert_equal(expected, object.method(:m).parameters) end def signature(source) diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb new file mode 100644 index 00000000000..a76f193f52d --- /dev/null +++ b/test/prism/ruby/parser_test.rb @@ -0,0 +1,288 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +begin + verbose, $VERBOSE = $VERBOSE, nil + require "parser/ruby33" + require "prism/translation/parser33" +rescue LoadError + # In CRuby's CI, we're not going to test against the parser gem because we + # don't want to have to install it. So in this case we'll just skip this test. + return +ensure + $VERBOSE = verbose +end + +# First, opt in to every AST feature. +Parser::Builders::Default.modernize + +# Modify the source map == check so that it doesn't check against the node +# itself so we don't get into a recursive loop. +Parser::Source::Map.prepend( + Module.new { + def ==(other) + self.class == other.class && + (instance_variables - %i[@node]).map do |ivar| + instance_variable_get(ivar) == other.instance_variable_get(ivar) + end.reduce(:&) + end + } +) + +# Next, ensure that we're comparing the nodes and also comparing the source +# ranges so that we're getting all of the necessary information. +Parser::AST::Node.prepend( + Module.new { + def ==(other) + super && (location == other.location) + end + } +) + +module Prism + class ParserTest < TestCase + # These files are erroring because of the parser gem being wrong. + skip_incorrect = [ + "embdoc_no_newline_at_end.txt" + ] + + # These files are either failing to parse or failing to translate, so we'll + # skip them for now. 
+ skip_all = skip_incorrect | [ + "dash_heredocs.txt", + "dos_endings.txt", + "heredocs_with_ignored_newlines.txt", + "regex.txt", + "regex_char_width.txt", + "spanning_heredoc.txt", + "spanning_heredoc_newlines.txt", + "unescaping.txt", + "seattlerb/backticks_interpolation_line.txt", + "seattlerb/block_decomp_anon_splat_arg.txt", + "seattlerb/block_decomp_arg_splat_arg.txt", + "seattlerb/block_decomp_arg_splat.txt", + "seattlerb/block_decomp_splat.txt", + "seattlerb/block_paren_splat.txt", + "seattlerb/bug190.txt", + "seattlerb/case_in_hash_pat_rest_solo.txt", + "seattlerb/case_in_hash_pat_rest.txt", + "seattlerb/case_in.txt", + "seattlerb/heredoc_nested.txt", + "seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt", + "seattlerb/heredoc_with_carriage_return_escapes_windows.txt", + "seattlerb/heredoc_with_carriage_return_escapes.txt", + "seattlerb/heredoc_with_extra_carriage_returns_windows.txt", + "seattlerb/heredoc_with_only_carriage_returns_windows.txt", + "seattlerb/heredoc_with_only_carriage_returns.txt", + "seattlerb/masgn_double_paren.txt", + "seattlerb/parse_line_heredoc_hardnewline.txt", + "seattlerb/parse_pattern_044.txt", + "seattlerb/parse_pattern_058_2.txt", + "seattlerb/parse_pattern_058.txt", + "seattlerb/pct_nl.txt", + "seattlerb/pctW_lineno.txt", + "seattlerb/regexp_esc_C_slash.txt", + "seattlerb/TestRubyParserShared.txt", + "unparser/corpus/literal/assignment.txt", + "unparser/corpus/literal/block.txt", + "unparser/corpus/literal/def.txt", + "unparser/corpus/literal/dstr.txt", + "unparser/corpus/literal/literal.txt", + "unparser/corpus/literal/pattern.txt", + "unparser/corpus/semantic/dstr.txt", + "unparser/corpus/semantic/opasgn.txt", + "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt", + "whitequark/masgn_nested.txt", + "whitequark/newline_in_hash_argument.txt", + "whitequark/parser_bug_640.txt", + "whitequark/parser_slash_slash_n_escaping_in_literals.txt", + "whitequark/ruby_bug_11989.txt", + 
"whitequark/slash_newline_in_heredocs.txt", + "whitequark/unary_num_pow_precedence.txt" + ] + + # Not sure why these files are failing on JRuby, but skipping them for now. + if RUBY_ENGINE == "jruby" + skip_all.push("emoji_method_calls.txt", "symbols.txt") + end + + # These files are failing to translate their lexer output into the lexer + # output expected by the parser gem, so we'll skip them for now. + skip_tokens = [ + "comments.txt", + "heredoc_with_comment.txt", + "indented_file_end.txt", + "methods.txt", + "strings.txt", + "tilde_heredocs.txt", + "xstring_with_backslash.txt", + "seattlerb/bug169.txt", + "seattlerb/class_comments.txt", + "seattlerb/difficult4__leading_dots2.txt", + "seattlerb/difficult6__7.txt", + "seattlerb/difficult6__8.txt", + "seattlerb/dsym_esc_to_sym.txt", + "seattlerb/heredoc__backslash_dos_format.txt", + "seattlerb/heredoc_backslash_nl.txt", + "seattlerb/heredoc_comma_arg.txt", + "seattlerb/heredoc_squiggly_blank_lines.txt", + "seattlerb/heredoc_squiggly_interp.txt", + "seattlerb/heredoc_squiggly_tabs_extra.txt", + "seattlerb/heredoc_squiggly_tabs.txt", + "seattlerb/heredoc_squiggly_visually_blank_lines.txt", + "seattlerb/heredoc_squiggly.txt", + "seattlerb/heredoc_unicode.txt", + "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt", + "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes.txt", + "seattlerb/interpolated_symbol_array_line_breaks.txt", + "seattlerb/interpolated_word_array_line_breaks.txt", + "seattlerb/label_vs_string.txt", + "seattlerb/module_comments.txt", + "seattlerb/non_interpolated_symbol_array_line_breaks.txt", + "seattlerb/non_interpolated_word_array_line_breaks.txt", + "seattlerb/parse_line_block_inline_comment_leading_newlines.txt", + "seattlerb/parse_line_block_inline_comment.txt", + "seattlerb/parse_line_block_inline_multiline_comment.txt", + "seattlerb/parse_line_dstr_escaped_newline.txt", + "seattlerb/parse_line_heredoc.txt", + 
"seattlerb/parse_line_multiline_str_literal_n.txt", + "seattlerb/parse_line_str_with_newline_escape.txt", + "seattlerb/pct_Q_backslash_nl.txt", + "seattlerb/pct_w_heredoc_interp_nested.txt", + "seattlerb/qsymbols_empty_space.txt", + "seattlerb/qw_escape_term.txt", + "seattlerb/qWords_space.txt", + "seattlerb/read_escape_unicode_curlies.txt", + "seattlerb/read_escape_unicode_h4.txt", + "seattlerb/required_kwarg_no_value.txt", + "seattlerb/slashy_newlines_within_string.txt", + "seattlerb/str_double_escaped_newline.txt", + "seattlerb/str_double_newline.txt", + "seattlerb/str_evstr_escape.txt", + "seattlerb/str_newline_hash_line_number.txt", + "seattlerb/str_single_newline.txt", + "seattlerb/symbol_empty.txt", + "seattlerb/symbols_empty_space.txt", + "whitequark/args.txt", + "whitequark/beginless_erange_after_newline.txt", + "whitequark/beginless_irange_after_newline.txt", + "whitequark/bug_ascii_8bit_in_literal.txt", + "whitequark/bug_def_no_paren_eql_begin.txt", + "whitequark/dedenting_heredoc.txt", + "whitequark/dedenting_non_interpolating_heredoc_line_continuation.txt", + "whitequark/forward_arg_with_open_args.txt", + "whitequark/interp_digit_var.txt", + "whitequark/lbrace_arg_after_command_args.txt", + "whitequark/multiple_pattern_matches.txt", + "whitequark/parser_drops_truncated_parts_of_squiggly_heredoc.txt", + "whitequark/ruby_bug_11990.txt", + "whitequark/ruby_bug_14690.txt", + "whitequark/ruby_bug_9669.txt", + "whitequark/space_args_arg_block.txt", + "whitequark/space_args_block.txt" + ] + + Fixture.each(except: skip_all) do |fixture| + define_method(fixture.test_name) do + assert_equal_parses(fixture, compare_tokens: !skip_tokens.include?(fixture.path)) + end + end + + private + + def assert_equal_parses(fixture, compare_tokens: true) + buffer = Parser::Source::Buffer.new(fixture.path, 1) + buffer.source = fixture.read + + parser = Parser::Ruby33.new + parser.diagnostics.consumer = ->(*) {} + parser.diagnostics.all_errors_are_fatal = true + + expected_ast, 
expected_comments, expected_tokens = + begin + ignore_warnings { parser.tokenize(buffer) } + rescue ArgumentError, Parser::SyntaxError + return + end + + actual_ast, actual_comments, actual_tokens = + ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) } + + assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) } + assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens + assert_equal_comments(expected_comments, actual_comments) + end + + def assert_equal_asts_message(expected_ast, actual_ast) + queue = [[expected_ast, actual_ast]] + + while (left, right = queue.shift) + if left.type != right.type + return "expected: #{left.type}\nactual: #{right.type}" + end + + if left.location != right.location + return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}" + end + + if left.type == :str && left.children[0] != right.children[0] + return "expected: #{left.inspect}\nactual: #{right.inspect}" + end + + left.children.zip(right.children).each do |left_child, right_child| + queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node) + end + end + + "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}" + end + + def assert_equal_tokens(expected_tokens, actual_tokens) + if expected_tokens != actual_tokens + expected_index = 0 + actual_index = 0 + + while expected_index < expected_tokens.length + expected_token = expected_tokens[expected_index] + actual_token = actual_tokens[actual_index] + + expected_index += 1 + actual_index += 1 + + # The parser gem always has a space before a string end in list + # literals, but we don't. So we'll skip over the space. + if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END + expected_index += 1 + next + end + + # There are a lot of tokens that have very specific meaning according + # to the context of the parser. 
We don't expose that information in + # prism, so we need to normalize these tokens a bit. + case actual_token[0] + when :kDO + actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0]) + when :tLPAREN + actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2 + when :tPOW + actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR + end + + # Now we can assert that the tokens are actually equal. + assert_equal expected_token, actual_token, -> { + "expected: #{expected_token.inspect}\n" \ + "actual: #{actual_token.inspect}" + } + end + end + end + + def assert_equal_comments(expected_comments, actual_comments) + assert_equal expected_comments, actual_comments, -> { + "expected: #{expected_comments.inspect}\n" \ + "actual: #{actual_comments.inspect}" + } + end + end +end diff --git a/test/prism/pattern_test.rb b/test/prism/ruby/pattern_test.rb similarity index 98% rename from test/prism/pattern_test.rb rename to test/prism/ruby/pattern_test.rb index e0aa079cb9c..23f512fc1c0 100644 --- a/test/prism/pattern_test.rb +++ b/test/prism/ruby/pattern_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class PatternTest < TestCase diff --git a/test/prism/reflection_test.rb b/test/prism/ruby/reflection_test.rb similarity index 93% rename from test/prism/reflection_test.rb rename to test/prism/ruby/reflection_test.rb index 869b68b1f8f..3ac462e1ac8 100644 --- a/test/prism/reflection_test.rb +++ b/test/prism/ruby/reflection_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "test_helper" +require_relative "../test_helper" module Prism class ReflectionTest < TestCase diff --git a/test/prism/ripper_test.rb b/test/prism/ruby/ripper_test.rb similarity index 66% rename from test/prism/ripper_test.rb rename to test/prism/ruby/ripper_test.rb index 07238fc3d54..8db47da3d35 100644 --- a/test/prism/ripper_test.rb +++ 
b/test/prism/ruby/ripper_test.rb @@ -2,13 +2,11 @@ return if RUBY_VERSION < "3.3" -require_relative "test_helper" +require_relative "../test_helper" module Prism class RipperTest < TestCase - base = File.join(__dir__, "fixtures") - relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base] - + # Skip these tests that Ripper is reporting the wrong results for. incorrect = [ # Ripper incorrectly attributes the block to the keyword. "seattlerb/block_break.txt", @@ -31,6 +29,7 @@ class RipperTest < TestCase "spanning_heredoc.txt" ] + # Skip these tests that we haven't implemented yet. omitted = [ "dos_endings.txt", "heredocs_with_ignored_newlines.txt", @@ -50,30 +49,8 @@ class RipperTest < TestCase "whitequark/slash_newline_in_heredocs.txt" ] - relatives.each do |relative| - # Skip the tests that Ripper is reporting the wrong results for. - next if incorrect.include?(relative) - - # Skip the tests we haven't implemented yet. - next if omitted.include?(relative) - - filepath = File.join(__dir__, "fixtures", relative) - - define_method "test_ripper_#{relative}" do - source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) - - case relative - when /break|next|redo|if|unless|rescue|control|keywords|retry/ - source = "-> do\nrescue\n#{source}\nend" - end - - case source - when /^ *yield/ - source = "def __invalid_yield__\n#{source}\nend" - end - - assert_ripper(source) - end + Fixture.each(except: incorrect | omitted) do |fixture| + define_method(fixture.test_name) { assert_ripper(fixture.read) } end private diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb new file mode 100644 index 00000000000..a13daeeb849 --- /dev/null +++ b/test/prism/ruby/ruby_parser_test.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +return if RUBY_ENGINE == "jruby" + +require_relative "../test_helper" + +begin + require "ruby_parser" +rescue LoadError + # In CRuby's CI, we're not going to test against the ruby_parser gem 
because + # we don't want to have to install it. So in this case we'll just skip this + # test. + return +end + +# We want to also compare lines and files to make sure we're setting them +# correctly. +Sexp.prepend( + Module.new do + def ==(other) + super && line == other.line && file == other.file # && line_max == other.line_max + end + end +) + +module Prism + class RubyParserTest < TestCase + todos = [ + "newline_terminated.txt", + "regex_char_width.txt", + "seattlerb/bug169.txt", + "seattlerb/masgn_colon3.txt", + "seattlerb/messy_op_asgn_lineno.txt", + "seattlerb/op_asgn_primary_colon_const_command_call.txt", + "seattlerb/regexp_esc_C_slash.txt", + "seattlerb/str_lit_concat_bad_encodings.txt", + "unescaping.txt", + "unparser/corpus/literal/kwbegin.txt", + "unparser/corpus/literal/send.txt", + "whitequark/masgn_const.txt", + "whitequark/ruby_bug_12402.txt", + "whitequark/ruby_bug_14690.txt", + "whitequark/space_args_block.txt" + ] + + # https://github.com/seattlerb/ruby_parser/issues/344 + failures = [ + "alias.txt", + "dos_endings.txt", + "heredocs_with_ignored_newlines.txt", + "method_calls.txt", + "methods.txt", + "multi_write.txt", + "not.txt", + "patterns.txt", + "regex.txt", + "seattlerb/and_multi.txt", + "seattlerb/heredoc__backslash_dos_format.txt", + "seattlerb/heredoc_bad_hex_escape.txt", + "seattlerb/heredoc_bad_oct_escape.txt", + "seattlerb/heredoc_with_extra_carriage_horrible_mix.txt", + "seattlerb/heredoc_with_extra_carriage_returns_windows.txt", + "seattlerb/heredoc_with_only_carriage_returns_windows.txt", + "seattlerb/heredoc_with_only_carriage_returns.txt", + "spanning_heredoc_newlines.txt", + "spanning_heredoc.txt", + "tilde_heredocs.txt", + "unparser/corpus/literal/literal.txt", + "while.txt", + "whitequark/cond_eflipflop.txt", + "whitequark/cond_iflipflop.txt", + "whitequark/cond_match_current_line.txt", + "whitequark/dedenting_heredoc.txt", + "whitequark/lvar_injecting_match.txt", + "whitequark/not.txt", + 
"whitequark/numparam_ruby_bug_19025.txt", + "whitequark/op_asgn_cmd.txt", + "whitequark/parser_bug_640.txt", + "whitequark/parser_slash_slash_n_escaping_in_literals.txt", + "whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt", + "whitequark/pattern_matching_single_line.txt", + "whitequark/ruby_bug_11989.txt", + "whitequark/slash_newline_in_heredocs.txt" + ] + + Fixture.each(except: failures) do |fixture| + define_method(fixture.test_name) do + assert_ruby_parser(fixture, todos.include?(fixture.path)) + end + end + + private + + def assert_ruby_parser(fixture, allowed_failure) + source = fixture.read + expected = ignore_warnings { ::RubyParser.new.parse(source, fixture.path) } + actual = Prism::Translation::RubyParser.new.parse(source, fixture.path) + + if !allowed_failure + assert_equal(expected, actual, -> { message(expected, actual) }) + elsif expected == actual + puts "#{name} now passes" + end + end + + def message(expected, actual) + if expected == actual + nil + elsif expected.is_a?(Sexp) && actual.is_a?(Sexp) + if expected.line != actual.line + "expected: (#{expected.inspect} line=#{expected.line}), actual: (#{actual.inspect} line=#{actual.line})" + elsif expected.file != actual.file + "expected: (#{expected.inspect} file=#{expected.file}), actual: (#{actual.inspect} file=#{actual.file})" + elsif expected.length != actual.length + "expected: (#{expected.inspect} length=#{expected.length}), actual: (#{actual.inspect} length=#{actual.length})" + else + expected.zip(actual).find do |expected_field, actual_field| + result = message(expected_field, actual_field) + break result if result + end + end + else + "expected: #{expected.inspect}, actual: #{actual.inspect}" + end + end + end +end diff --git a/test/prism/ruby/tunnel_test.rb b/test/prism/ruby/tunnel_test.rb new file mode 100644 index 00000000000..0214681604a --- /dev/null +++ b/test/prism/ruby/tunnel_test.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative 
"../test_helper" + +module Prism + class TunnelTest < TestCase + def test_tunnel + program = Prism.parse("foo(1) +\n bar(2, 3) +\n baz(3, 4, 5)").value + + tunnel = program.tunnel(1, 4).last + assert_kind_of IntegerNode, tunnel + assert_equal 1, tunnel.value + + tunnel = program.tunnel(2, 6).last + assert_kind_of IntegerNode, tunnel + assert_equal 2, tunnel.value + + tunnel = program.tunnel(3, 9).last + assert_kind_of IntegerNode, tunnel + assert_equal 4, tunnel.value + + tunnel = program.tunnel(3, 8) + assert_equal [ProgramNode, StatementsNode, CallNode, ArgumentsNode, CallNode, ArgumentsNode], tunnel.map(&:class) + end + end +end diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb deleted file mode 100644 index a1e2592d3d4..00000000000 --- a/test/prism/ruby_api_test.rb +++ /dev/null @@ -1,307 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module Prism - class RubyAPITest < TestCase - if !ENV["PRISM_BUILD_MINIMAL"] - def test_ruby_api - filepath = __FILE__ - source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) - - assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value - assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath) - - serialized = Prism.dump(source, filepath: filepath) - ast1 = Prism.load(source, serialized).value - ast2 = Prism.parse(source, filepath: filepath).value - ast3 = Prism.parse_file(filepath).value - - assert_equal_nodes ast1, ast2 - assert_equal_nodes ast2, ast3 - end - end - - def test_parse_success? - assert Prism.parse_success?("1") - refute Prism.parse_success?("<>") - end - - def test_parse_file_success? 
- assert Prism.parse_file_success?(__FILE__) - end - - def test_options - assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath - assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath - - assert_equal 1, Prism.parse("foo").value.statements.body[0].location.start_line - assert_equal 10, Prism.parse("foo", line: 10).value.statements.body[0].location.start_line - - refute Prism.parse("\"foo\"").value.statements.body[0].frozen? - assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen? - refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen? - - assert_kind_of Prism::CallNode, Prism.parse("foo").value.statements.body[0] - assert_kind_of Prism::LocalVariableReadNode, Prism.parse("foo", scopes: [[:foo]]).value.statements.body[0] - assert_equal 1, Prism.parse("foo", scopes: [[:foo], []]).value.statements.body[0].depth - - assert_equal [:foo], Prism.parse("foo", scopes: [[:foo]]).value.locals - end - - def test_literal_value_method - assert_equal 123, parse_expression("123").value - assert_equal 3.14, parse_expression("3.14").value - assert_equal 42i, parse_expression("42i").value - assert_equal 42.1ri, parse_expression("42.1ri").value - assert_equal 3.14i, parse_expression("3.14i").value - assert_equal 42r, parse_expression("42r").value - assert_equal 0.5r, parse_expression("0.5r").value - assert_equal 42ri, parse_expression("42ri").value - assert_equal 0.5ri, parse_expression("0.5ri").value - assert_equal 0xFFr, parse_expression("0xFFr").value - assert_equal 0xFFri, parse_expression("0xFFri").value - end - - def test_location_join - recv, args_node, _ = parse_expression("1234 + 567").child_nodes - arg = args_node.arguments[0] - - joined = recv.location.join(arg.location) - assert_equal 0, joined.start_offset - assert_equal 10, joined.length - - assert_raise RuntimeError, "Incompatible locations" do - arg.location.join(recv.location) 
- end - - other_arg = parse_expression("1234 + 567").arguments.arguments[0] - - assert_raise RuntimeError, "Incompatible sources" do - other_arg.location.join(recv.location) - end - - assert_raise RuntimeError, "Incompatible sources" do - recv.location.join(other_arg.location) - end - end - - def test_location_character_offsets - program = Prism.parse("😀 + 😀\n😍 ||= 😍").value - - # first 😀 - location = program.statements.body.first.receiver.location - assert_equal 0, location.start_character_offset - assert_equal 1, location.end_character_offset - assert_equal 0, location.start_character_column - assert_equal 1, location.end_character_column - - # second 😀 - location = program.statements.body.first.arguments.arguments.first.location - assert_equal 4, location.start_character_offset - assert_equal 5, location.end_character_offset - assert_equal 4, location.start_character_column - assert_equal 5, location.end_character_column - - # first 😍 - location = program.statements.body.last.name_loc - assert_equal 6, location.start_character_offset - assert_equal 7, location.end_character_offset - assert_equal 0, location.start_character_column - assert_equal 1, location.end_character_column - - # second 😍 - location = program.statements.body.last.value.location - assert_equal 12, location.start_character_offset - assert_equal 13, location.end_character_offset - assert_equal 6, location.start_character_column - assert_equal 7, location.end_character_column - end - - def test_location_code_units - program = Prism.parse("😀 + 😀\n😍 ||= 😍").value - - # first 😀 - location = program.statements.body.first.receiver.location - - assert_equal 0, location.start_code_units_offset(Encoding::UTF_8) - assert_equal 0, location.start_code_units_offset(Encoding::UTF_16LE) - assert_equal 0, location.start_code_units_offset(Encoding::UTF_32LE) - - assert_equal 1, location.end_code_units_offset(Encoding::UTF_8) - assert_equal 2, location.end_code_units_offset(Encoding::UTF_16LE) - assert_equal 1, 
location.end_code_units_offset(Encoding::UTF_32LE) - - assert_equal 0, location.start_code_units_column(Encoding::UTF_8) - assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE) - assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE) - - assert_equal 1, location.end_code_units_column(Encoding::UTF_8) - assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE) - assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE) - - # second 😀 - location = program.statements.body.first.arguments.arguments.first.location - - assert_equal 4, location.start_code_units_offset(Encoding::UTF_8) - assert_equal 5, location.start_code_units_offset(Encoding::UTF_16LE) - assert_equal 4, location.start_code_units_offset(Encoding::UTF_32LE) - - assert_equal 5, location.end_code_units_offset(Encoding::UTF_8) - assert_equal 7, location.end_code_units_offset(Encoding::UTF_16LE) - assert_equal 5, location.end_code_units_offset(Encoding::UTF_32LE) - - assert_equal 4, location.start_code_units_column(Encoding::UTF_8) - assert_equal 5, location.start_code_units_column(Encoding::UTF_16LE) - assert_equal 4, location.start_code_units_column(Encoding::UTF_32LE) - - assert_equal 5, location.end_code_units_column(Encoding::UTF_8) - assert_equal 7, location.end_code_units_column(Encoding::UTF_16LE) - assert_equal 5, location.end_code_units_column(Encoding::UTF_32LE) - - # first 😍 - location = program.statements.body.last.name_loc - - assert_equal 6, location.start_code_units_offset(Encoding::UTF_8) - assert_equal 8, location.start_code_units_offset(Encoding::UTF_16LE) - assert_equal 6, location.start_code_units_offset(Encoding::UTF_32LE) - - assert_equal 7, location.end_code_units_offset(Encoding::UTF_8) - assert_equal 10, location.end_code_units_offset(Encoding::UTF_16LE) - assert_equal 7, location.end_code_units_offset(Encoding::UTF_32LE) - - assert_equal 0, location.start_code_units_column(Encoding::UTF_8) - assert_equal 0, 
location.start_code_units_column(Encoding::UTF_16LE) - assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE) - - assert_equal 1, location.end_code_units_column(Encoding::UTF_8) - assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE) - assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE) - - # second 😍 - location = program.statements.body.last.value.location - - assert_equal 12, location.start_code_units_offset(Encoding::UTF_8) - assert_equal 15, location.start_code_units_offset(Encoding::UTF_16LE) - assert_equal 12, location.start_code_units_offset(Encoding::UTF_32LE) - - assert_equal 13, location.end_code_units_offset(Encoding::UTF_8) - assert_equal 17, location.end_code_units_offset(Encoding::UTF_16LE) - assert_equal 13, location.end_code_units_offset(Encoding::UTF_32LE) - - assert_equal 6, location.start_code_units_column(Encoding::UTF_8) - assert_equal 7, location.start_code_units_column(Encoding::UTF_16LE) - assert_equal 6, location.start_code_units_column(Encoding::UTF_32LE) - - assert_equal 7, location.end_code_units_column(Encoding::UTF_8) - assert_equal 9, location.end_code_units_column(Encoding::UTF_16LE) - assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE) - end - - def test_location_chop - location = Prism.parse("foo").value.location - - assert_equal "fo", location.chop.slice - assert_equal "", location.chop.chop.chop.slice - - # Check that we don't go negative. - 10.times { location = location.chop } - assert_equal "", location.slice - end - - def test_location_slice_lines - result = Prism.parse("\nprivate def foo\nend\n") - method = result.value.statements.body.first.arguments.arguments.first - - assert_equal "private def foo\nend\n", method.slice_lines - end - - def test_heredoc? - refute parse_expression("\"foo\"").heredoc? - refute parse_expression("\"foo \#{1}\"").heredoc? - refute parse_expression("`foo`").heredoc? - refute parse_expression("`foo \#{1}`").heredoc? 
- - assert parse_expression("<<~HERE\nfoo\nHERE\n").heredoc? - assert parse_expression("<<~HERE\nfoo \#{1}\nHERE\n").heredoc? - assert parse_expression("<<~`HERE`\nfoo\nHERE\n").heredoc? - assert parse_expression("<<~`HERE`\nfoo \#{1}\nHERE\n").heredoc? - end - - # Through some bit hackery, we want to allow consumers to use the integer - # base flags as the base itself. It has a nice property that the current - # alignment provides them in the correct order. So here we test that our - # assumption holds so that it doesn't change out from under us. - # - # In C, this would look something like: - # - # ((flags & ~DECIMAL) << 1) || 10 - # - # We have to do some other work in Ruby because 0 is truthy and ~ on an - # integer doesn't have a fixed width. - def test_integer_base_flags - base = -> (node) do - value = (node.send(:flags) & (0b1111 - IntegerBaseFlags::DECIMAL)) << 1 - value == 0 ? 10 : value - end - - assert_equal 2, base[parse_expression("0b1")] - assert_equal 8, base[parse_expression("0o1")] - assert_equal 10, base[parse_expression("0d1")] - assert_equal 16, base[parse_expression("0x1")] - end - - def test_node_equality - assert_operator parse_expression("1"), :===, parse_expression("1") - assert_operator Prism.parse("1").value, :===, Prism.parse("1").value - - complex_source = "class Something; @var = something.else { _1 }; end" - assert_operator parse_expression(complex_source), :===, parse_expression(complex_source) - - refute_operator parse_expression("1"), :===, parse_expression("2") - refute_operator parse_expression("1"), :===, parse_expression("0x1") - - complex_source_1 = "class Something; @var = something.else { _1 }; end" - complex_source_2 = "class Something; @var = something.else { _2 }; end" - refute_operator parse_expression(complex_source_1), :===, parse_expression(complex_source_2) - end - - def test_node_tunnel - program = Prism.parse("foo(1) +\n bar(2, 3) +\n baz(3, 4, 5)").value - - tunnel = program.tunnel(1, 4).last - assert_kind_of 
IntegerNode, tunnel - assert_equal 1, tunnel.value - - tunnel = program.tunnel(2, 6).last - assert_kind_of IntegerNode, tunnel - assert_equal 2, tunnel.value - - tunnel = program.tunnel(3, 9).last - assert_kind_of IntegerNode, tunnel - assert_equal 4, tunnel.value - - tunnel = program.tunnel(3, 8) - assert_equal [ProgramNode, StatementsNode, CallNode, ArgumentsNode, CallNode, ArgumentsNode], tunnel.map(&:class) - end - - def test_location_adjoin - program = Prism.parse("foo.bar = 1").value - - location = program.statements.body.first.message_loc - adjoined = location.adjoin("=") - - assert_kind_of Location, adjoined - refute_equal location, adjoined - - assert_equal 4, adjoined.start_offset - assert_equal 9, adjoined.end_offset - end - - private - - def parse_expression(source) - Prism.parse(source).value.statements.body.first - end - end -end diff --git a/test/prism/ruby_parser_test.rb b/test/prism/ruby_parser_test.rb deleted file mode 100644 index 0fd96d42b51..00000000000 --- a/test/prism/ruby_parser_test.rb +++ /dev/null @@ -1,135 +0,0 @@ -# frozen_string_literal: true - -return if RUBY_ENGINE == "jruby" - -require_relative "test_helper" - -begin - require "ruby_parser" -rescue LoadError - # In CRuby's CI, we're not going to test against the ruby_parser gem because - # we don't want to have to install it. So in this case we'll just skip this - # test. - return -end - -# We want to also compare lines and files to make sure we're setting them -# correctly. 
-Sexp.prepend( - Module.new do - def ==(other) - super && line == other.line && line_max == other.line_max && file == other.file - end - end -) - -module Prism - class RubyParserTest < TestCase - base = File.join(__dir__, "fixtures") - - todos = %w[ - newline_terminated.txt - regex_char_width.txt - seattlerb/bug169.txt - seattlerb/masgn_colon3.txt - seattlerb/messy_op_asgn_lineno.txt - seattlerb/op_asgn_primary_colon_const_command_call.txt - seattlerb/regexp_esc_C_slash.txt - seattlerb/str_lit_concat_bad_encodings.txt - unescaping.txt - unparser/corpus/literal/kwbegin.txt - unparser/corpus/literal/send.txt - whitequark/masgn_const.txt - whitequark/ruby_bug_12402.txt - whitequark/ruby_bug_14690.txt - whitequark/space_args_block.txt - ] - - # https://github.com/seattlerb/ruby_parser/issues/344 - failures = %w[ - alias.txt - dos_endings.txt - heredocs_with_ignored_newlines.txt - method_calls.txt - methods.txt - multi_write.txt - not.txt - patterns.txt - regex.txt - seattlerb/and_multi.txt - seattlerb/heredoc__backslash_dos_format.txt - seattlerb/heredoc_bad_hex_escape.txt - seattlerb/heredoc_bad_oct_escape.txt - seattlerb/heredoc_with_extra_carriage_horrible_mix.txt - seattlerb/heredoc_with_extra_carriage_returns_windows.txt - seattlerb/heredoc_with_only_carriage_returns_windows.txt - seattlerb/heredoc_with_only_carriage_returns.txt - spanning_heredoc_newlines.txt - spanning_heredoc.txt - tilde_heredocs.txt - unparser/corpus/literal/literal.txt - while.txt - whitequark/cond_eflipflop.txt - whitequark/cond_iflipflop.txt - whitequark/cond_match_current_line.txt - whitequark/dedenting_heredoc.txt - whitequark/lvar_injecting_match.txt - whitequark/not.txt - whitequark/numparam_ruby_bug_19025.txt - whitequark/op_asgn_cmd.txt - whitequark/parser_bug_640.txt - whitequark/parser_slash_slash_n_escaping_in_literals.txt - whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt - whitequark/pattern_matching_single_line.txt - whitequark/ruby_bug_11989.txt - 
whitequark/slash_newline_in_heredocs.txt - ] - - Dir["**/*.txt", base: base].each do |name| - next if failures.include?(name) - - define_method("test_#{name}") do - begin - # Parsing with ruby parser tends to be noisy with warnings, so we're - # turning those off. - previous_verbose, $VERBOSE = $VERBOSE, nil - assert_parse_file(base, name, todos.include?(name)) - ensure - $VERBOSE = previous_verbose - end - end - end - - private - - def assert_parse_file(base, name, allowed_failure) - filepath = File.join(base, name) - expected = ::RubyParser.new.parse(File.read(filepath), filepath) - actual = Prism::Translation::RubyParser.parse_file(filepath) - - if !allowed_failure - assert_equal_nodes expected, actual - elsif expected == actual - puts "#{name} now passes" - end - end - - def assert_equal_nodes(left, right) - return if left == right - - if left.is_a?(Sexp) && right.is_a?(Sexp) - if left.line != right.line - assert_equal "(#{left.inspect} line=#{left.line})", "(#{right.inspect} line=#{right.line})" - elsif left.file != right.file - assert_equal "(#{left.inspect} file=#{left.file})", "(#{right.inspect} file=#{right.file})" - elsif left.length != right.length - assert_equal "(#{left.inspect} length=#{left.length})", "(#{right.inspect} length=#{right.length})" - else - left.zip(right).each { |l, r| assert_equal_nodes(l, r) } - end - else - assert_equal left, right - end - end - end -end diff --git a/test/prism/snapshots_test.rb b/test/prism/snapshots_test.rb new file mode 100644 index 00000000000..0744eafad31 --- /dev/null +++ b/test/prism/snapshots_test.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module Prism + class SnapshotsTest < TestCase + # When we pretty-print the trees to compare against the snapshots, we want + # to be certain that we print with the same external encoding. This is + # because methods like Symbol#inspect take into account external encoding + # and it could change how the snapshot is generated. 
On machines with + # certain settings (like LANG=C or -Eascii-8bit) this could have been + # changed. So here we're going to force it to be UTF-8 to keep the snapshots + # consistent. + def setup + @previous_default_external = Encoding.default_external + ignore_warnings { Encoding.default_external = Encoding::UTF_8 } + end + + def teardown + ignore_warnings { Encoding.default_external = @previous_default_external } + end + + except = [] + + # These fail on TruffleRuby due to a difference in Symbol#inspect: + # :测试 vs :"测试" + if RUBY_ENGINE == "truffleruby" + except.push( + "emoji_method_calls.txt", + "seattlerb/bug202.txt", + "seattlerb/magic_encoding_comment.txt" + ) + end + + Fixture.each(except: except) do |fixture| + define_method(fixture.test_name) { assert_snapshot(fixture) } + end + + private + + def assert_snapshot(fixture) + source = fixture.read + + result = Prism.parse(source, filepath: fixture.path) + assert result.success? + + printed = PP.pp(result.value, +"", 79) + snapshot = fixture.snapshot_path + + if File.exist?(snapshot) + saved = File.read(snapshot) + + # If the snapshot file exists, but the printed value does not match the + # snapshot, then update the snapshot file. + if printed != saved + File.write(snapshot, printed) + warn("Updated snapshot at #{snapshot}.") + end + + # If the snapshot file exists, then assert that the printed value + # matches the snapshot. + assert_equal(saved, printed) + else + # If the snapshot file does not yet exist, then write it out now. 
+ directory = File.dirname(snapshot) + FileUtils.mkdir_p(directory) unless File.directory?(directory) + + File.write(snapshot, printed) + warn("Created snapshot at #{snapshot}.") + end + end + end +end diff --git a/test/prism/snippets_test.rb b/test/prism/snippets_test.rb new file mode 100644 index 00000000000..26847da184c --- /dev/null +++ b/test/prism/snippets_test.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module Prism + class SnippetsTest < TestCase + except = [ + "newline_terminated.txt", + "seattlerb/begin_rescue_else_ensure_no_bodies.txt", + "seattlerb/case_in.txt", + "seattlerb/parse_line_defn_no_parens.txt", + "seattlerb/pct_nl.txt", + "seattlerb/str_heredoc_interp.txt", + "spanning_heredoc_newlines.txt", + "unparser/corpus/semantic/dstr.txt", + "whitequark/dedenting_heredoc.txt", + "whitequark/multiple_pattern_matches.txt" + ] + + Fixture.each(except: except) do |fixture| + define_method(fixture.test_name) { assert_snippets(fixture) } + end + + private + + # We test every snippet (separated by \n\n) in isolation to ensure the + # parser does not try to read bytes further than the end of each snippet. + def assert_snippets(fixture) + fixture.read.split(/(?<=\S)\n\n(?=\S)/).each do |snippet| + snippet = snippet.rstrip + + result = Prism.parse(snippet, filepath: fixture.path) + assert result.success? 
+ + if !ENV["PRISM_BUILD_MINIMAL"] + dumped = Prism.dump(snippet, filepath: fixture.path) + assert_equal_nodes(result.value, Prism.load(snippet, dumped).value) + end + end + end + end +end diff --git a/test/prism/test_helper.rb b/test/prism/test_helper.rb index 77af7e7b459..d6d0abf5482 100644 --- a/test/prism/test_helper.rb +++ b/test/prism/test_helper.rb @@ -1,8 +1,9 @@ # frozen_string_literal: true require "prism" -require "ripper" require "pp" +require "ripper" +require "stringio" require "test/unit" require "tempfile" @@ -16,19 +17,202 @@ end module Prism + # A convenience method for retrieving the first statement in the source string + # parsed by Prism. + def self.parse_statement(source, **options) + parse(source, **options).value.statements.body.first + end + + class ParseResult < Result + # Returns the first statement in the body of the parsed source. + def statement + value.statements.body.first + end + end + class TestCase < ::Test::Unit::TestCase + # We have a set of fixtures that we use to test various aspects of the + # parser. They are all represented as .txt files under the + # test/prism/fixtures directory. Typically in test files you will find calls + # to Fixture.each which yields Fixture objects to the given block. These + # are used to define test methods that assert against each fixture in some + # way. 
+ class Fixture + BASE = File.join(__dir__, "fixtures") + + attr_reader :path + + def initialize(path) + @path = path + end + + def read + File.read(full_path, binmode: true, external_encoding: Encoding::UTF_8) + end + + def full_path + File.join(BASE, path) + end + + def snapshot_path + File.join(__dir__, "snapshots", path) + end + + def test_name + :"test_#{path}" + end + + def self.each(except: [], &block) + paths = Dir[ENV.fetch("FOCUS") { File.join("**", "*.txt") }, base: BASE] - except + paths.each { |path| yield Fixture.new(path) } + end + end + + # Yield each encoding that we want to test, along with a range of the + # codepoints that should be tested. + def self.each_encoding + codepoints_1byte = 0...0x100 + + yield Encoding::ASCII_8BIT, codepoints_1byte + yield Encoding::US_ASCII, codepoints_1byte + + if !ENV["PRISM_BUILD_MINIMAL"] + yield Encoding::Windows_1253, codepoints_1byte + end + + # By default we don't test every codepoint in these encodings because it + # takes a very long time. 
+ return unless ENV["PRISM_TEST_ALL_ENCODINGS"] + + yield Encoding::CP850, codepoints_1byte + yield Encoding::CP852, codepoints_1byte + yield Encoding::CP855, codepoints_1byte + yield Encoding::GB1988, codepoints_1byte + yield Encoding::IBM437, codepoints_1byte + yield Encoding::IBM720, codepoints_1byte + yield Encoding::IBM737, codepoints_1byte + yield Encoding::IBM775, codepoints_1byte + yield Encoding::IBM852, codepoints_1byte + yield Encoding::IBM855, codepoints_1byte + yield Encoding::IBM857, codepoints_1byte + yield Encoding::IBM860, codepoints_1byte + yield Encoding::IBM861, codepoints_1byte + yield Encoding::IBM862, codepoints_1byte + yield Encoding::IBM863, codepoints_1byte + yield Encoding::IBM864, codepoints_1byte + yield Encoding::IBM865, codepoints_1byte + yield Encoding::IBM866, codepoints_1byte + yield Encoding::IBM869, codepoints_1byte + yield Encoding::ISO_8859_1, codepoints_1byte + yield Encoding::ISO_8859_2, codepoints_1byte + yield Encoding::ISO_8859_3, codepoints_1byte + yield Encoding::ISO_8859_4, codepoints_1byte + yield Encoding::ISO_8859_5, codepoints_1byte + yield Encoding::ISO_8859_6, codepoints_1byte + yield Encoding::ISO_8859_7, codepoints_1byte + yield Encoding::ISO_8859_8, codepoints_1byte + yield Encoding::ISO_8859_9, codepoints_1byte + yield Encoding::ISO_8859_10, codepoints_1byte + yield Encoding::ISO_8859_11, codepoints_1byte + yield Encoding::ISO_8859_13, codepoints_1byte + yield Encoding::ISO_8859_14, codepoints_1byte + yield Encoding::ISO_8859_15, codepoints_1byte + yield Encoding::ISO_8859_16, codepoints_1byte + yield Encoding::KOI8_R, codepoints_1byte + yield Encoding::KOI8_U, codepoints_1byte + yield Encoding::MACCENTEURO, codepoints_1byte + yield Encoding::MACCROATIAN, codepoints_1byte + yield Encoding::MACCYRILLIC, codepoints_1byte + yield Encoding::MACGREEK, codepoints_1byte + yield Encoding::MACICELAND, codepoints_1byte + yield Encoding::MACROMAN, codepoints_1byte + yield Encoding::MACROMANIA, codepoints_1byte + yield 
Encoding::MACTHAI, codepoints_1byte + yield Encoding::MACTURKISH, codepoints_1byte + yield Encoding::MACUKRAINE, codepoints_1byte + yield Encoding::TIS_620, codepoints_1byte + yield Encoding::Windows_1250, codepoints_1byte + yield Encoding::Windows_1251, codepoints_1byte + yield Encoding::Windows_1252, codepoints_1byte + yield Encoding::Windows_1254, codepoints_1byte + yield Encoding::Windows_1255, codepoints_1byte + yield Encoding::Windows_1256, codepoints_1byte + yield Encoding::Windows_1257, codepoints_1byte + yield Encoding::Windows_1258, codepoints_1byte + yield Encoding::Windows_874, codepoints_1byte + + codepoints_2bytes = 0...0x10000 + + yield Encoding::Big5, codepoints_2bytes + yield Encoding::Big5_HKSCS, codepoints_2bytes + yield Encoding::Big5_UAO, codepoints_2bytes + yield Encoding::CP949, codepoints_2bytes + yield Encoding::CP950, codepoints_2bytes + yield Encoding::CP951, codepoints_2bytes + yield Encoding::EUC_KR, codepoints_2bytes + yield Encoding::GBK, codepoints_2bytes + yield Encoding::GB12345, codepoints_2bytes + yield Encoding::GB2312, codepoints_2bytes + yield Encoding::MACJAPANESE, codepoints_2bytes + yield Encoding::Shift_JIS, codepoints_2bytes + yield Encoding::SJIS_DoCoMo, codepoints_2bytes + yield Encoding::SJIS_KDDI, codepoints_2bytes + yield Encoding::SJIS_SoftBank, codepoints_2bytes + yield Encoding::Windows_31J, codepoints_2bytes + + codepoints_unicode = (0...0x110000) + + yield Encoding::UTF_8, codepoints_unicode + yield Encoding::UTF8_MAC, codepoints_unicode + yield Encoding::UTF8_DoCoMo, codepoints_unicode + yield Encoding::UTF8_KDDI, codepoints_unicode + yield Encoding::UTF8_SoftBank, codepoints_unicode + yield Encoding::CESU_8, codepoints_unicode + + codepoints_eucjp = [ + *(0...0x10000), + *(0...0x10000).map { |bytes| bytes | 0x8F0000 } + ] + + yield Encoding::CP51932, codepoints_eucjp + yield Encoding::EUC_JP, codepoints_eucjp + yield Encoding::EUCJP_MS, codepoints_eucjp + yield Encoding::EUC_JIS_2004, codepoints_eucjp + + 
codepoints_emacs_mule = [ + *(0...0x80), + *((0x81...0x90).flat_map { |byte1| (0x90...0x100).map { |byte2| byte1 << 8 | byte2 } }), + *((0x90...0x9C).flat_map { |byte1| (0xA0...0x100).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| byte1 << 16 | byte2 << 8 | byte3 } } }), + *((0xF0...0xF5).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| (0xA0...0x100).flat_map { |byte4| 0x9C << 24 | byte3 << 16 | byte3 << 8 | byte4 } } }), + ] + + yield Encoding::EMACS_MULE, codepoints_emacs_mule + yield Encoding::STATELESS_ISO_2022_JP, codepoints_emacs_mule + yield Encoding::STATELESS_ISO_2022_JP_KDDI, codepoints_emacs_mule + + codepoints_gb18030 = [ + *(0...0x80), + *((0x81..0xFE).flat_map { |byte1| (0x40...0x100).map { |byte2| byte1 << 8 | byte2 } }), + *((0x81..0xFE).flat_map { |byte1| (0x30...0x40).flat_map { |byte2| (0x81..0xFE).flat_map { |byte3| (0x2F...0x41).map { |byte4| byte1 << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } }), + ] + + yield Encoding::GB18030, codepoints_gb18030 + + codepoints_euc_tw = [ + *(0..0x7F), + *(0xA1..0xFF).flat_map { |byte1| (0xA1..0xFF).map { |byte2| (byte1 << 8) | byte2 } }, + *(0xA1..0xB0).flat_map { |byte2| (0xA1..0xFF).flat_map { |byte3| (0xA1..0xFF).flat_map { |byte4| 0x8E << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } + ] + + yield Encoding::EUC_TW, codepoints_euc_tw + end + private if RUBY_ENGINE == "ruby" # Check that the given source is valid syntax by compiling it with RubyVM. def check_syntax(source) - $VERBOSE, previous = nil, $VERBOSE - - begin - RubyVM::InstructionSequence.compile(source) - ensure - $VERBOSE = previous - end + ignore_warnings { RubyVM::InstructionSequence.compile(source) } end # Assert that the given source is valid Ruby syntax by attempting to @@ -51,6 +235,8 @@ def refute_valid_syntax(source) end end + # CRuby has this same method, so define it so that we don't accidentally + # break CRuby CI. 
def assert_raises(*args, &block) raise "Use assert_raise instead" end @@ -122,5 +308,16 @@ def assert_equal_nodes(expected, actual, compare_location: true, parent: nil) assert_equal expected, actual end end + + def ignore_warnings + previous = $VERBOSE + $VERBOSE = nil + + begin + yield + ensure + $VERBOSE = previous + end + end end end diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb index 3f78a59b113..35e1952cb2b 100644 --- a/test/prism/unescape_test.rb +++ b/test/prism/unescape_test.rb @@ -2,7 +2,7 @@ require_relative "test_helper" -return if RUBY_VERSION < "3.1.0" || Prism::BACKEND == :FFI +return if RUBY_VERSION < "3.1.0" module Prism class UnescapeTest < TestCase @@ -41,7 +41,7 @@ def prism(escape) result = Prism.parse(code(escape), encoding: "binary") if result.success? - yield result.value.statements.body.first + yield result.statement else :error end