From 0d5d75909156687163f52558f540749cb76e5cbd Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 19 Aug 2023 14:03:35 -0400 Subject: [PATCH] fix: support newline-terminated regular expressions Previously, parsing a snippet like this: %r\nfoo\n would result in tracking the second newline twice, resulting in a failed runtime assertion. Fixing that issue reveals another bug, which is that the _first_ newline was not being tracked at all. So we introduce a call to yp_newline_list_append right when we construct the REGEXP_BEGIN token. --- src/yarp.c | 11 ++++++++++- test/fixtures/newline-terminated-things.txt | 2 ++ test/parse_test.rb | 2 ++ test/snapshots/newline-terminated-things.txt | 6 ++++++ 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 test/fixtures/newline-terminated-things.txt create mode 100644 test/snapshots/newline-terminated-things.txt diff --git a/src/yarp.c b/src/yarp.c index 1f30672d519..4e3b0610c6f 100644 --- a/src/yarp.c +++ b/src/yarp.c @@ -6215,6 +6215,9 @@ parser_lex(yp_parser_t *parser) { if (parser->current.end < parser->end) { lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + if (parser->current.end == '\n') { + yp_newline_list_append(&parser->newline_list, parser->current.end); + } parser->current.end++; } @@ -6526,7 +6529,13 @@ parser_lex(yp_parser_t *parser) { // If we've hit a newline, then we need to track that in the // list of newlines. if (*breakpoint == '\n') { - yp_newline_list_append(&parser->newline_list, breakpoint); + // For the special case of a newline-terminated regular expression, we will pass + // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again + // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by + // tracking it only in the REGEXP_BEGIN case. 
+ if (!(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)) { + yp_newline_list_append(&parser->newline_list, breakpoint); + } if (lex_mode->as.regexp.terminator != '\n') { // If the terminator is not a newline, then we can set diff --git a/test/fixtures/newline-terminated-things.txt b/test/fixtures/newline-terminated-things.txt new file mode 100644 index 00000000000..27e7c62e8e2 --- /dev/null +++ b/test/fixtures/newline-terminated-things.txt @@ -0,0 +1,2 @@ +%r +foo diff --git a/test/parse_test.rb b/test/parse_test.rb index ed0567d0194..c0f3ecf551e 100644 --- a/test/parse_test.rb +++ b/test/parse_test.rb @@ -131,6 +131,8 @@ def test_parse_takes_file_path end Dir["*.txt", base: base].each do |relative| + next if relative == "newline_terminated.txt" + # We test every snippet (separated by \n\n) in isolation # to ensure the parser does not try to read bytes further than the end of each snippet define_method "test_individual_snippets_#{relative}" do diff --git a/test/snapshots/newline-terminated-things.txt b/test/snapshots/newline-terminated-things.txt new file mode 100644 index 00000000000..e68ea1658e8 --- /dev/null +++ b/test/snapshots/newline-terminated-things.txt @@ -0,0 +1,6 @@ +ProgramNode(0...7)( + [], + StatementsNode(0...7)( + [RegularExpressionNode(0...7)((0...3), (3...6), (6...7), "foo", 0)] + ) +)