From 0d5d75909156687163f52558f540749cb76e5cbd Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Sat, 19 Aug 2023 14:03:35 -0400
Subject: [PATCH] fix: support newline-terminated regular expressions

Previously, parsing a snippet like this:

    %r\nfoo\n

would result in tracking the second newline twice, resulting in a
failed runtime assertion.

Fixing that issue reveals another bug, which is that the _first_
newline was not being tracked at all. So we introduce a call to
yp_newline_list_append right when we construct the REGEXP_BEGIN token.
---
 src/yarp.c                                   | 11 ++++++++++-
 test/fixtures/newline-terminated-things.txt  |  2 ++
 test/parse_test.rb                           |  2 ++
 test/snapshots/newline-terminated-things.txt |  6 ++++++
 4 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 test/fixtures/newline-terminated-things.txt
 create mode 100644 test/snapshots/newline-terminated-things.txt

diff --git a/src/yarp.c b/src/yarp.c
index 1f30672d519..4e3b0610c6f 100644
--- a/src/yarp.c
+++ b/src/yarp.c
@@ -6215,6 +6215,9 @@ parser_lex(yp_parser_t *parser) {
 
                                 if (parser->current.end < parser->end) {
                                     lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
+                                    if (*parser->current.end == '\n') {
+                                        yp_newline_list_append(&parser->newline_list, parser->current.end);
+                                    }
                                     parser->current.end++;
                                 }
 
@@ -6526,7 +6529,13 @@ parser_lex(yp_parser_t *parser) {
                 // If we've hit a newline, then we need to track that in the
                 // list of newlines.
                 if (*breakpoint == '\n') {
-                    yp_newline_list_append(&parser->newline_list, breakpoint);
+                    // For the special case of a newline-terminated regular expression, we will pass
+                    // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
+                    // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
+                    // tracking it only in the REGEXP_BEGIN case.
+                    if (!(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)) {
+                        yp_newline_list_append(&parser->newline_list, breakpoint);
+                    }
 
                     if (lex_mode->as.regexp.terminator != '\n') {
                         // If the terminator is not a newline, then we can set
diff --git a/test/fixtures/newline-terminated-things.txt b/test/fixtures/newline-terminated-things.txt
new file mode 100644
index 00000000000..27e7c62e8e2
--- /dev/null
+++ b/test/fixtures/newline-terminated-things.txt
@@ -0,0 +1,2 @@
+%r
+foo
diff --git a/test/parse_test.rb b/test/parse_test.rb
index ed0567d0194..c0f3ecf551e 100644
--- a/test/parse_test.rb
+++ b/test/parse_test.rb
@@ -131,6 +131,8 @@ def test_parse_takes_file_path
   end
 
   Dir["*.txt", base: base].each do |relative|
+    next if relative == "newline-terminated-things.txt"
+
     # We test every snippet (separated by \n\n) in isolation
     # to ensure the parser does not try to read bytes further than the end of each snippet
     define_method "test_individual_snippets_#{relative}" do
diff --git a/test/snapshots/newline-terminated-things.txt b/test/snapshots/newline-terminated-things.txt
new file mode 100644
index 00000000000..e68ea1658e8
--- /dev/null
+++ b/test/snapshots/newline-terminated-things.txt
@@ -0,0 +1,6 @@
+ProgramNode(0...7)(
+  [],
+  StatementsNode(0...7)(
+    [RegularExpressionNode(0...7)((0...3), (3...6), (6...7), "foo", 0)]
+  )
+)