From aa2182f064bfc3c91da46e8f11168c4dac9cbe5d Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 1 Apr 2024 13:08:21 -0400
Subject: [PATCH 1/4] Track captures in pattern matching for duplicates

---
 config.yml                     |   1 +
 src/prism.c                    | 284 ++++++++++++++++++---------------
 templates/src/diagnostic.c.erb |   1 +
 test/prism/errors_test.rb      |  16 ++
 4 files changed, 173 insertions(+), 129 deletions(-)

diff --git a/config.yml b/config.yml
index edbe4b32d84..658492a488b 100644
--- a/config.yml
+++ b/config.yml
@@ -175,6 +175,7 @@ errors:
   - PARAMETER_STAR
   - PARAMETER_UNEXPECTED_FWD
   - PARAMETER_WILD_LOOSE_COMMA
+  - PATTERN_CAPTURE_DUPLICATE
   - PATTERN_EXPRESSION_AFTER_BRACKET
   - PATTERN_EXPRESSION_AFTER_COMMA
   - PATTERN_EXPRESSION_AFTER_HROCKET
diff --git a/src/prism.c b/src/prism.c
index 1f4b9ced5c6..6d46e06c974 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -4939,7 +4939,8 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui
  * name and depth.
  */
 static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
+pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
+    pm_refute_numbered_parameter(parser, location->start, location->end);
     pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
 
     *node = (pm_local_variable_target_node_t) {
@@ -4954,36 +4955,6 @@ pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_locati
     return node;
 }
 
-/**
- * Allocate and initialize a new LocalVariableTargetNode node.
- */
-static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
-    pm_refute_numbered_parameter(parser, name->start, name->end);
-
-    return pm_local_variable_target_node_create_values(
-        parser,
-        &(pm_location_t) { .start = name->start, .end = name->end },
-        pm_parser_constant_id_token(parser, name),
-        0
-    );
-}
-
-/**
- * Allocate and initialize a new LocalVariableTargetNode node with the given depth.
- */
-static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create_depth(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
-    pm_refute_numbered_parameter(parser, name->start, name->end);
-
-    return pm_local_variable_target_node_create_values(
-        parser,
-        &(pm_location_t) { .start = name->start, .end = name->end },
-        pm_parser_constant_id_token(parser, name),
-        depth
-    );
-}
-
 /**
  * Allocate and initialize a new MatchPredicateNode node.
  */
@@ -7086,9 +7057,9 @@ pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const ui
 /**
  * Add a local variable from a token to the current scope.
  */
-static inline void
+static inline pm_constant_id_t
 pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token) {
-    pm_parser_local_add_location(parser, token->start, token->end);
+    return pm_parser_local_add_location(parser, token->start, token->end);
 }
 
 /**
@@ -14625,13 +14596,27 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
 }
 
 static pm_node_t *
-parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id);
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
+
+/**
+ * Add the newly created local to the list of captures for this pattern matching
+ * expression. If it is duplicated from a previous local, then we'll need to add
+ * an error to the parser.
+ */
+static void
+parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
+    if (pm_constant_id_list_includes(captures, capture)) {
+        pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
+    } else {
+        pm_constant_id_list_append(captures, capture);
+    }
+}
 
 /**
  * Accept any number of constants joined by :: delimiters.
  */
 static pm_node_t *
-parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
+parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) {
     // Now, if there are any :: operators that follow, parse them as constant
     // path nodes.
     while (accept1(parser, PM_TOKEN_COLON_COLON)) {
@@ -14639,7 +14624,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
         expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
 
         pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
-        node = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
+        node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
     }
 
     // If there is a [ or ( that follows, then this is part of a larger pattern
@@ -14658,7 +14643,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
         accept1(parser, PM_TOKEN_NEWLINE);
 
         if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
-            inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
+            inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
             accept1(parser, PM_TOKEN_NEWLINE);
             expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
         }
@@ -14670,7 +14655,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
         accept1(parser, PM_TOKEN_NEWLINE);
 
         if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-            inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
+            inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
             accept1(parser, PM_TOKEN_NEWLINE);
             expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
         }
@@ -14753,18 +14738,30 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
  * Parse a rest pattern.
  */
 static pm_splat_node_t *
-parse_pattern_rest(pm_parser_t *parser) {
+parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
     assert(parser->previous.type == PM_TOKEN_USTAR);
     pm_token_t operator = parser->previous;
     pm_node_t *name = NULL;
 
     // Rest patterns don't necessarily have a name associated with them. So we
-    // will check for that here. If they do, then we'll add it to the local table
-    // since this pattern will cause it to become a local variable.
+    // will check for that here. If they do, then we'll add it to the local
+    // table since this pattern will cause it to become a local variable.
     if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
         pm_token_t identifier = parser->previous;
-        pm_parser_local_add_token(parser, &identifier);
-        name = (pm_node_t *) pm_local_variable_target_node_create(parser, &identifier);
+        pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
+
+        int depth;
+        if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+            pm_parser_local_add(parser, constant_id);
+        }
+
+        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
+        name = (pm_node_t *) pm_local_variable_target_node_create(
+            parser,
+            &PM_LOCATION_TOKEN_VALUE(&identifier),
+            constant_id,
+            (uint32_t) (depth == -1 ? 0 : depth)
+        );
     }
 
     // Finally we can return the created node.
@@ -14775,7 +14772,7 @@ parse_pattern_rest(pm_parser_t *parser) {
  * Parse a keyword rest node.
  */
 static pm_node_t *
-parse_pattern_keyword_rest(pm_parser_t *parser) {
+parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
     assert(parser->current.type == PM_TOKEN_USTAR_STAR);
     parser_lex(parser);
 
@@ -14787,8 +14784,20 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
     }
 
     if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
-        pm_parser_local_add_token(parser, &parser->previous);
-        value = (pm_node_t *) pm_local_variable_target_node_create(parser, &parser->previous);
+        pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+
+        int depth;
+        if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+            pm_parser_local_add(parser, constant_id);
+        }
+
+        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+        value = (pm_node_t *) pm_local_variable_target_node_create(
+            parser,
+            &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+            constant_id,
+            (uint32_t) (depth == -1 ? 0 : depth)
+        );
     }
 
     return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
@@ -14799,21 +14808,23 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
  * value. This will use an implicit local variable target.
  */
 static pm_node_t *
-parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
+parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
     const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
-    pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
-
-    int current_depth = pm_parser_local_depth_constant_id(parser, name);
-    uint32_t depth;
+    pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
 
-    if (current_depth == -1) {
-        pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
-        depth = 0;
-    } else {
-        depth = (uint32_t) current_depth;
+    int depth;
+    if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+        pm_parser_local_add(parser, constant_id);
     }
 
-    pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
+    parse_pattern_capture(parser, captures, constant_id, value_loc);
+    pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
+        parser,
+        value_loc,
+        constant_id,
+        (uint32_t) (depth == -1 ? 0 : depth)
+    );
+
     return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
 }
 
@@ -14821,7 +14832,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
  * Parse a hash pattern.
  */
 static pm_hash_pattern_node_t *
-parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
+parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node) {
     pm_node_list_t assocs = { 0 };
     pm_node_t *rest = NULL;
 
@@ -14834,14 +14845,14 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
             if (pm_symbol_node_label_p(first_node)) {
                 pm_node_t *value;
 
-                if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                    // Here we have a value for the first assoc in the list, so
-                    // we will parse it now.
-                    value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
-                } else {
+                if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
                     // Otherwise, we will create an implicit local variable
                     // target for the value.
-                    value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
+                    value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+                } else {
+                    // Here we have a value for the first assoc in the list, so
+                    // we will parse it now.
+                    value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
                 }
 
                 pm_token_t operator = not_provided(parser);
@@ -14875,7 +14886,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
         }
 
         if (match1(parser, PM_TOKEN_USTAR_STAR)) {
-            pm_node_t *assoc = parse_pattern_keyword_rest(parser);
+            pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
 
             if (rest == NULL) {
                 rest = assoc;
@@ -14888,12 +14899,10 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
             pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
             pm_node_t *value = NULL;
 
-            if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
+            if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
             } else {
-                const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
-                pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
-                value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
+                value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
             }
 
             pm_token_t operator = not_provided(parser);
@@ -14917,18 +14926,25 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
  * Parse a pattern expression primitive.
  */
 static pm_node_t *
-parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id) {
     switch (parser->current.type) {
         case PM_TOKEN_IDENTIFIER:
         case PM_TOKEN_METHOD_NAME: {
             parser_lex(parser);
-            pm_token_t name = parser->previous;
-            int depth = pm_parser_local_depth(parser, &name);
-            if (depth < 0) {
-                depth = 0;
-                pm_parser_local_add_token(parser, &name);
+            pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+
+            int depth;
+            if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+                pm_parser_local_add(parser, constant_id);
             }
-            return (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &name, (uint32_t) depth);
+
+            parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+            return (pm_node_t *) pm_local_variable_target_node_create(
+                parser,
+                &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+                constant_id,
+                (uint32_t) (depth == -1 ? 0 : depth)
+            );
         }
         case PM_TOKEN_BRACKET_LEFT_ARRAY: {
             pm_token_t opening = parser->current;
@@ -14937,15 +14953,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
             if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
                 // If we have an empty array pattern, then we'll just return a new
                 // array pattern node.
-                return (pm_node_t *)pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
+                return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
             }
 
             // Otherwise, we'll parse the inner pattern, then deal with it depending
             // on the type it returns.
-            pm_node_t *inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
+            pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
 
             accept1(parser, PM_TOKEN_NEWLINE);
-
             expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
             pm_token_t closing = parser->previous;
 
@@ -15007,7 +15022,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                         first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
                         break;
                     case PM_TOKEN_USTAR_STAR:
-                        first_node = parse_pattern_keyword_rest(parser);
+                        first_node = parse_pattern_keyword_rest(parser, captures);
                         break;
                     case PM_TOKEN_STRING_BEGIN:
                         first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
@@ -15021,7 +15036,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                     }
                 }
 
-                node = parse_pattern_hash(parser, first_node);
+                node = parse_pattern_hash(parser, captures, first_node);
 
                 accept1(parser, PM_TOKEN_NEWLINE);
                 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -15168,14 +15183,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
             pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
             pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
 
-            return parse_pattern_constant_path(parser, (pm_node_t *)node);
+            return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
         }
         case PM_TOKEN_CONSTANT: {
             pm_token_t constant = parser->current;
             parser_lex(parser);
 
             pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
-            return parse_pattern_constant_path(parser, node);
+            return parse_pattern_constant_path(parser, captures, node);
         }
         default:
             pm_parser_err_current(parser, diag_id);
@@ -15188,7 +15203,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
  * assignment.
  */
 static pm_node_t *
-parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id) {
     pm_node_t *node = NULL;
 
     do {
@@ -15205,9 +15220,9 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
             case PM_TOKEN_UDOT_DOT_DOT:
             case PM_CASE_PRIMITIVE: {
                 if (node == NULL) {
-                    node = parse_pattern_primitive(parser, diag_id);
+                    node = parse_pattern_primitive(parser, captures, diag_id);
                 } else {
-                    pm_node_t *right = parse_pattern_primitive(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE);
+                    pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE);
                     node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
                 }
 
@@ -15217,7 +15232,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                 pm_token_t opening = parser->current;
                 parser_lex(parser);
 
-                pm_node_t *body = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
+                pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
                 accept1(parser, PM_TOKEN_NEWLINE);
                 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
                 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -15249,16 +15264,23 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
     // In this case we should create an assignment node.
     while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
         pm_token_t operator = parser->previous;
-
         expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
-        pm_token_t identifier = parser->previous;
-        int depth = pm_parser_local_depth(parser, &identifier);
-        if (depth < 0) {
-            depth = 0;
-            pm_parser_local_add_token(parser, &identifier);
+
+        pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+        int depth;
+
+        if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+            pm_parser_local_add(parser, constant_id);
         }
 
-        pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &identifier, (uint32_t) depth);
+        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+        pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(
+            parser,
+            &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+            constant_id,
+            (uint32_t) (depth == -1 ? 0 : depth)
+        );
+
         node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
     }
 
@@ -15269,7 +15291,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
  * Parse a pattern matching expression.
  */
 static pm_node_t *
-parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) {
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
     pm_node_t *node = NULL;
 
     bool leading_rest = false;
@@ -15279,30 +15301,30 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
         case PM_TOKEN_LABEL: {
             parser_lex(parser);
             pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
-            return (pm_node_t *) parse_pattern_hash(parser, key);
+            return (pm_node_t *) parse_pattern_hash(parser, captures, key);
         }
         case PM_TOKEN_USTAR_STAR: {
-            node = parse_pattern_keyword_rest(parser);
-            return (pm_node_t *) parse_pattern_hash(parser, node);
+            node = parse_pattern_keyword_rest(parser, captures);
+            return (pm_node_t *) parse_pattern_hash(parser, captures, node);
         }
         case PM_TOKEN_USTAR: {
             if (top_pattern) {
                 parser_lex(parser);
-                node = (pm_node_t *) parse_pattern_rest(parser);
+                node = (pm_node_t *) parse_pattern_rest(parser, captures);
                 leading_rest = true;
                 break;
             }
         }
         /* fallthrough */
         default:
-            node = parse_pattern_primitives(parser, diag_id);
+            node = parse_pattern_primitives(parser, captures, diag_id);
             break;
     }
 
     // If we got a dynamic label symbol, then we need to treat it like the
     // beginning of a hash pattern.
     if (pm_symbol_node_label_p(node)) {
-        return (pm_node_t *) parse_pattern_hash(parser, node);
+        return (pm_node_t *) parse_pattern_hash(parser, captures, node);
     }
 
     if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
@@ -15322,7 +15344,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
             }
 
             if (accept1(parser, PM_TOKEN_USTAR)) {
-                node = (pm_node_t *) parse_pattern_rest(parser);
+                node = (pm_node_t *) parse_pattern_rest(parser, captures);
 
                 // If we have already parsed a splat pattern, then this is an error. We
                 // will continue to parse the rest of the patterns, but we will indicate
@@ -15333,7 +15355,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
 
                 trailing_rest = true;
             } else {
-                node = parse_pattern_primitives(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
+                node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
             }
 
             pm_node_list_append(&nodes, node);
@@ -16300,8 +16322,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
             }
 
-            // At this point we can create a case node, though we don't yet know if it
-            // is a case-in or case-when node.
+            // At this point we can create a case node, though we don't yet know
+            // if it is a case-in or case-when node.
             pm_token_t end_keyword = not_provided(parser);
             pm_node_t *node;
 
@@ -16379,8 +16401,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
                 }
 
-                // At this point we expect that we're parsing a case-in node. We will
-                // continue to parse the in nodes until we hit the end of the list.
+                // At this point we expect that we're parsing a case-in node. We
+                // will continue to parse the in nodes until we hit the end of
+                // the list.
                 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
                     bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
                     parser->pattern_matching_newlines = true;
@@ -16390,11 +16413,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                     parser_lex(parser);
 
                     pm_token_t in_keyword = parser->previous;
-                    pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+
+                    pm_constant_id_list_t captures = { 0 };
+                    pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+
                     parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+                    pm_constant_id_list_free(&captures);
 
-                    // Since we're in the top-level of the case-in node we need to check
-                    // for guard clauses in the form of `if` or `unless` statements.
+                    // Since we're in the top-level of the case-in node we need
+                    // to check for guard clauses in the form of `if` or
+                    // `unless` statements.
                     if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
                         pm_token_t keyword = parser->previous;
                         pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
@@ -16405,9 +16433,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
                     }
 
-                    // Now we need to check for the terminator of the in node's pattern.
-                    // It can be a newline or semicolon optionally followed by a `then`
-                    // keyword.
+                    // Now we need to check for the terminator of the in node's
+                    // pattern. It can be a newline or semicolon optionally
+                    // followed by a `then` keyword.
                     pm_token_t then_keyword;
                     if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
                         if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
@@ -16420,8 +16448,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         then_keyword = parser->previous;
                     }
 
-                    // Now we can actually parse the statements associated with the in
-                    // node.
+                    // Now we can actually parse the statements associated with
+                    // the in node.
                     pm_statements_node_t *statements;
                     if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
                         statements = NULL;
@@ -16429,8 +16457,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         statements = parse_statements(parser, PM_CONTEXT_CASE_IN);
                     }
 
-                    // Now that we have the full pattern and statements, we can create the
-                    // node and attach it to the case node.
+                    // Now that we have the full pattern and statements, we can
+                    // create the node and attach it to the case node.
                     pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
                     pm_case_match_node_condition_append(case_node, condition);
                 }
@@ -18152,7 +18180,6 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
                 // copy the names directly. The pointers will line up.
                 location = (pm_location_t) { .start = source, .end = source + length };
                 name = pm_parser_constant_id_location(parser, location.start, location.end);
-                pm_refute_numbered_parameter(parser, source, source + length);
             } else {
                 // Otherwise, the name is a slice of the malloc-ed owned string,
                 // in which case we need to copy it out into a new string.
@@ -18163,11 +18190,6 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
 
                 memcpy(memory, source, length);
                 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
-
-                if (pm_token_is_numbered_parameter(source, source + length)) {
-                    const pm_location_t *location = &call->receiver->location;
-                    PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start);
-                }
             }
 
             if (name != 0) {
@@ -18191,7 +18213,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
 
                 // Next, create the local variable target and add it to the
                 // list of targets for the match.
-                pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
+                pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
                 pm_node_list_append(&match->targets, target);
             }
         }
@@ -18963,11 +18985,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             pm_token_t operator = parser->current;
             parser->command_start = false;
             lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
-
             parser_lex(parser);
 
-            pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+            pm_constant_id_list_t captures = { 0 };
+            pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+
             parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+            pm_constant_id_list_free(&captures);
 
             return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
         }
@@ -18978,11 +19002,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             pm_token_t operator = parser->current;
             parser->command_start = false;
             lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
-
             parser_lex(parser);
 
-            pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
+            pm_constant_id_list_t captures = { 0 };
+            pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
+
             parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+            pm_constant_id_list_free(&captures);
 
             return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
         }
diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb
index b608c44f01a..b403eeaa8c8 100644
--- a/templates/src/diagnostic.c.erb
+++ b/templates/src/diagnostic.c.erb
@@ -258,6 +258,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_PARAMETER_STAR]                     = { "unexpected parameter `*`", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PARAMETER_UNEXPECTED_FWD]           = { "unexpected `...` in parameters", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PARAMETER_WILD_LOOSE_COMMA]         = { "unexpected `,` in parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_CAPTURE_DUPLICATE]          = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET]   = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA]     = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET]   = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX },
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index 6cc71f96471..b3ae1c8ec59 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -2157,6 +2157,22 @@ def test_assignment_to_literal_in_conditionals
       ] * source.lines.count
     end
 
+    def test_duplicate_pattern_capture
+      source = <<~RUBY
+        () => [a, a]
+        () => [a, *a]
+        () => {a:, a:}
+        () => {a: a, a: a}
+        () => {a: a, **a}
+        () => [a, {a:}]
+        () => [a, {a: {a: {a: [a]}}}]
+        () => a => a
+        () => [A => a, {a: b => a}]
+      RUBY
+
+      assert_error_messages source, Array.new(source.lines.length, "duplicated variable name"), compare_ripper: false
+    end
+
     private
 
     def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby")

From 71ea82f299db9df767674573e6980e34c6ed1473 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 1 Apr 2024 13:33:16 -0400
Subject: [PATCH 2/4] Track duplicate hash keys for pattern matching

---
 config.yml                     |  1 +
 src/prism.c                    | 12 ++++++++++++
 templates/src/diagnostic.c.erb |  1 +
 test/prism/errors_test.rb      | 14 ++++++++++++--
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/config.yml b/config.yml
index 658492a488b..34cd388f485 100644
--- a/config.yml
+++ b/config.yml
@@ -187,6 +187,7 @@ errors:
   - PATTERN_EXPRESSION_AFTER_RANGE
   - PATTERN_EXPRESSION_AFTER_REST
   - PATTERN_HASH_KEY
+  - PATTERN_HASH_KEY_DUPLICATE
   - PATTERN_HASH_KEY_LABEL
   - PATTERN_IDENT_AFTER_HROCKET
   - PATTERN_LABEL_AFTER_COMMA
diff --git a/src/prism.c b/src/prism.c
index 6d46e06c974..15c7e4568d5 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -14828,12 +14828,20 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
     return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
 }
 
+static void
+parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
+    if (pm_static_literals_add(parser, keys, node) != NULL) {
+        pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
+    }
+}
+
 /**
  * Parse a hash pattern.
  */
 static pm_hash_pattern_node_t *
 parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node) {
     pm_node_list_t assocs = { 0 };
+    pm_static_literals_t keys = { 0 };
     pm_node_t *rest = NULL;
 
     switch (PM_NODE_TYPE(first_node)) {
@@ -14843,6 +14851,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
             break;
         case PM_SYMBOL_NODE: {
             if (pm_symbol_node_label_p(first_node)) {
+                parse_pattern_hash_key(parser, &keys, first_node);
                 pm_node_t *value;
 
                 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
@@ -14897,6 +14906,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
         } else {
             expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
             pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+
+            parse_pattern_hash_key(parser, &keys, key);
             pm_node_t *value = NULL;
 
             if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
@@ -14919,6 +14930,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
     pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
     xfree(assocs.nodes);
 
+    pm_static_literals_free(&keys);
     return node;
 }
 
diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb
index b403eeaa8c8..301f98134f5 100644
--- a/templates/src/diagnostic.c.erb
+++ b/templates/src/diagnostic.c.erb
@@ -270,6 +270,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE]     = { "expected a pattern expression after the range operator", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_EXPRESSION_AFTER_REST]      = { "unexpected pattern expression after the `**` expression", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_HASH_KEY]                   = { "expected a key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_KEY_DUPLICATE]         = { "duplicated key name", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_HASH_KEY_LABEL]             = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX }, // TODO // THIS // AND // ABOVE // IS WEIRD
     [PM_ERR_PATTERN_IDENT_AFTER_HROCKET]        = { "expected an identifier after the `=>` operator", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_PATTERN_LABEL_AFTER_COMMA]          = { "expected a label after the `,` in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index b3ae1c8ec59..d9dedc88e1e 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -2161,8 +2161,7 @@ def test_duplicate_pattern_capture
       source = <<~RUBY
         () => [a, a]
         () => [a, *a]
-        () => {a:, a:}
-        () => {a: a, a: a}
+        () => {a: a, b: a}
         () => {a: a, **a}
         () => [a, {a:}]
         () => [a, {a: {a: {a: [a]}}}]
@@ -2173,6 +2172,12 @@ def test_duplicate_pattern_capture
       assert_error_messages source, Array.new(source.lines.length, "duplicated variable name"), compare_ripper: false
     end
 
+    def test_duplicate_pattern_hash_key
+      assert_error_messages "() => {a:, a:}", ["duplicated key name", "duplicated variable name"]
+      assert_error_messages "() => {a:1, a:2}", ["duplicated key name"]
+      refute_error_messages "() => [{a:1}, {a:2}]"
+    end
+
     private
 
     def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby")
@@ -2192,6 +2197,11 @@ def assert_error_messages(source, errors, compare_ripper: RUBY_ENGINE == "ruby")
       assert_equal(errors, result.errors.map(&:message))
     end
 
+    def refute_error_messages(source, compare_ripper: RUBY_ENGINE == "ruby")
+      refute_nil Ripper.sexp_raw(source) if compare_ripper
+      assert Prism.parse_success?(source)
+    end
+
     def assert_warning_messages(source, warnings)
       result = Prism.parse(source)
       assert_equal(warnings, result.warnings.map(&:message))

From ddec1c163d6ccc38c08877d4b41fc18d52a241ab Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 1 Apr 2024 13:48:09 -0400
Subject: [PATCH 3/4] Use RubyVM::InstructionSequence instead of Ripper for
 validity check

---
 test/prism/errors_test.rb | 138 +++++++++++++++++++++++---------------
 1 file changed, 83 insertions(+), 55 deletions(-)

diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index d9dedc88e1e..856e46fb769 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -1130,28 +1130,24 @@ def test_dont_allow_setting_to_back_and_nth_reference
     end
 
     def test_duplicated_parameter_names
-      # For some reason, Ripper reports no error for Ruby 3.0 when you have
-      # duplicated parameter names for positional parameters.
-      unless RUBY_VERSION < "3.1.0"
-        expected = DefNode(
-          :foo,
-          Location(),
-          nil,
-          ParametersNode([RequiredParameterNode(0, :a), RequiredParameterNode(0, :b), RequiredParameterNode(ParameterFlags::REPEATED_PARAMETER, :a)], [], nil, [], [], nil, nil),
-          nil,
-          [:a, :b],
-          Location(),
-          nil,
-          Location(),
-          Location(),
-          nil,
-          Location()
-        )
+      expected = DefNode(
+        :foo,
+        Location(),
+        nil,
+        ParametersNode([RequiredParameterNode(0, :a), RequiredParameterNode(0, :b), RequiredParameterNode(ParameterFlags::REPEATED_PARAMETER, :a)], [], nil, [], [], nil, nil),
+        nil,
+        [:a, :b],
+        Location(),
+        nil,
+        Location(),
+        Location(),
+        nil,
+        Location()
+      )
 
-        assert_errors expected, "def foo(a,b,a);end", [
-          ["duplicated argument name", 12..13]
-        ]
-      end
+      assert_errors expected, "def foo(a,b,a);end", [
+        ["duplicated argument name", 12..13]
+      ]
 
       expected = DefNode(
         :foo,
@@ -1370,7 +1366,7 @@ def test_double_scope_numbered_parameters
       source = "-> { _1 + -> { _2 } }"
       errors = [["numbered parameter is already used in outer scope", 15..17]]
 
-      assert_errors expression(source), source, errors, compare_ripper: false
+      assert_errors expression(source), source, errors
     end
 
     def test_invalid_number_underscores
@@ -1436,7 +1432,7 @@ def test_parameter_name_ending_with_bang_or_question_mark
         ["unexpected name for a parameter", 8..10],
         ["unexpected name for a parameter", 11..13]
       ]
-      assert_errors expression(source), source, errors, compare_ripper: false
+      assert_errors expression(source), source, errors
     end
 
     def test_class_name
@@ -1488,11 +1484,10 @@ def test_shadow_args_in_block
 
     def test_repeated_parameter_name_in_destructured_params
       source = "def f(a, (b, (a))); end"
-      # In Ruby 3.0.x, `Ripper.sexp_raw` does not return `nil` for this case.
-      compare_ripper = RUBY_ENGINE == "ruby" && (RUBY_VERSION.split('.').map { |x| x.to_i } <=> [3, 1]) >= 1
+
       assert_errors expression(source), source, [
         ["duplicated argument name", 14..15],
-      ], compare_ripper: compare_ripper
+      ]
     end
 
     def test_assign_to_numbered_parameter
@@ -1574,6 +1569,7 @@ def test_check_value_expression
         1 => ^(if 1; (return) else (return) end)
         1 => ^(unless 1; (return) else (return) end)
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 7..13],
@@ -1584,7 +1580,7 @@ def test_check_value_expression
         [message, 97..103],
         [message, 123..129],
         [message, 168..174],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_statement
@@ -1607,6 +1603,7 @@ class << (return)
         for x in (return)
         end
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 4..10],
@@ -1617,7 +1614,7 @@ class << (return)
         [message, 110..116],
         [message, 132..138],
         [message, 154..160],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_def
@@ -1629,12 +1626,13 @@ def x(a = return)
         def x(a: return)
         end
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 5..11],
         [message, 29..35],
         [message, 50..56],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_assignment
@@ -1644,13 +1642,14 @@ def test_void_value_expression_in_assignment
         a, b = return, 1
         a, b = 1, *return
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 4..10],
         [message, 18..24],
         [message, 32..38],
         [message, 53..59],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_modifier
@@ -1662,6 +1661,7 @@ def test_void_value_expression_in_modifier
         (return) => a
         (return) in a
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 6..12],
@@ -1670,7 +1670,7 @@ def test_void_value_expression_in_modifier
         [message, 58..64],
         [message, 67..73],
         [message, 81..87]
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_expression
@@ -1685,6 +1685,7 @@ def test_void_value_expression_in_expression
         ((return)..)
         ((return)...)
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 1..7],
@@ -1696,7 +1697,7 @@ def test_void_value_expression_in_expression
         [message, 85..91],
         [message, 96..102],
         [message, 109..115]
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_array
@@ -1709,6 +1710,7 @@ def test_void_value_expression_in_array
         [ *return ]
         [ **return ]
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 1..7],
@@ -1718,7 +1720,7 @@ def test_void_value_expression_in_array
         [message, 58..64],
         [message, 70..76],
         [message, 83..89],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_hash
@@ -1728,13 +1730,14 @@ def test_void_value_expression_in_hash
         { a: return }
         { **return }
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 2..8],
         [message, 23..29],
         [message, 37..43],
         [message, 50..56],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_call
@@ -1745,6 +1748,7 @@ def test_void_value_expression_in_call
         (return)[1] = 2
         (return)::foo
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 1..7],
@@ -1752,7 +1756,7 @@ def test_void_value_expression_in_call
         [message, 27..33],
         [message, 39..45],
         [message, 55..61],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_constant_path
@@ -1760,11 +1764,12 @@ def test_void_value_expression_in_constant_path
         (return)::A
         class (return)::A; end
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 1..7],
         [message, 19..25],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_arguments
@@ -1778,6 +1783,7 @@ def test_void_value_expression_in_arguments
         foo(:a => return)
         foo(a: return)
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 4..10],
@@ -1788,7 +1794,7 @@ def test_void_value_expression_in_arguments
         [message, 71..77],
         [message, 94..100],
         [message, 109..115],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_unary_call
@@ -1796,11 +1802,12 @@ def test_void_value_expression_in_unary_call
         +(return)
         not return
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 2..8],
         [message, 14..20],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_void_value_expression_in_binary_call
@@ -1812,13 +1819,14 @@ def test_void_value_expression_in_binary_call
         1 or (return)
         (return) or 1
       RUBY
+
       message = 'unexpected void value expression'
       assert_errors expression(source), source, [
         [message, 5..11],
         [message, 14..20],
         [message, 42..48],
         [message, 71..77],
-      ], compare_ripper: false # Ripper does not check 'void value expression'.
+      ]
     end
 
     def test_trailing_comma_in_calls
@@ -1934,13 +1942,14 @@ def foo(bar: bar) = 42
         proc { |foo = foo| }
         proc { |foo: foo| }
       RUBY
+
       message = 'parameter default value references itself'
       assert_errors expression(source), source, [
         [message, 14..17],
         [message, 37..40],
         [message, 61..64],
         [message, 81..84],
-      ], compare_ripper: false # Ripper does not check 'circular reference'.
+      ]
     end
 
     def test_command_calls
@@ -1976,8 +1985,9 @@ def a = b rescue c d
         begin; rescue a b; end
         begin; rescue a b => c; end
       RUBY
+
       sources.each do |source|
-        assert_nil Ripper.sexp_raw(source)
+        refute_valid_syntax(source)
         assert_false(Prism.parse(source).success?)
       end
     end
@@ -1993,8 +2003,9 @@ def test_range_and_bin_op
         1.. % 2
         1.. ** 2
       RUBY
+
       sources.each do |source|
-        assert_nil Ripper.sexp_raw(source)
+        refute_valid_syntax(source)
         assert_false(Prism.parse(source).success?)
       end
     end
@@ -2050,21 +2061,21 @@ def test_block_arg_and_block
       source = 'foo(&1) { }'
       assert_errors expression(source), source, [
         ['multiple block arguments; only one block is allowed', 8..11]
-      ], compare_ripper: false # Ripper does not check 'both block arg and actual block given'.
+      ]
     end
 
     def test_forwarding_arg_and_block
       source = 'def foo(...) = foo(...) { }'
       assert_errors expression(source), source, [
         ['both a block argument and a forwarding argument; only one block is allowed', 24..27]
-      ], compare_ripper: false # Ripper does not check 'both block arg and actual block given'.
+      ]
     end
 
     def test_it_with_ordinary_parameter
       source = "proc { || it }"
       errors = [["`it` is not allowed when an ordinary parameter is defined", 10..12]]
 
-      assert_errors expression(source), source, errors, compare_ripper: false
+      assert_errors expression(source), source, errors
     end
 
     def test_regular_expression_with_unknown_regexp_options
@@ -2124,7 +2135,7 @@ def ([1]).foo; end
         ["cannot define singleton method for literals", 380..388],
         ["cannot define singleton method for literals", 404..407]
       ]
-      assert_errors expression(source), source, errors, compare_ripper: false
+      assert_errors expression(source), source, errors
     end
 
     def test_assignment_to_literal_in_conditionals
@@ -2169,20 +2180,37 @@ def test_duplicate_pattern_capture
         () => [A => a, {a: b => a}]
       RUBY
 
-      assert_error_messages source, Array.new(source.lines.length, "duplicated variable name"), compare_ripper: false
+      assert_error_messages source, Array.new(source.lines.length, "duplicated variable name")
     end
 
     def test_duplicate_pattern_hash_key
       assert_error_messages "() => {a:, a:}", ["duplicated key name", "duplicated variable name"]
       assert_error_messages "() => {a:1, a:2}", ["duplicated key name"]
-      refute_error_messages "() => [{a:1}, {a:2}]"
+      refute_error_messages "case (); in [{a:1}, {a:2}]; end"
     end
 
     private
 
-    def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby")
-      # Ripper behaves differently on JRuby/TruffleRuby, so only check this on CRuby
-      assert_nil Ripper.sexp_raw(source) if compare_ripper
+    def check_syntax(source)
+      $VERBOSE, previous = nil, $VERBOSE
+
+      begin
+        RubyVM::InstructionSequence.compile(source)
+      ensure
+        $VERBOSE = previous
+      end
+    end
+
+    def assert_valid_syntax(source)
+      check_syntax(source)
+    end
+
+    def refute_valid_syntax(source)
+      assert_raise(SyntaxError) { check_syntax(source) }
+    end
+
+    def assert_errors(expected, source, errors)
+      refute_valid_syntax(source) if RUBY_ENGINE == "ruby"
 
       result = Prism.parse(source)
       node = result.value.statements.body.last
@@ -2191,14 +2219,14 @@ def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby
       assert_equal(errors, result.errors.map { |e| [e.message, e.location.start_offset..e.location.end_offset] })
     end
 
-    def assert_error_messages(source, errors, compare_ripper: RUBY_ENGINE == "ruby")
-      assert_nil Ripper.sexp_raw(source) if compare_ripper
+    def assert_error_messages(source, errors)
+      refute_valid_syntax(source) if RUBY_ENGINE == "ruby"
       result = Prism.parse(source)
       assert_equal(errors, result.errors.map(&:message))
     end
 
-    def refute_error_messages(source, compare_ripper: RUBY_ENGINE == "ruby")
-      refute_nil Ripper.sexp_raw(source) if compare_ripper
+    def refute_error_messages(source)
+      assert_valid_syntax(source) if RUBY_ENGINE == "ruby"
       assert Prism.parse_success?(source)
     end
 

From 0d5a6d936a8387aa83e1bd68c19dd2ab7e55fcd7 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 1 Apr 2024 13:57:38 -0400
Subject: [PATCH 4/4] Do not track locals starting with _

---
 src/prism.c               |  7 +++++
 test/prism/errors_test.rb | 63 ++++++++++++++++++++++-----------------
 2 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 15c7e4568d5..2a288cae934 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -14605,6 +14605,9 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
  */
 static void
 parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
+    // Skip this capture if it starts with an underscore.
+    if (*location->start == '_') return;
+
     if (pm_constant_id_list_includes(captures, capture)) {
         pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
     } else {
@@ -14828,6 +14831,10 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
     return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
 }
 
+/**
+ * Add a node to the list of keys for a hash pattern, and if it is a duplicate
+ * then add an error to the parser.
+ */
 static void
 parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
     if (pm_static_literals_add(parser, keys, node) != NULL) {
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index 856e46fb769..cfe4ea41bec 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -2075,7 +2075,7 @@ def test_it_with_ordinary_parameter
       source = "proc { || it }"
       errors = [["`it` is not allowed when an ordinary parameter is defined", 10..12]]
 
-      assert_errors expression(source), source, errors
+      assert_errors expression(source), source, errors, check_valid_syntax: RUBY_VERSION >= "3.4.0"
     end
 
     def test_regular_expression_with_unknown_regexp_options
@@ -2170,47 +2170,56 @@ def test_assignment_to_literal_in_conditionals
 
     def test_duplicate_pattern_capture
       source = <<~RUBY
-        () => [a, a]
-        () => [a, *a]
-        () => {a: a, b: a}
-        () => {a: a, **a}
-        () => [a, {a:}]
-        () => [a, {a: {a: {a: [a]}}}]
-        () => a => a
-        () => [A => a, {a: b => a}]
+        case (); in [a, a]; end
+        case (); in [a, *a]; end
+        case (); in {a: a, b: a}; end
+        case (); in {a: a, **a}; end
+        case (); in [a, {a:}]; end
+        case (); in [a, {a: {a: {a: [a]}}}]; end
+        case (); in a => a; end
+        case (); in [A => a, {a: b => a}]; end
       RUBY
 
       assert_error_messages source, Array.new(source.lines.length, "duplicated variable name")
+      refute_error_messages "case (); in [_a, _a]; end"
     end
 
     def test_duplicate_pattern_hash_key
-      assert_error_messages "() => {a:, a:}", ["duplicated key name", "duplicated variable name"]
-      assert_error_messages "() => {a:1, a:2}", ["duplicated key name"]
+      assert_error_messages "case (); in {a:, a:}; end", ["duplicated key name", "duplicated variable name"]
+      assert_error_messages "case (); in {a:1, a:2}; end", ["duplicated key name"]
       refute_error_messages "case (); in [{a:1}, {a:2}]; end"
     end
 
     private
 
-    def check_syntax(source)
-      $VERBOSE, previous = nil, $VERBOSE
+    if RUBY_ENGINE == "ruby"
+      def check_syntax(source)
+        $VERBOSE, previous = nil, $VERBOSE
 
-      begin
-        RubyVM::InstructionSequence.compile(source)
-      ensure
-        $VERBOSE = previous
+        begin
+          RubyVM::InstructionSequence.compile(source)
+        ensure
+          $VERBOSE = previous
+        end
       end
-    end
 
-    def assert_valid_syntax(source)
-      check_syntax(source)
-    end
+      def assert_valid_syntax(source)
+        check_syntax(source)
+      end
 
-    def refute_valid_syntax(source)
-      assert_raise(SyntaxError) { check_syntax(source) }
+      def refute_valid_syntax(source)
+        assert_raise(SyntaxError) { check_syntax(source) }
+      end
+    else
+      def assert_valid_syntax(source)
+      end
+
+      def refute_valid_syntax(source)
+      end
     end
 
-    def assert_errors(expected, source, errors)
-      refute_valid_syntax(source) if RUBY_ENGINE == "ruby"
+    def assert_errors(expected, source, errors, check_valid_syntax: true)
+      refute_valid_syntax(source) if check_valid_syntax
 
       result = Prism.parse(source)
       node = result.value.statements.body.last
@@ -2220,13 +2229,13 @@ def assert_errors(expected, source, errors)
     end
 
     def assert_error_messages(source, errors)
-      refute_valid_syntax(source) if RUBY_ENGINE == "ruby"
+      refute_valid_syntax(source)
       result = Prism.parse(source)
       assert_equal(errors, result.errors.map(&:message))
     end
 
     def refute_error_messages(source)
-      assert_valid_syntax(source) if RUBY_ENGINE == "ruby"
+      assert_valid_syntax(source)
       assert Prism.parse_success?(source)
     end