From aa2182f064bfc3c91da46e8f11168c4dac9cbe5d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 1 Apr 2024 13:08:21 -0400 Subject: [PATCH 1/4] Track captures in pattern matching for duplicates --- config.yml | 1 + src/prism.c | 284 ++++++++++++++++++--------------- templates/src/diagnostic.c.erb | 1 + test/prism/errors_test.rb | 16 ++ 4 files changed, 173 insertions(+), 129 deletions(-) diff --git a/config.yml b/config.yml index edbe4b32d84..658492a488b 100644 --- a/config.yml +++ b/config.yml @@ -175,6 +175,7 @@ errors: - PARAMETER_STAR - PARAMETER_UNEXPECTED_FWD - PARAMETER_WILD_LOOSE_COMMA + - PATTERN_CAPTURE_DUPLICATE - PATTERN_EXPRESSION_AFTER_BRACKET - PATTERN_EXPRESSION_AFTER_COMMA - PATTERN_EXPRESSION_AFTER_HROCKET diff --git a/src/prism.c b/src/prism.c index 1f4b9ced5c6..6d46e06c974 100644 --- a/src/prism.c +++ b/src/prism.c @@ -4939,7 +4939,8 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui * name and depth. */ static pm_local_variable_target_node_t * -pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) { +pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) { + pm_refute_numbered_parameter(parser, location->start, location->end); pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t); *node = (pm_local_variable_target_node_t) { @@ -4954,36 +4955,6 @@ pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_locati return node; } -/** - * Allocate and initialize a new LocalVariableTargetNode node. - */ -static pm_local_variable_target_node_t * -pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) { - pm_refute_numbered_parameter(parser, name->start, name->end); - - return pm_local_variable_target_node_create_values( - parser, - &(pm_location_t) { .start = name->start, .end = name->end }, - pm_parser_constant_id_token(parser, name), - 0 - ); -} - -/** - * Allocate and initialize a new LocalVariableTargetNode node with the given depth. - */ -static pm_local_variable_target_node_t * -pm_local_variable_target_node_create_depth(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) { - pm_refute_numbered_parameter(parser, name->start, name->end); - - return pm_local_variable_target_node_create_values( - parser, - &(pm_location_t) { .start = name->start, .end = name->end }, - pm_parser_constant_id_token(parser, name), - depth - ); -} - /** * Allocate and initialize a new MatchPredicateNode node. */ @@ -7086,9 +7057,9 @@ pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const ui /** * Add a local variable from a token to the current scope. */ -static inline void +static inline pm_constant_id_t pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token) { - pm_parser_local_add_location(parser, token->start, token->end); + return pm_parser_local_add_location(parser, token->start, token->end); } /** @@ -14625,13 +14596,27 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w } static pm_node_t * -parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id); +parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id); + +/** + * Add the newly created local to the list of captures for this pattern matching + * expression. If it is duplicated from a previous local, then we'll need to add + * an error to the parser. + */ +static void +parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) { + if (pm_constant_id_list_includes(captures, capture)) { + pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); + } else { + pm_constant_id_list_append(captures, capture); + } +} /** * Accept any number of constants joined by :: delimiters. */ static pm_node_t * -parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) { +parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node) { // Now, if there are any :: operators that follow, parse them as constant // path nodes. while (accept1(parser, PM_TOKEN_COLON_COLON)) { @@ -14639,7 +14624,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) { expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous); - node = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child); + node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child); } // If there is a [ or ( that follows, then this is part of a larger pattern @@ -14658,7 +14643,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) { accept1(parser, PM_TOKEN_NEWLINE); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { - inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET); + inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET); accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET); } @@ -14670,7 +14655,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) { accept1(parser, PM_TOKEN_NEWLINE); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN); + inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN); accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); } @@ -14753,18 +14738,30 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) { * Parse a rest pattern. */ static pm_splat_node_t * -parse_pattern_rest(pm_parser_t *parser) { +parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { assert(parser->previous.type == PM_TOKEN_USTAR); pm_token_t operator = parser->previous; pm_node_t *name = NULL; // Rest patterns don't necessarily have a name associated with them. So we - // will check for that here. If they do, then we'll add it to the local table - // since this pattern will cause it to become a local variable. + // will check for that here. If they do, then we'll add it to the local + // table since this pattern will cause it to become a local variable. if (accept1(parser, PM_TOKEN_IDENTIFIER)) { pm_token_t identifier = parser->previous; - pm_parser_local_add_token(parser, &identifier); - name = (pm_node_t *) pm_local_variable_target_node_create(parser, &identifier); + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier); + + int depth; + if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { + pm_parser_local_add(parser, constant_id); + } + + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier)); + name = (pm_node_t *) pm_local_variable_target_node_create( + parser, + &PM_LOCATION_TOKEN_VALUE(&identifier), + constant_id, + (uint32_t) (depth == -1 ? 0 : depth) + ); } // Finally we can return the created node. @@ -14775,7 +14772,7 @@ parse_pattern_rest(pm_parser_t *parser) { * Parse a keyword rest node. */ static pm_node_t * -parse_pattern_keyword_rest(pm_parser_t *parser) { +parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { assert(parser->current.type == PM_TOKEN_USTAR_STAR); parser_lex(parser); @@ -14787,8 +14784,20 @@ parse_pattern_keyword_rest(pm_parser_t *parser) { } if (accept1(parser, PM_TOKEN_IDENTIFIER)) { - pm_parser_local_add_token(parser, &parser->previous); - value = (pm_node_t *) pm_local_variable_target_node_create(parser, &parser->previous); + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous); + + int depth; + if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { + pm_parser_local_add(parser, constant_id); + } + + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + value = (pm_node_t *) pm_local_variable_target_node_create( + parser, + &PM_LOCATION_TOKEN_VALUE(&parser->previous), + constant_id, + (uint32_t) (depth == -1 ? 0 : depth) + ); } return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator); @@ -14799,21 +14808,23 @@ parse_pattern_keyword_rest(pm_parser_t *parser) { * value. This will use an implicit local variable target. */ static pm_node_t * -parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) { +parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) { const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc; - pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end); - - int current_depth = pm_parser_local_depth_constant_id(parser, name); - uint32_t depth; + pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end); - if (current_depth == -1) { - pm_parser_local_add_location(parser, value_loc->start, value_loc->end); - depth = 0; - } else { - depth = (uint32_t) current_depth; + int depth; + if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { + pm_parser_local_add(parser, constant_id); } - pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth); + parse_pattern_capture(parser, captures, constant_id, value_loc); + pm_local_variable_target_node_t *target = pm_local_variable_target_node_create( + parser, + value_loc, + constant_id, + (uint32_t) (depth == -1 ? 0 : depth) + ); + return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target); } @@ -14821,7 +14832,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) { * Parse a hash pattern. */ static pm_hash_pattern_node_t * -parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) { +parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node) { pm_node_list_t assocs = { 0 }; pm_node_t *rest = NULL; @@ -14834,14 +14845,14 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) { if (pm_symbol_node_label_p(first_node)) { pm_node_t *value; - if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - // Here we have a value for the first assoc in the list, so - // we will parse it now. - value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY); - } else { + if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { // Otherwise, we will create an implicit local variable // target for the value. - value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node); + value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node); + } else { + // Here we have a value for the first assoc in the list, so + // we will parse it now. + value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY); } pm_token_t operator = not_provided(parser); @@ -14875,7 +14886,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) { } if (match1(parser, PM_TOKEN_USTAR_STAR)) { - pm_node_t *assoc = parse_pattern_keyword_rest(parser); + pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures); if (rest == NULL) { rest = assoc; @@ -14888,12 +14899,10 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) { pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); pm_node_t *value = NULL; - if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY); + if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key); } else { - const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc; - pm_parser_local_add_location(parser, value_loc->start, value_loc->end); - value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key); + value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY); } pm_token_t operator = not_provided(parser); @@ -14917,18 +14926,25 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) { * Parse a pattern expression primitive. */ static pm_node_t * -parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { +parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id) { switch (parser->current.type) { case PM_TOKEN_IDENTIFIER: case PM_TOKEN_METHOD_NAME: { parser_lex(parser); - pm_token_t name = parser->previous; - int depth = pm_parser_local_depth(parser, &name); - if (depth < 0) { - depth = 0; - pm_parser_local_add_token(parser, &name); + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous); + + int depth; + if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { + pm_parser_local_add(parser, constant_id); } - return (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &name, (uint32_t) depth); + + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + return (pm_node_t *) pm_local_variable_target_node_create( + parser, + &PM_LOCATION_TOKEN_VALUE(&parser->previous), + constant_id, + (uint32_t) (depth == -1 ? 0 : depth) + ); } case PM_TOKEN_BRACKET_LEFT_ARRAY: { pm_token_t opening = parser->current; @@ -14937,15 +14953,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { // If we have an empty array pattern, then we'll just return a new // array pattern node. - return (pm_node_t *)pm_array_pattern_node_empty_create(parser, &opening, &parser->previous); + return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous); } // Otherwise, we'll parse the inner pattern, then deal with it depending // on the type it returns. - pm_node_t *inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET); + pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET); pm_token_t closing = parser->previous; @@ -15007,7 +15022,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); break; case PM_TOKEN_USTAR_STAR: - first_node = parse_pattern_keyword_rest(parser); + first_node = parse_pattern_keyword_rest(parser, captures); break; case PM_TOKEN_STRING_BEGIN: first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY); @@ -15021,7 +15036,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { } } - node = parse_pattern_hash(parser, first_node); + node = parse_pattern_hash(parser, captures, first_node); accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE); @@ -15168,14 +15183,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous); pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child); - return parse_pattern_constant_path(parser, (pm_node_t *)node); + return parse_pattern_constant_path(parser, captures, (pm_node_t *) node); } case PM_TOKEN_CONSTANT: { pm_token_t constant = parser->current; parser_lex(parser); pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant); - return parse_pattern_constant_path(parser, node); + return parse_pattern_constant_path(parser, captures, node); } default: pm_parser_err_current(parser, diag_id); @@ -15188,7 +15203,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { * assignment. */ static pm_node_t * -parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { +parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id) { pm_node_t *node = NULL; do { @@ -15205,9 +15220,9 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { case PM_TOKEN_UDOT_DOT_DOT: case PM_CASE_PRIMITIVE: { if (node == NULL) { - node = parse_pattern_primitive(parser, diag_id); + node = parse_pattern_primitive(parser, captures, diag_id); } else { - pm_node_t *right = parse_pattern_primitive(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE); + pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE); node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator); } @@ -15217,7 +15232,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { pm_token_t opening = parser->current; parser_lex(parser); - pm_node_t *body = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN); + pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN); accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous); @@ -15249,16 +15264,23 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { // In this case we should create an assignment node. while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) { pm_token_t operator = parser->previous; - expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET); - pm_token_t identifier = parser->previous; - int depth = pm_parser_local_depth(parser, &identifier); - if (depth < 0) { - depth = 0; - pm_parser_local_add_token(parser, &identifier); + + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous); + int depth; + + if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { + pm_parser_local_add(parser, constant_id); } - pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &identifier, (uint32_t) depth); + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create( + parser, + &PM_LOCATION_TOKEN_VALUE(&parser->previous), + constant_id, + (uint32_t) (depth == -1 ? 0 : depth) + ); + node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator); } @@ -15269,7 +15291,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { * Parse a pattern matching expression. */ static pm_node_t * -parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) { +parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) { pm_node_t *node = NULL; bool leading_rest = false; @@ -15279,30 +15301,30 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) case PM_TOKEN_LABEL: { parser_lex(parser); pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); - return (pm_node_t *) parse_pattern_hash(parser, key); + return (pm_node_t *) parse_pattern_hash(parser, captures, key); } case PM_TOKEN_USTAR_STAR: { - node = parse_pattern_keyword_rest(parser); - return (pm_node_t *) parse_pattern_hash(parser, node); + node = parse_pattern_keyword_rest(parser, captures); + return (pm_node_t *) parse_pattern_hash(parser, captures, node); } case PM_TOKEN_USTAR: { if (top_pattern) { parser_lex(parser); - node = (pm_node_t *) parse_pattern_rest(parser); + node = (pm_node_t *) parse_pattern_rest(parser, captures); leading_rest = true; break; } } /* fallthrough */ default: - node = parse_pattern_primitives(parser, diag_id); + node = parse_pattern_primitives(parser, captures, diag_id); break; } // If we got a dynamic label symbol, then we need to treat it like the // beginning of a hash pattern. if (pm_symbol_node_label_p(node)) { - return (pm_node_t *) parse_pattern_hash(parser, node); + return (pm_node_t *) parse_pattern_hash(parser, captures, node); } if (top_pattern && match1(parser, PM_TOKEN_COMMA)) { @@ -15322,7 +15344,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) } if (accept1(parser, PM_TOKEN_USTAR)) { - node = (pm_node_t *) parse_pattern_rest(parser); + node = (pm_node_t *) parse_pattern_rest(parser, captures); // If we have already parsed a splat pattern, then this is an error. We // will continue to parse the rest of the patterns, but we will indicate @@ -15333,7 +15355,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) trailing_rest = true; } else { - node = parse_pattern_primitives(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA); + node = parse_pattern_primitives(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA); } pm_node_list_append(&nodes, node); @@ -16300,8 +16322,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous); } - // At this point we can create a case node, though we don't yet know if it - // is a case-in or case-when node. + // At this point we can create a case node, though we don't yet know + // if it is a case-in or case-when node. pm_token_t end_keyword = not_provided(parser); pm_node_t *node; @@ -16379,8 +16401,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE); } - // At this point we expect that we're parsing a case-in node. We will - // continue to parse the in nodes until we hit the end of the list. + // At this point we expect that we're parsing a case-in node. We + // will continue to parse the in nodes until we hit the end of + // the list. while (match1(parser, PM_TOKEN_KEYWORD_IN)) { bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; parser->pattern_matching_newlines = true; @@ -16390,11 +16413,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t in_keyword = parser->previous; - pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN); + + pm_constant_id_list_t captures = { 0 }; + pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + pm_constant_id_list_free(&captures); - // Since we're in the top-level of the case-in node we need to check - // for guard clauses in the form of `if` or `unless` statements. + // Since we're in the top-level of the case-in node we need + // to check for guard clauses in the form of `if` or + // `unless` statements. if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) { pm_token_t keyword = parser->previous; pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE); @@ -16405,9 +16433,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate); } - // Now we need to check for the terminator of the in node's pattern. - // It can be a newline or semicolon optionally followed by a `then` - // keyword. + // Now we need to check for the terminator of the in node's + // pattern. It can be a newline or semicolon optionally + // followed by a `then` keyword. pm_token_t then_keyword; if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { @@ -16420,8 +16448,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b then_keyword = parser->previous; } - // Now we can actually parse the statements associated with the in - // node. + // Now we can actually parse the statements associated with + // the in node. pm_statements_node_t *statements; if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { statements = NULL; @@ -16429,8 +16457,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b statements = parse_statements(parser, PM_CONTEXT_CASE_IN); } - // Now that we have the full pattern and statements, we can create the - // node and attach it to the case node. + // Now that we have the full pattern and statements, we can + // create the node and attach it to the case node. pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword); pm_case_match_node_condition_append(case_node, condition); } @@ -18152,7 +18180,6 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * // copy the names directly. The pointers will line up. location = (pm_location_t) { .start = source, .end = source + length }; name = pm_parser_constant_id_location(parser, location.start, location.end); - pm_refute_numbered_parameter(parser, source, source + length); } else { // Otherwise, the name is a slice of the malloc-ed owned string, // in which case we need to copy it out into a new string. @@ -18163,11 +18190,6 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * memcpy(memory, source, length); name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length); - - if (pm_token_is_numbered_parameter(source, source + length)) { - const pm_location_t *location = &call->receiver->location; - PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start); - } } if (name != 0) { @@ -18191,7 +18213,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * // Next, create the local variable target and add it to the // list of targets for the match. - pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth); + pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth); pm_node_list_append(&match->targets, target); } } @@ -18963,11 +18985,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_token_t operator = parser->current; parser->command_start = false; lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); - parser_lex(parser); - pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN); + pm_constant_id_list_t captures = { 0 }; + pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + pm_constant_id_list_free(&captures); return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator); } @@ -18978,11 +19002,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_token_t operator = parser->current; parser->command_start = false; lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); - parser_lex(parser); - pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET); + pm_constant_id_list_t captures = { 0 }; + pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + pm_constant_id_list_free(&captures); return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator); } diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index b608c44f01a..b403eeaa8c8 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -258,6 +258,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_PARAMETER_STAR] = { "unexpected parameter `*`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PARAMETER_UNEXPECTED_FWD] = { "unexpected `...` in parameters", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = { "unexpected `,` in parameters", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_PATTERN_CAPTURE_DUPLICATE] = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index 6cc71f96471..b3ae1c8ec59 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -2157,6 +2157,22 @@ def test_assignment_to_literal_in_conditionals ] * source.lines.count end + def test_duplicate_pattern_capture + source = <<~RUBY + () => [a, a] + () => [a, *a] + () => {a:, a:} + () => {a: a, a: a} + () => {a: a, **a} + () => [a, {a:}] + () => [a, {a: {a: {a: [a]}}}] + () => a => a + () => [A => a, {a: b => a}] + RUBY + + assert_error_messages source, Array.new(source.lines.length, "duplicated variable name"), compare_ripper: false + end + private def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby") From 71ea82f299db9df767674573e6980e34c6ed1473 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 1 Apr 2024 13:33:16 -0400 Subject: [PATCH 2/4] Track duplicate hash keys for pattern matching --- config.yml | 1 + src/prism.c | 12 ++++++++++++ templates/src/diagnostic.c.erb | 1 + test/prism/errors_test.rb | 14 ++++++++++++-- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/config.yml b/config.yml index 658492a488b..34cd388f485 100644 --- a/config.yml +++ b/config.yml @@ -187,6 +187,7 @@ errors: - PATTERN_EXPRESSION_AFTER_RANGE - PATTERN_EXPRESSION_AFTER_REST - PATTERN_HASH_KEY + - PATTERN_HASH_KEY_DUPLICATE - PATTERN_HASH_KEY_LABEL - PATTERN_IDENT_AFTER_HROCKET - PATTERN_LABEL_AFTER_COMMA diff --git a/src/prism.c b/src/prism.c index 6d46e06c974..15c7e4568d5 100644 --- a/src/prism.c +++ b/src/prism.c @@ -14828,12 +14828,20 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target); } +static void +parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) { + if (pm_static_literals_add(parser, keys, node) != NULL) { + pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE); + } +} + /** * Parse a hash pattern. */ static pm_hash_pattern_node_t * parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node) { pm_node_list_t assocs = { 0 }; + pm_static_literals_t keys = { 0 }; pm_node_t *rest = NULL; switch (PM_NODE_TYPE(first_node)) { @@ -14843,6 +14851,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node break; case PM_SYMBOL_NODE: { if (pm_symbol_node_label_p(first_node)) { + parse_pattern_hash_key(parser, &keys, first_node); pm_node_t *value; if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { @@ -14897,6 +14906,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node } else { expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA); pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); + + parse_pattern_hash_key(parser, &keys, key); pm_node_t *value = NULL; if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { @@ -14919,6 +14930,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest); xfree(assocs.nodes); + pm_static_literals_free(&keys); return node; } diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index b403eeaa8c8..301f98134f5 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -270,6 +270,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = { "expected a pattern expression after the range operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_REST] = { "unexpected pattern expression after the `**` expression", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_HASH_KEY] = { "expected a key in the hash pattern", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_PATTERN_HASH_KEY_DUPLICATE] = { "duplicated key name", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_HASH_KEY_LABEL] = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX }, // TODO // THIS // AND // ABOVE // IS WEIRD [PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = { "expected an identifier after the `=>` operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_LABEL_AFTER_COMMA] = { "expected a label after the `,` in the hash pattern", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index b3ae1c8ec59..d9dedc88e1e 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -2161,8 +2161,7 @@ def test_duplicate_pattern_capture source = <<~RUBY () => [a, a] () => [a, *a] - () => {a:, a:} - () => {a: a, a: a} + () => {a: a, b: a} () => {a: a, **a} () => [a, {a:}] () => [a, {a: {a: {a: [a]}}}] @@ -2173,6 +2172,12 @@ def test_duplicate_pattern_capture assert_error_messages source, Array.new(source.lines.length, "duplicated variable name"), compare_ripper: false end + def test_duplicate_pattern_hash_key + assert_error_messages "() => {a:, a:}", ["duplicated key name", "duplicated variable name"] + assert_error_messages "() => {a:1, a:2}", ["duplicated key name"] + refute_error_messages "() => [{a:1}, {a:2}]" + end + private def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby") @@ -2192,6 +2197,11 @@ def assert_error_messages(source, errors, compare_ripper: RUBY_ENGINE == "ruby") assert_equal(errors, result.errors.map(&:message)) end + def refute_error_messages(source, compare_ripper: RUBY_ENGINE == "ruby") + refute_nil Ripper.sexp_raw(source) if compare_ripper + assert Prism.parse_success?(source) + end + def assert_warning_messages(source, warnings) result = Prism.parse(source) assert_equal(warnings, result.warnings.map(&:message)) From ddec1c163d6ccc38c08877d4b41fc18d52a241ab Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 1 Apr 2024 13:48:09 -0400 Subject: [PATCH 3/4] Use RubyVM::InstructionSequence instead of Ripper for validity check --- test/prism/errors_test.rb | 138 +++++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 55 deletions(-) diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index d9dedc88e1e..856e46fb769 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -1130,28 +1130,24 @@ def test_dont_allow_setting_to_back_and_nth_reference end def test_duplicated_parameter_names - # For some reason, Ripper reports no error for Ruby 3.0 when you have - # duplicated parameter names for positional parameters. - unless RUBY_VERSION < "3.1.0" - expected = DefNode( - :foo, - Location(), - nil, - ParametersNode([RequiredParameterNode(0, :a), RequiredParameterNode(0, :b), RequiredParameterNode(ParameterFlags::REPEATED_PARAMETER, :a)], [], nil, [], [], nil, nil), - nil, - [:a, :b], - Location(), - nil, - Location(), - Location(), - nil, - Location() - ) + expected = DefNode( + :foo, + Location(), + nil, + ParametersNode([RequiredParameterNode(0, :a), RequiredParameterNode(0, :b), RequiredParameterNode(ParameterFlags::REPEATED_PARAMETER, :a)], [], nil, [], [], nil, nil), + nil, + [:a, :b], + Location(), + nil, + Location(), + Location(), + nil, + Location() + ) - assert_errors expected, "def foo(a,b,a);end", [ - ["duplicated argument name", 12..13] - ] - end + assert_errors expected, "def foo(a,b,a);end", [ + ["duplicated argument name", 12..13] + ] expected = DefNode( :foo, @@ -1370,7 +1366,7 @@ def test_double_scope_numbered_parameters source = "-> { _1 + -> { _2 } }" errors = [["numbered parameter is already used in outer scope", 15..17]] - assert_errors expression(source), source, errors, compare_ripper: false + assert_errors expression(source), source, errors end def test_invalid_number_underscores @@ -1436,7 +1432,7 @@ def test_parameter_name_ending_with_bang_or_question_mark ["unexpected name for a parameter", 8..10], ["unexpected name for a parameter", 11..13] ] - assert_errors expression(source), source, errors, compare_ripper: false + assert_errors expression(source), source, errors end def test_class_name @@ -1488,11 +1484,10 @@ def test_shadow_args_in_block def test_repeated_parameter_name_in_destructured_params source = "def f(a, (b, (a))); end" - # In Ruby 3.0.x, `Ripper.sexp_raw` does not return `nil` for this case. - compare_ripper = RUBY_ENGINE == "ruby" && (RUBY_VERSION.split('.').map { |x| x.to_i } <=> [3, 1]) >= 1 + assert_errors expression(source), source, [ ["duplicated argument name", 14..15], - ], compare_ripper: compare_ripper + ] end def test_assign_to_numbered_parameter @@ -1574,6 +1569,7 @@ def test_check_value_expression 1 => ^(if 1; (return) else (return) end) 1 => ^(unless 1; (return) else (return) end) RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 7..13], @@ -1584,7 +1580,7 @@ def test_check_value_expression [message, 97..103], [message, 123..129], [message, 168..174], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_statement @@ -1607,6 +1603,7 @@ class << (return) for x in (return) end RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 4..10], @@ -1617,7 +1614,7 @@ class << (return) [message, 110..116], [message, 132..138], [message, 154..160], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_def @@ -1629,12 +1626,13 @@ def x(a = return) def x(a: return) end RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 5..11], [message, 29..35], [message, 50..56], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_assignment @@ -1644,13 +1642,14 @@ def test_void_value_expression_in_assignment a, b = return, 1 a, b = 1, *return RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 4..10], [message, 18..24], [message, 32..38], [message, 53..59], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_modifier @@ -1662,6 +1661,7 @@ def test_void_value_expression_in_modifier (return) => a (return) in a RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 6..12], @@ -1670,7 +1670,7 @@ def test_void_value_expression_in_modifier [message, 58..64], [message, 67..73], [message, 81..87] - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_expression @@ -1685,6 +1685,7 @@ def test_void_value_expression_in_expression ((return)..) ((return)...) RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 1..7], @@ -1696,7 +1697,7 @@ def test_void_value_expression_in_expression [message, 85..91], [message, 96..102], [message, 109..115] - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_array @@ -1709,6 +1710,7 @@ def test_void_value_expression_in_array [ *return ] [ **return ] RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 1..7], @@ -1718,7 +1720,7 @@ def test_void_value_expression_in_array [message, 58..64], [message, 70..76], [message, 83..89], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_hash @@ -1728,13 +1730,14 @@ def test_void_value_expression_in_hash { a: return } { **return } RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 2..8], [message, 23..29], [message, 37..43], [message, 50..56], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_call @@ -1745,6 +1748,7 @@ def test_void_value_expression_in_call (return)[1] = 2 (return)::foo RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 1..7], @@ -1752,7 +1756,7 @@ def test_void_value_expression_in_call [message, 27..33], [message, 39..45], [message, 55..61], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_constant_path @@ -1760,11 +1764,12 @@ def test_void_value_expression_in_constant_path (return)::A class (return)::A; end RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 1..7], [message, 19..25], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_arguments @@ -1778,6 +1783,7 @@ def test_void_value_expression_in_arguments foo(:a => return) foo(a: return) RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 4..10], @@ -1788,7 +1794,7 @@ def test_void_value_expression_in_arguments [message, 71..77], [message, 94..100], [message, 109..115], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_unary_call @@ -1796,11 +1802,12 @@ def test_void_value_expression_in_unary_call +(return) not return RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 2..8], [message, 14..20], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_void_value_expression_in_binary_call @@ -1812,13 +1819,14 @@ def test_void_value_expression_in_binary_call 1 or (return) (return) or 1 RUBY + message = 'unexpected void value expression' assert_errors expression(source), source, [ [message, 5..11], [message, 14..20], [message, 42..48], [message, 71..77], - ], compare_ripper: false # Ripper does not check 'void value expression'. + ] end def test_trailing_comma_in_calls @@ -1934,13 +1942,14 @@ def foo(bar: bar) = 42 proc { |foo = foo| } proc { |foo: foo| } RUBY + message = 'parameter default value references itself' assert_errors expression(source), source, [ [message, 14..17], [message, 37..40], [message, 61..64], [message, 81..84], - ], compare_ripper: false # Ripper does not check 'circular reference'. + ] end def test_command_calls @@ -1976,8 +1985,9 @@ def a = b rescue c d begin; rescue a b; end begin; rescue a b => c; end RUBY + sources.each do |source| - assert_nil Ripper.sexp_raw(source) + refute_valid_syntax(source) assert_false(Prism.parse(source).success?) end end @@ -1993,8 +2003,9 @@ def test_range_and_bin_op 1.. % 2 1.. ** 2 RUBY + sources.each do |source| - assert_nil Ripper.sexp_raw(source) + refute_valid_syntax(source) assert_false(Prism.parse(source).success?) end end @@ -2050,21 +2061,21 @@ def test_block_arg_and_block source = 'foo(&1) { }' assert_errors expression(source), source, [ ['multiple block arguments; only one block is allowed', 8..11] - ], compare_ripper: false # Ripper does not check 'both block arg and actual block given'. + ] end def test_forwarding_arg_and_block source = 'def foo(...) = foo(...) { }' assert_errors expression(source), source, [ ['both a block argument and a forwarding argument; only one block is allowed', 24..27] - ], compare_ripper: false # Ripper does not check 'both block arg and actual block given'. + ] end def test_it_with_ordinary_parameter source = "proc { || it }" errors = [["`it` is not allowed when an ordinary parameter is defined", 10..12]] - assert_errors expression(source), source, errors, compare_ripper: false + assert_errors expression(source), source, errors end def test_regular_expression_with_unknown_regexp_options @@ -2124,7 +2135,7 @@ def ([1]).foo; end ["cannot define singleton method for literals", 380..388], ["cannot define singleton method for literals", 404..407] ] - assert_errors expression(source), source, errors, compare_ripper: false + assert_errors expression(source), source, errors end def test_assignment_to_literal_in_conditionals @@ -2169,20 +2180,37 @@ def test_duplicate_pattern_capture () => [A => a, {a: b => a}] RUBY - assert_error_messages source, Array.new(source.lines.length, "duplicated variable name"), compare_ripper: false + assert_error_messages source, Array.new(source.lines.length, "duplicated variable name") end def test_duplicate_pattern_hash_key assert_error_messages "() => {a:, a:}", ["duplicated key name", "duplicated variable name"] assert_error_messages "() => {a:1, a:2}", ["duplicated key name"] - refute_error_messages "() => [{a:1}, {a:2}]" + refute_error_messages "case (); in [{a:1}, {a:2}]; end" end private - def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby") - # Ripper behaves differently on JRuby/TruffleRuby, so only check this on CRuby - assert_nil Ripper.sexp_raw(source) if compare_ripper + def check_syntax(source) + $VERBOSE, previous = nil, $VERBOSE + + begin + RubyVM::InstructionSequence.compile(source) + ensure + $VERBOSE = previous + end + end + + def assert_valid_syntax(source) + check_syntax(source) + end + + def refute_valid_syntax(source) + assert_raise(SyntaxError) { check_syntax(source) } + end + + def assert_errors(expected, source, errors) + refute_valid_syntax(source) if RUBY_ENGINE == "ruby" result = Prism.parse(source) node = result.value.statements.body.last @@ -2191,14 +2219,14 @@ def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby assert_equal(errors, result.errors.map { |e| [e.message, e.location.start_offset..e.location.end_offset] }) end - def assert_error_messages(source, errors, compare_ripper: RUBY_ENGINE == "ruby") - assert_nil Ripper.sexp_raw(source) if compare_ripper + def assert_error_messages(source, errors) + refute_valid_syntax(source) if RUBY_ENGINE == "ruby" result = Prism.parse(source) assert_equal(errors, result.errors.map(&:message)) end - def refute_error_messages(source, compare_ripper: RUBY_ENGINE == "ruby") - refute_nil Ripper.sexp_raw(source) if compare_ripper + def refute_error_messages(source) + assert_valid_syntax(source) if RUBY_ENGINE == "ruby" assert Prism.parse_success?(source) end From 0d5a6d936a8387aa83e1bd68c19dd2ab7e55fcd7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 1 Apr 2024 13:57:38 -0400 Subject: [PATCH 4/4] Do not track locals starting with _ --- src/prism.c | 7 +++++ test/prism/errors_test.rb | 63 ++++++++++++++++++++++----------------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/prism.c b/src/prism.c index 15c7e4568d5..2a288cae934 100644 --- a/src/prism.c +++ b/src/prism.c @@ -14605,6 +14605,9 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat */ static void parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) { + // Skip this capture if it starts with an underscore. + if (*location->start == '_') return; + if (pm_constant_id_list_includes(captures, capture)) { pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); } else { @@ -14828,6 +14831,10 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target); } +/** + * Add a node to the list of keys for a hash pattern, and if it is a duplicate + * then add an error to the parser. + */ static void parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) { if (pm_static_literals_add(parser, keys, node) != NULL) { diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index 856e46fb769..cfe4ea41bec 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -2075,7 +2075,7 @@ def test_it_with_ordinary_parameter source = "proc { || it }" errors = [["`it` is not allowed when an ordinary parameter is defined", 10..12]] - assert_errors expression(source), source, errors + assert_errors expression(source), source, errors, check_valid_syntax: RUBY_VERSION >= "3.4.0" end def test_regular_expression_with_unknown_regexp_options @@ -2170,47 +2170,56 @@ def test_assignment_to_literal_in_conditionals def test_duplicate_pattern_capture source = <<~RUBY - () => [a, a] - () => [a, *a] - () => {a: a, b: a} - () => {a: a, **a} - () => [a, {a:}] - () => [a, {a: {a: {a: [a]}}}] - () => a => a - () => [A => a, {a: b => a}] + case (); in [a, a]; end + case (); in [a, *a]; end + case (); in {a: a, b: a}; end + case (); in {a: a, **a}; end + case (); in [a, {a:}]; end + case (); in [a, {a: {a: {a: [a]}}}]; end + case (); in a => a; end + case (); in [A => a, {a: b => a}]; end RUBY assert_error_messages source, Array.new(source.lines.length, "duplicated variable name") + refute_error_messages "case (); in [_a, _a]; end" end def test_duplicate_pattern_hash_key - assert_error_messages "() => {a:, a:}", ["duplicated key name", "duplicated variable name"] - assert_error_messages "() => {a:1, a:2}", ["duplicated key name"] + assert_error_messages "case (); in {a:, a:}; end", ["duplicated key name", "duplicated variable name"] + assert_error_messages "case (); in {a:1, a:2}; end", ["duplicated key name"] refute_error_messages "case (); in [{a:1}, {a:2}]; end" end private - def check_syntax(source) - $VERBOSE, previous = nil, $VERBOSE + if RUBY_ENGINE == "ruby" + def check_syntax(source) + $VERBOSE, previous = nil, $VERBOSE - begin - RubyVM::InstructionSequence.compile(source) - ensure - $VERBOSE = previous + begin + RubyVM::InstructionSequence.compile(source) + ensure + $VERBOSE = previous + end end - end - def assert_valid_syntax(source) - check_syntax(source) - end + def assert_valid_syntax(source) + check_syntax(source) + end - def refute_valid_syntax(source) - assert_raise(SyntaxError) { check_syntax(source) } + def refute_valid_syntax(source) + assert_raise(SyntaxError) { check_syntax(source) } + end + else + def assert_valid_syntax(source) + end + + def refute_valid_syntax(source) + end end - def assert_errors(expected, source, errors) - refute_valid_syntax(source) if RUBY_ENGINE == "ruby" + def assert_errors(expected, source, errors, check_valid_syntax: true) + refute_valid_syntax(source) if check_valid_syntax result = Prism.parse(source) node = result.value.statements.body.last @@ -2220,13 +2229,13 @@ def assert_errors(expected, source, errors) end def assert_error_messages(source, errors) - refute_valid_syntax(source) if RUBY_ENGINE == "ruby" + refute_valid_syntax(source) result = Prism.parse(source) assert_equal(errors, result.errors.map(&:message)) end def refute_error_messages(source) - assert_valid_syntax(source) if RUBY_ENGINE == "ruby" + assert_valid_syntax(source) assert Prism.parse_success?(source) end