From d0dbf01bef34c2bc803ae3466e985ba51c4d8072 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 4 Mar 2024 12:34:38 -0500 Subject: [PATCH] Warnings for tokens at EOL --- include/prism/diagnostic.h | 2 + src/diagnostic.c | 4 +- src/prism.c | 54 ++++++++++++++++++-- test/prism/warnings_test.rb | 98 +++++++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 test/prism/warnings_test.rb diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h index 9dadbbcc26e..ba78be0f013 100644 --- a/include/prism/diagnostic.h +++ b/include/prism/diagnostic.h @@ -304,12 +304,14 @@ typedef enum { PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS, PM_WARN_AMBIGUOUS_PREFIX_STAR, PM_WARN_AMBIGUOUS_SLASH, + PM_WARN_DOT_DOT_DOT_EOL, PM_WARN_EQUAL_IN_CONDITIONAL, PM_WARN_END_IN_METHOD, PM_WARN_DUPLICATED_HASH_KEY, PM_WARN_DUPLICATED_WHEN_CLAUSE, PM_WARN_FLOAT_OUT_OF_RANGE, PM_WARN_INTEGER_IN_FLIP_FLOP, + PM_WARN_KEYWORD_EOL, // This is the number of diagnostic codes. PM_DIAGNOSTIC_ID_LEN, diff --git a/src/diagnostic.c b/src/diagnostic.c index 09ac9ee3306..3c43363fc50 100644 --- a/src/diagnostic.c +++ b/src/diagnostic.c @@ -306,12 +306,14 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_AMBIGUOUS_PREFIX_STAR] = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_AMBIGUOUS_SLASH] = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE }, + [PM_WARN_DOT_DOT_DOT_EOL] = { "... at EOL, should be parenthesized?", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_DUPLICATED_HASH_KEY] = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_DUPLICATED_WHEN_CLAUSE] = { "duplicated 'when' clause with line %" PRIi32 " is ignored", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_EQUAL_IN_CONDITIONAL] = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_END_IN_METHOD] = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT }, [PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE }, - [PM_WARN_INTEGER_IN_FLIP_FLOP] = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT } + [PM_WARN_INTEGER_IN_FLIP_FLOP] = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT }, + [PM_WARN_KEYWORD_EOL] = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE } }; static inline const char * diff --git a/src/prism.c b/src/prism.c index 1c62cd0c313..19b22ba196c 100644 --- a/src/prism.c +++ b/src/prism.c @@ -593,6 +593,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id pm_parser_warn(parser, node->location.start, node->location.end, diag_id); } +/** + * Append a warning to the list of warnings on the parser using a format string. + */ +#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \ + pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__) + +/** + * Append a warning to the list of warnings on the parser using the location of + * the given token and a format string. + */ +#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \ + PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) + +/** + * Append a warning to the list of warnings on the parser using the location of + * the given token and a format string, and add on the content of the token. + */ +#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ + PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) + /******************************************************************************/ /* Node-related functions */ /******************************************************************************/ @@ -8677,6 +8697,20 @@ parser_flush_heredoc_end(pm_parser_t *parser) { parser->heredoc_end = NULL; } +/** + * Returns true if the parser has lexed the last token on the current line. +*/ +static bool +parser_end_of_line_p(const pm_parser_t *parser) { + const uint8_t *cursor = parser->current.end; + + while (cursor < parser->end && *cursor != '\n' && *cursor != '#') { + if (!pm_char_is_inline_whitespace(*cursor++)) return false; + } + + return true; +} + /** * When we're lexing certain types (strings, symbols, lists, etc.) we have * string content associated with the tokens. For example: @@ -9695,6 +9729,10 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_UDOT_DOT_DOT); } + if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) { + pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL); + } + lex_state_set(parser, PM_LEX_STATE_BEG); LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT); } @@ -13122,13 +13160,19 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) { // Parse any number of elsif clauses. This will form a linked list of if // nodes pointing to each other from the top. if (context == PM_CONTEXT_IF) { - while (accept1(parser, PM_TOKEN_KEYWORD_ELSIF)) { - pm_token_t elsif_keyword = parser->previous; + while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) { + if (parser_end_of_line_p(parser)) { + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + } + + pm_token_t elsif_keyword = parser->current; + parser_lex(parser); + pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword); pm_accepts_block_stack_push(parser, true); + pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF); pm_accepts_block_stack_pop(parser); - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword); @@ -16305,6 +16349,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous); } case PM_TOKEN_KEYWORD_IF: + if (parser_end_of_line_p(parser)) { + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + } + parser_lex(parser); return parse_conditional(parser, PM_CONTEXT_IF); case PM_TOKEN_KEYWORD_UNDEF: { diff --git a/test/prism/warnings_test.rb b/test/prism/warnings_test.rb new file mode 100644 index 00000000000..cc6274553ea --- /dev/null +++ b/test/prism/warnings_test.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +return if RUBY_VERSION < "3.0" + +require_relative "test_helper" +require "stringio" + +module Prism + class WarningsTest < TestCase + def test_ambiguous_uminus + assert_warning("a -b", "ambiguous first argument") + end + + def test_ambiguous_uplus + assert_warning("a +b", "ambiguous first argument") + end + + def test_ambiguous_ustar + assert_warning("a *b", "argument prefix") + end + + def test_ambiguous_regexp + assert_warning("a /b/", "wrap regexp in parentheses") + end + + def test_equal_in_conditional + assert_warning("if a = 1; end", "should be ==") + end + + def test_dot_dot_dot_eol + assert_warning("foo...", "... at EOL") + assert_warning("def foo(...) = bar ...", "... at EOL") + + assert_warning("foo... #", "... at EOL") + assert_warning("foo... \t\v\f\n", "... at EOL") + + refute_warning("p foo...bar") + refute_warning("p foo... bar") + end + + def test_END_in_method + assert_warning("def foo; END {}; end", "END in method") + end + + def test_duplicated_hash_key + assert_warning("{ a: 1, a: 2 }", "duplicated and overwritten") + end + + def test_duplicated_when_clause + assert_warning("case 1; when 1, 1; end", "clause with line") + end + + def test_float_out_of_range + assert_warning("1.0e100000", "out of range") + end + + def test_integer_in_flip_flop + assert_warning("1 if 2..3.0", "integer") + end + + def test_keyword_eol + assert_warning("if\ntrue; end", "end of line") + assert_warning("if true\nelsif\nfalse; end", "end of line") + end + + private + + def assert_warning(source, message) + warnings = Prism.parse(source).warnings + + assert_equal 1, warnings.length + assert_include warnings.first.message, message + + if defined?(RubyVM::AbstractSyntaxTree) + assert_include capture_warning { RubyVM::AbstractSyntaxTree.parse(source) }, message + end + end + + def refute_warning(source) + assert_empty Prism.parse(source).warnings + + if defined?(RubyVM::AbstractSyntaxTree) + assert_empty capture_warning { RubyVM::AbstractSyntaxTree.parse(source) } + end + end + + def capture_warning + stderr, $stderr, verbose, $VERBOSE = $stderr, StringIO.new, $VERBOSE, true + + begin + yield + $stderr.string + ensure + $stderr, $VERBOSE = stderr, verbose + end + end + end +end