From 925e9a2188dcd6e1988ceaa3ab8d64fcdb3d6d1e Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Wed, 4 Mar 2020 23:37:52 +0300 Subject: [PATCH 1/6] rustc_parse: Use `Token::ident` where possible --- src/librustc_builtin_macros/format.rs | 65 ++++++++++++------------ src/librustc_expand/mbe/macro_parser.rs | 18 +++---- src/librustc_parse/parser/diagnostics.rs | 12 +++-- src/librustc_parse/parser/expr.rs | 50 +++++++++--------- src/librustc_parse/parser/item.rs | 19 +++---- src/librustc_parse/parser/mod.rs | 8 +-- src/librustc_parse/parser/path.rs | 6 +-- 7 files changed, 86 insertions(+), 92 deletions(-) diff --git a/src/librustc_builtin_macros/format.rs b/src/librustc_builtin_macros/format.rs index 1baec5eafe687..2883159a9f34c 100644 --- a/src/librustc_builtin_macros/format.rs +++ b/src/librustc_builtin_macros/format.rs @@ -156,44 +156,43 @@ fn parse_args<'a>( if p.token == token::Eof { break; } // accept trailing commas - if p.token.is_ident() && p.look_ahead(1, |t| *t == token::Eq) { - named = true; - let name = if let token::Ident(name, _) = p.normalized_token.kind { + match p.token.ident() { + Some((ident, _)) if p.look_ahead(1, |t| *t == token::Eq) => { + named = true; p.bump(); - name - } else { - unreachable!(); - }; + p.expect(&token::Eq)?; + let e = p.parse_expr()?; + if let Some(prev) = names.get(&ident.name) { + ecx.struct_span_err(e.span, &format!("duplicate argument named `{}`", ident)) + .span_label(args[*prev].span, "previously here") + .span_label(e.span, "duplicate argument") + .emit(); + continue; + } - p.expect(&token::Eq)?; - let e = p.parse_expr()?; - if let Some(prev) = names.get(&name) { - ecx.struct_span_err(e.span, &format!("duplicate argument named `{}`", name)) - .span_label(args[*prev].span, "previously here") - .span_label(e.span, "duplicate argument") - .emit(); - continue; + // Resolve names into slots early. + // Since all the positional args are already seen at this point + // if the input is valid, we can simply append to the positional + // args. And remember the names. + let slot = args.len(); + names.insert(ident.name, slot); + args.push(e); } - - // Resolve names into slots early. - // Since all the positional args are already seen at this point - // if the input is valid, we can simply append to the positional - // args. And remember the names. - let slot = args.len(); - names.insert(name, slot); - args.push(e); - } else { - let e = p.parse_expr()?; - if named { - let mut err = ecx - .struct_span_err(e.span, "positional arguments cannot follow named arguments"); - err.span_label(e.span, "positional arguments must be before named arguments"); - for pos in names.values() { - err.span_label(args[*pos].span, "named argument"); + _ => { + let e = p.parse_expr()?; + if named { + let mut err = ecx.struct_span_err( + e.span, + "positional arguments cannot follow named arguments", + ); + err.span_label(e.span, "positional arguments must be before named arguments"); + for pos in names.values() { + err.span_label(args[*pos].span, "named argument"); + } + err.emit(); } - err.emit(); + args.push(e); } - args.push(e); } } Ok((fmtstr, args, names)) diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index d2a5c54aae490..efba3a8ccb1e6 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -750,15 +750,9 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na /// The token is an identifier, but not `_`. /// We prohibit passing `_` to macros expecting `ident` for now. -fn get_macro_name(token: &Token) -> Option<(Name, bool)> { - match token.kind { - token::Ident(name, is_raw) if name != kw::Underscore => Some((name, is_raw)), - token::Interpolated(ref nt) => match **nt { - token::NtIdent(ident, is_raw) if ident.name != kw::Underscore => { - Some((ident.name, is_raw)) - } - _ => None, - }, +fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> { + match token.ident() { + Some((ident, is_raw)) if ident.name != kw::Underscore => Some((ident, is_raw)), _ => None, } } @@ -783,7 +777,7 @@ fn may_begin_with(token: &Token, name: Name) -> bool { && !token.is_keyword(kw::Let) } sym::ty => token.can_begin_type(), - sym::ident => get_macro_name(token).is_some(), + sym::ident => get_macro_ident(token).is_some(), sym::literal => token.can_begin_literal_or_bool(), sym::vis => match token.kind { // The follow-set of :vis + "priv" keyword + interpolated @@ -888,9 +882,9 @@ fn parse_nt_inner<'a>(p: &mut Parser<'a>, sp: Span, name: Symbol) -> PResult<'a, sym::ty => token::NtTy(p.parse_ty()?), // this could be handled like a token, since it is one sym::ident => { - if let Some((name, is_raw)) = get_macro_name(&p.token) { + if let Some((ident, is_raw)) = get_macro_ident(&p.token) { p.bump(); - token::NtIdent(Ident::new(name, p.normalized_prev_token.span), is_raw) + token::NtIdent(ident, is_raw) } else { let token_str = pprust::token_to_string(&p.token); let msg = &format!("expected ident, found {}", &token_str); diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index 6587e763d50c5..7c1df531ad16e 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -192,17 +192,19 @@ impl<'a> Parser<'a> { TokenKind::CloseDelim(token::DelimToken::Brace), TokenKind::CloseDelim(token::DelimToken::Paren), ]; - if let token::Ident(name, false) = self.normalized_token.kind { - if Ident::new(name, self.normalized_token.span).is_raw_guess() - && self.look_ahead(1, |t| valid_follow.contains(&t.kind)) + match self.token.ident() { + Some((ident, false)) + if ident.is_raw_guess() + && self.look_ahead(1, |t| valid_follow.contains(&t.kind)) => { err.span_suggestion( - self.normalized_token.span, + ident.span, "you can escape reserved keywords to use them as identifiers", - format!("r#{}", name), + format!("r#{}", ident.name), Applicability::MaybeIncorrect, ); } + _ => {} } if let Some(token_descr) = super::token_descr_opt(&self.token) { err.span_label(self.token.span, format!("expected identifier, found {}", token_descr)); diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index e7c47b0be8e49..66266aa5dc416 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -97,9 +97,10 @@ impl<'a> Parser<'a> { fn parse_expr_catch_underscore(&mut self) -> PResult<'a, P> { match self.parse_expr() { Ok(expr) => Ok(expr), - Err(mut err) => match self.normalized_token.kind { - token::Ident(name, false) - if name == kw::Underscore && self.look_ahead(1, |t| t == &token::Comma) => + Err(mut err) => match self.token.ident() { + Some((ident, false)) + if ident.name == kw::Underscore + && self.look_ahead(1, |t| t == &token::Comma) => { // Special-case handling of `foo(_, _, _)` err.emit(); @@ -331,21 +332,19 @@ impl<'a> Parser<'a> { /// /// Also performs recovery for `and` / `or` which are mistaken for `&&` and `||` respectively. fn check_assoc_op(&self) -> Option> { - Some(Spanned { - node: match (AssocOp::from_token(&self.token), &self.normalized_token.kind) { - (Some(op), _) => op, - (None, token::Ident(sym::and, false)) => { - self.error_bad_logical_op("and", "&&", "conjunction"); - AssocOp::LAnd - } - (None, token::Ident(sym::or, false)) => { - self.error_bad_logical_op("or", "||", "disjunction"); - AssocOp::LOr - } - _ => return None, - }, - span: self.normalized_token.span, - }) + let (op, span) = match (AssocOp::from_token(&self.token), self.token.ident()) { + (Some(op), _) => (op, self.token.span), + (None, Some((ident, false))) if ident.name == sym::and => { + self.error_bad_logical_op("and", "&&", "conjunction"); + (AssocOp::LAnd, ident.span) + } + (None, Some((ident, false))) if ident.name == sym::or => { + self.error_bad_logical_op("or", "||", "disjunction"); + (AssocOp::LOr, ident.span) + } + _ => return None, + }; + Some(source_map::respan(span, op)) } /// Error on `and` and `or` suggesting `&&` and `||` respectively. @@ -1907,20 +1906,23 @@ impl<'a> Parser<'a> { /// Use in case of error after field-looking code: `S { foo: () with a }`. fn find_struct_error_after_field_looking_code(&self) -> Option { - if let token::Ident(name, _) = self.normalized_token.kind { - if !self.token.is_reserved_ident() && self.look_ahead(1, |t| *t == token::Colon) { - return Some(ast::Field { - ident: Ident::new(name, self.normalized_token.span), + match self.token.ident() { + Some((ident, is_raw)) + if (is_raw || !ident.is_reserved()) + && self.look_ahead(1, |t| *t == token::Colon) => + { + Some(ast::Field { + ident, span: self.token.span, expr: self.mk_expr_err(self.token.span), is_shorthand: false, attrs: AttrVec::new(), id: DUMMY_NODE_ID, is_placeholder: false, - }); + }) } + _ => None, } - None } fn recover_struct_comma_after_dotdot(&mut self, span: Span) { diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index 01dd2f885ff5e..c1b38591f56c3 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -750,10 +750,10 @@ impl<'a> Parser<'a> { } fn parse_ident_or_underscore(&mut self) -> PResult<'a, ast::Ident> { - match self.normalized_token.kind { - token::Ident(name @ kw::Underscore, false) => { + match self.token.ident() { + Some((ident, false)) if ident.name == kw::Underscore => { self.bump(); - Ok(Ident::new(name, self.normalized_prev_token.span)) + Ok(ident) } _ => self.parse_ident(), } @@ -1609,15 +1609,12 @@ impl<'a> Parser<'a> { /// Returns the parsed optional self parameter and whether a self shortcut was used. fn parse_self_param(&mut self) -> PResult<'a, Option> { // Extract an identifier *after* having confirmed that the token is one. - let expect_self_ident = |this: &mut Self| { - match this.normalized_token.kind { - // Preserve hygienic context. - token::Ident(name, _) => { - this.bump(); - Ident::new(name, this.normalized_prev_token.span) - } - _ => unreachable!(), + let expect_self_ident = |this: &mut Self| match this.token.ident() { + Some((ident, false)) => { + this.bump(); + ident } + _ => unreachable!(), }; // Is `self` `n` tokens ahead? let is_isolated_self = |this: &Self, n| { diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 74101fef8e39b..0e3cee45dcdb0 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -480,9 +480,9 @@ impl<'a> Parser<'a> { } fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, ast::Ident> { - match self.normalized_token.kind { - token::Ident(name, _) => { - if self.token.is_reserved_ident() { + match self.token.ident() { + Some((ident, is_raw)) => { + if !is_raw && ident.is_reserved() { let mut err = self.expected_ident_found(); if recover { err.emit(); @@ -491,7 +491,7 @@ impl<'a> Parser<'a> { } } self.bump(); - Ok(Ident::new(name, self.normalized_prev_token.span)) + Ok(ident) } _ => Err(match self.prev_token.kind { TokenKind::DocComment(..) => { diff --git a/src/librustc_parse/parser/path.rs b/src/librustc_parse/parser/path.rs index 355b6429a7494..f88b4fe6ff0a8 100644 --- a/src/librustc_parse/parser/path.rs +++ b/src/librustc_parse/parser/path.rs @@ -240,10 +240,10 @@ impl<'a> Parser<'a> { } pub(super) fn parse_path_segment_ident(&mut self) -> PResult<'a, Ident> { - match self.normalized_token.kind { - token::Ident(name, _) if name.is_path_segment_keyword() => { + match self.token.ident() { + Some((ident, false)) if ident.is_path_segment_keyword() => { self.bump(); - Ok(Ident::new(name, self.normalized_prev_token.span)) + Ok(ident) } _ => self.parse_ident(), } From f4a03c44d5859dea64dea7fdbaef5377f37c5dd2 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Thu, 5 Mar 2020 00:34:57 +0300 Subject: [PATCH 2/6] rustc_ast: Introduce `Token::uninterpolated_span` --- src/librustc_ast/attr/mod.rs | 4 ++++ src/librustc_ast/token.rs | 30 ++++++++++++++++++++++++++++++ src/librustc_parse/parser/expr.rs | 17 ++++++++++------- src/librustc_parse/parser/item.rs | 2 +- src/librustc_parse/parser/mod.rs | 6 +++--- src/librustc_parse/parser/ty.rs | 2 +- 6 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/librustc_ast/attr/mod.rs b/src/librustc_ast/attr/mod.rs index 0638e8e667617..280994116c013 100644 --- a/src/librustc_ast/attr/mod.rs +++ b/src/librustc_ast/attr/mod.rs @@ -286,6 +286,10 @@ impl MetaItem { } impl AttrItem { + pub fn span(&self) -> Span { + self.args.span().map_or(self.path.span, |args_span| self.path.span.to(args_span)) + } + pub fn meta(&self, span: Span) -> Option { Some(MetaItem { path: self.path.clone(), diff --git a/src/librustc_ast/token.rs b/src/librustc_ast/token.rs index a8a2c9b2fb318..c1564882cfe9e 100644 --- a/src/librustc_ast/token.rs +++ b/src/librustc_ast/token.rs @@ -328,6 +328,18 @@ impl Token { mem::replace(self, Token::dummy()) } + /// For interpolated tokens returns a span of the fragment to which the interpolated + /// token refers, for all other tokens this is just a regular span. + /// It is particularly important to use this for identifiers and lifetimes + /// for which spans affect name resolution. This also includes edition checks + /// for edition-specific keyword identifiers. + pub fn uninterpolated_span(&self) -> Span { + match &self.kind { + Interpolated(nt) => nt.span(), + _ => self.span, + } + } + pub fn is_op(&self) -> bool { match self.kind { OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..) @@ -714,6 +726,24 @@ pub enum Nonterminal { #[cfg(target_arch = "x86_64")] rustc_data_structures::static_assert_size!(Nonterminal, 40); +impl Nonterminal { + fn span(&self) -> Span { + match self { + NtItem(item) => item.span, + NtBlock(block) => block.span, + NtStmt(stmt) => stmt.span, + NtPat(pat) => pat.span, + NtExpr(expr) | NtLiteral(expr) => expr.span, + NtTy(ty) => ty.span, + NtIdent(ident, _) | NtLifetime(ident) => ident.span, + NtMeta(attr_item) => attr_item.span(), + NtPath(path) => path.span, + NtVis(vis) => vis.span, + NtTT(tt) => tt.span(), + } + } +} + impl PartialEq for Nonterminal { fn eq(&self, rhs: &Self) -> bool { match (self, rhs) { diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index 66266aa5dc416..743fc3a4efa1d 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -849,7 +849,7 @@ impl<'a> Parser<'a> { /// Assuming we have just parsed `.`, continue parsing into an expression. fn parse_dot_suffix(&mut self, self_arg: P, lo: Span) -> PResult<'a, P> { - if self.normalized_token.span.rust_2018() && self.eat_keyword(kw::Await) { + if self.token.uninterpolated_span().rust_2018() && self.eat_keyword(kw::Await) { return self.mk_await_expr(self_arg, lo); } @@ -963,7 +963,7 @@ impl<'a> Parser<'a> { // | ^ expected expression self.bump(); Ok(self.mk_expr_err(self.token.span)) - } else if self.normalized_token.span.rust_2018() { + } else if self.token.uninterpolated_span().rust_2018() { // `Span::rust_2018()` is somewhat expensive; don't get it repeatedly. if self.check_keyword(kw::Async) { if self.is_async_block() { @@ -1396,11 +1396,14 @@ impl<'a> Parser<'a> { let movability = if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable }; - let asyncness = - if self.normalized_token.span.rust_2018() { self.parse_asyncness() } else { Async::No }; - if asyncness.is_async() { + let asyncness = if self.token.uninterpolated_span().rust_2018() { + self.parse_asyncness() + } else { + Async::No + }; + if let Async::Yes { span, .. } = asyncness { // Feature-gate `async ||` closures. - self.sess.gated_spans.gate(sym::async_closure, self.normalized_prev_token.span); + self.sess.gated_spans.gate(sym::async_closure, span); } let capture_clause = self.parse_capture_clause(); @@ -1756,7 +1759,7 @@ impl<'a> Parser<'a> { fn is_try_block(&self) -> bool { self.token.is_keyword(kw::Try) && self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) && - self.normalized_token.span.rust_2018() && + self.token.uninterpolated_span().rust_2018() && // Prevent `while try {} {}`, `if try {} {} else {}`, etc. !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) } diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index c1b38591f56c3..232ecd6fdaed1 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -574,7 +574,7 @@ impl<'a> Parser<'a> { && self.look_ahead(1, |t| t.is_non_raw_ident_where(|i| i.name != kw::As)) { self.bump(); // `default` - Defaultness::Default(self.normalized_prev_token.span) + Defaultness::Default(self.prev_token.uninterpolated_span()) } else { Defaultness::Final } diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 0e3cee45dcdb0..252a80431ac36 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -884,7 +884,7 @@ impl<'a> Parser<'a> { /// Parses asyncness: `async` or nothing. fn parse_asyncness(&mut self) -> Async { if self.eat_keyword(kw::Async) { - let span = self.normalized_prev_token.span; + let span = self.prev_token.uninterpolated_span(); Async::Yes { span, closure_id: DUMMY_NODE_ID, return_impl_trait_id: DUMMY_NODE_ID } } else { Async::No @@ -894,7 +894,7 @@ impl<'a> Parser<'a> { /// Parses unsafety: `unsafe` or nothing. fn parse_unsafety(&mut self) -> Unsafe { if self.eat_keyword(kw::Unsafe) { - Unsafe::Yes(self.normalized_prev_token.span) + Unsafe::Yes(self.prev_token.uninterpolated_span()) } else { Unsafe::No } @@ -903,7 +903,7 @@ impl<'a> Parser<'a> { /// Parses constness: `const` or nothing. fn parse_constness(&mut self) -> Const { if self.eat_keyword(kw::Const) { - Const::Yes(self.normalized_prev_token.span) + Const::Yes(self.prev_token.uninterpolated_span()) } else { Const::No } diff --git a/src/librustc_parse/parser/ty.rs b/src/librustc_parse/parser/ty.rs index c4469331b6669..16adf5c05a4ee 100644 --- a/src/librustc_parse/parser/ty.rs +++ b/src/librustc_parse/parser/ty.rs @@ -323,7 +323,7 @@ impl<'a> Parser<'a> { /// Is a `dyn B0 + ... + Bn` type allowed here? fn is_explicit_dyn_type(&mut self) -> bool { self.check_keyword(kw::Dyn) - && (self.normalized_token.span.rust_2018() + && (self.token.uninterpolated_span().rust_2018() || self.look_ahead(1, |t| { t.can_begin_bound() && !can_continue_type_after_non_fn_ident(t) })) From 43b27df5b211bc0fae74f34834ae84d17215c5ac Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 7 Mar 2020 14:37:38 +0300 Subject: [PATCH 3/6] rustc_ast: Introduce `Token::uninterpolate` --- src/librustc_ast/token.rs | 20 ++++++++++++++++++-- src/librustc_parse/parser/expr.rs | 4 ++-- src/librustc_parse/parser/item.rs | 4 ++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/librustc_ast/token.rs b/src/librustc_ast/token.rs index c1564882cfe9e..b80694ab6de84 100644 --- a/src/librustc_ast/token.rs +++ b/src/librustc_ast/token.rs @@ -14,8 +14,8 @@ use rustc_macros::HashStable_Generic; use rustc_span::symbol::kw; use rustc_span::symbol::Symbol; use rustc_span::{self, Span, DUMMY_SP}; -use std::fmt; -use std::mem; +use std::borrow::Cow; +use std::{fmt, mem}; #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] #[derive(HashStable_Generic)] @@ -457,6 +457,22 @@ impl Token { } } + // Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token + // into the regular identifier or lifetime token it refers to, + // otherwise returns the original token. + pub fn uninterpolate(&self) -> Cow<'_, Token> { + match &self.kind { + Interpolated(nt) => match **nt { + NtIdent(ident, is_raw) => { + Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)) + } + NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)), + _ => Cow::Borrowed(self), + }, + _ => Cow::Borrowed(self), + } + } + /// Returns an identifier if this token is an identifier. pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> { match self.kind { diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index 743fc3a4efa1d..f7cfb028a7a66 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -435,7 +435,7 @@ impl<'a> Parser<'a> { let attrs = self.parse_or_use_outer_attributes(attrs)?; let lo = self.token.span; // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr() - let (hi, ex) = match self.normalized_token.kind { + let (hi, ex) = match self.token.uninterpolate().kind { token::Not => self.parse_unary_expr(lo, UnOp::Not), // `!expr` token::Tilde => self.recover_tilde_expr(lo), // `~expr` token::BinOp(token::Minus) => self.parse_unary_expr(lo, UnOp::Neg), // `-expr` @@ -755,7 +755,7 @@ impl<'a> Parser<'a> { } fn parse_dot_suffix_expr(&mut self, lo: Span, base: P) -> PResult<'a, P> { - match self.normalized_token.kind { + match self.token.uninterpolate().kind { token::Ident(..) => self.parse_dot_suffix(base, lo), token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { Ok(self.parse_tuple_field_access_expr(lo, base, symbol, suffix)) diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index 232ecd6fdaed1..08d71f03976d6 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -1544,7 +1544,7 @@ impl<'a> Parser<'a> { let is_name_required = match self.token.kind { token::DotDotDot => false, - _ => req_name(self.normalized_token.span.edition()), + _ => req_name(self.token.uninterpolate().span.edition()), }; let (pat, ty) = if is_name_required || self.is_named_param() { debug!("parse_param_general parse_pat (is_name_required:{})", is_name_required); @@ -1648,7 +1648,7 @@ impl<'a> Parser<'a> { // Only a limited set of initial token sequences is considered `self` parameters; anything // else is parsed as a normal function parameter list, so some lookahead is required. let eself_lo = self.token.span; - let (eself, eself_ident, eself_hi) = match self.normalized_token.kind { + let (eself, eself_ident, eself_hi) = match self.token.uninterpolate().kind { token::BinOp(token::And) => { let eself = if is_isolated_self(self, 1) { // `&self` From 5d7f67d3b109e95fb0dca8f773a2146db4eb4a93 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 7 Mar 2020 16:34:29 +0300 Subject: [PATCH 4/6] rustc_parse: Remove `Parser::normalized(_prev)_token` --- src/librustc_parse/lib.rs | 5 ++-- src/librustc_parse/parser/mod.rs | 42 ++++---------------------------- 2 files changed, 7 insertions(+), 40 deletions(-) diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index 25f9f8fd3ad4f..10d524776a11b 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -4,7 +4,7 @@ #![feature(crate_visibility_modifier)] use rustc_ast::ast; -use rustc_ast::token::{self, Nonterminal, Token}; +use rustc_ast::token::{self, Nonterminal}; use rustc_ast::tokenstream::{self, TokenStream, TokenTree}; use rustc_ast_pretty::pprust; use rustc_data_structures::sync::Lrc; @@ -171,8 +171,7 @@ fn maybe_source_file_to_parser( let mut parser = stream_to_parser(sess, stream, None); parser.unclosed_delims = unclosed_delims; if parser.token == token::Eof { - let span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); - parser.set_token(Token::new(token::Eof, span)); + parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); } Ok(parser) diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 252a80431ac36..9376c7c1c724d 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -88,21 +88,10 @@ macro_rules! maybe_recover_from_interpolated_ty_qpath { #[derive(Clone)] pub struct Parser<'a> { pub sess: &'a ParseSess, - /// The current non-normalized token. + /// The current token. pub token: Token, - /// The current normalized token. - /// "Normalized" means that some interpolated tokens - /// (`$i: ident` and `$l: lifetime` meta-variables) are replaced - /// with non-interpolated identifier and lifetime tokens they refer to. - /// Use this if you need to check for `token::Ident` or `token::Lifetime` specifically, - /// this also includes edition checks for edition-specific keyword identifiers. - pub normalized_token: Token, - /// The previous non-normalized token. + /// The previous token. pub prev_token: Token, - /// The previous normalized token. - /// Use this if you need to check for `token::Ident` or `token::Lifetime` specifically, - /// this also includes edition checks for edition-specific keyword identifiers. - pub normalized_prev_token: Token, restrictions: Restrictions, /// Used to determine the path to externally loaded source files. pub(super) directory: Directory, @@ -374,9 +363,7 @@ impl<'a> Parser<'a> { let mut parser = Parser { sess, token: Token::dummy(), - normalized_token: Token::dummy(), prev_token: Token::dummy(), - normalized_prev_token: Token::dummy(), restrictions: Restrictions::empty(), recurse_into_file_modules, directory: Directory { @@ -609,7 +596,7 @@ impl<'a> Parser<'a> { Some((first, second)) if first == expected => { let first_span = self.sess.source_map().start_point(self.token.span); let second_span = self.token.span.with_lo(first_span.hi()); - self.set_token(Token::new(first, first_span)); + self.token = Token::new(first, first_span); self.bump_with(Token::new(second, second_span)); true } @@ -817,23 +804,6 @@ impl<'a> Parser<'a> { self.parse_delim_comma_seq(token::Paren, f) } - // Interpolated identifier (`$i: ident`) and lifetime (`$l: lifetime`) - // tokens are replaced with usual identifier and lifetime tokens, - // so the former are never encountered during normal parsing. - crate fn set_token(&mut self, token: Token) { - self.token = token; - self.normalized_token = match &self.token.kind { - token::Interpolated(nt) => match **nt { - token::NtIdent(ident, is_raw) => { - Token::new(token::Ident(ident.name, is_raw), ident.span) - } - token::NtLifetime(ident) => Token::new(token::Lifetime(ident.name), ident.span), - _ => self.token.clone(), - }, - _ => self.token.clone(), - } - } - /// Advance the parser by one token using provided token as the next one. fn bump_with(&mut self, next_token: Token) { // Bumping after EOF is a bad sign, usually an infinite loop. @@ -843,9 +813,7 @@ impl<'a> Parser<'a> { } // Update the current and previous tokens. - self.prev_token = self.token.take(); - self.normalized_prev_token = self.normalized_token.take(); - self.set_token(next_token); + self.prev_token = mem::replace(&mut self.token, next_token); // Diagnostics. self.expected_tokens.clear(); @@ -1005,7 +973,7 @@ impl<'a> Parser<'a> { &mut self.token_cursor.frame, self.token_cursor.stack.pop().unwrap(), ); - self.set_token(Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close)); + self.token = Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close); self.bump(); TokenTree::Delimited(frame.span, frame.delim, frame.tree_cursor.stream) } From 9be233cbfe134f032ed2d50f7cc66e901bbe3f6f Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 7 Mar 2020 15:58:27 +0300 Subject: [PATCH 5/6] Use `Token::uninterpolate` in couple more places matching on `(Nt)Ident` --- src/librustc_ast/attr/mod.rs | 5 ++--- src/librustc_ast/token.rs | 32 +++++++++++-------------------- src/librustc_ast/tokenstream.rs | 7 +++++++ src/librustc_ast/util/literal.rs | 15 ++++----------- src/librustc_parse/parser/expr.rs | 3 +-- src/librustc_parse/parser/item.rs | 2 ++ src/librustc_parse/parser/pat.rs | 2 +- 7 files changed, 28 insertions(+), 38 deletions(-) diff --git a/src/librustc_ast/attr/mod.rs b/src/librustc_ast/attr/mod.rs index 280994116c013..52a59e82ae23f 100644 --- a/src/librustc_ast/attr/mod.rs +++ b/src/librustc_ast/attr/mod.rs @@ -441,7 +441,7 @@ impl MetaItem { I: Iterator, { // FIXME: Share code with `parse_path`. - let path = match tokens.next() { + let path = match tokens.next().map(TokenTree::uninterpolate) { Some(TokenTree::Token(Token { kind: kind @ token::Ident(..), span })) | Some(TokenTree::Token(Token { kind: kind @ token::ModSep, span })) => 'arm: { let mut segments = if let token::Ident(name, _) = kind { @@ -457,7 +457,7 @@ impl MetaItem { }; loop { if let Some(TokenTree::Token(Token { kind: token::Ident(name, _), span })) = - tokens.next() + tokens.next().map(TokenTree::uninterpolate) { segments.push(PathSegment::from_ident(Ident::new(name, span))); } else { @@ -474,7 +474,6 @@ impl MetaItem { Path { span, segments } } Some(TokenTree::Token(Token { kind: token::Interpolated(nt), .. })) => match *nt { - token::Nonterminal::NtIdent(ident, _) => Path::from_ident(ident), token::Nonterminal::NtMeta(ref item) => return item.meta(item.path.span), token::Nonterminal::NtPath(ref path) => path.clone(), _ => return None, diff --git a/src/librustc_ast/token.rs b/src/librustc_ast/token.rs index b80694ab6de84..b022d969deccb 100644 --- a/src/librustc_ast/token.rs +++ b/src/librustc_ast/token.rs @@ -357,7 +357,7 @@ impl Token { /// Returns `true` if the token can appear at the start of an expression. pub fn can_begin_expr(&self) -> bool { - match self.kind { + match self.uninterpolate().kind { Ident(name, is_raw) => ident_can_begin_expr(name, self.span, is_raw), // value name or keyword OpenDelim(..) | // tuple, array or block @@ -375,12 +375,10 @@ impl Token { Lifetime(..) | // labeled loop Pound => true, // expression attributes Interpolated(ref nt) => match **nt { - NtIdent(ident, is_raw) => ident_can_begin_expr(ident.name, ident.span, is_raw), NtLiteral(..) | NtExpr(..) | NtBlock(..) | - NtPath(..) | - NtLifetime(..) => true, + NtPath(..) => true, _ => false, }, _ => false, @@ -389,7 +387,7 @@ impl Token { /// Returns `true` if the token can appear at the start of a type. pub fn can_begin_type(&self) -> bool { - match self.kind { + match self.uninterpolate().kind { Ident(name, is_raw) => ident_can_begin_type(name, self.span, is_raw), // type name or keyword OpenDelim(Paren) | // tuple @@ -403,8 +401,7 @@ impl Token { Lt | BinOp(Shl) | // associated path ModSep => true, // global path Interpolated(ref nt) => match **nt { - NtIdent(ident, is_raw) => ident_can_begin_type(ident.name, ident.span, is_raw), - NtTy(..) | NtPath(..) | NtLifetime(..) => true, + NtTy(..) | NtPath(..) => true, _ => false, }, _ => false, @@ -445,11 +442,10 @@ impl Token { /// /// Keep this in sync with `Lit::from_token`. pub fn can_begin_literal_or_bool(&self) -> bool { - match self.kind { + match self.uninterpolate().kind { Literal(..) | BinOp(Minus) => true, Ident(name, false) if name.is_bool_lit() => true, Interpolated(ref nt) => match &**nt { - NtIdent(ident, false) if ident.name.is_bool_lit() => true, NtExpr(e) | NtLiteral(e) => matches!(e.kind, ast::ExprKind::Lit(_)), _ => false, }, @@ -475,24 +471,18 @@ impl Token { /// Returns an identifier if this token is an identifier. pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> { - match self.kind { - Ident(name, is_raw) => Some((ast::Ident::new(name, self.span), is_raw)), - Interpolated(ref nt) => match **nt { - NtIdent(ident, is_raw) => Some((ident, is_raw)), - _ => None, - }, + let token = self.uninterpolate(); + match token.kind { + Ident(name, is_raw) => Some((ast::Ident::new(name, token.span), is_raw)), _ => None, } } /// Returns a lifetime identifier if this token is a lifetime. pub fn lifetime(&self) -> Option { - match self.kind { - Lifetime(name) => Some(ast::Ident::new(name, self.span)), - Interpolated(ref nt) => match **nt { - NtLifetime(ident) => Some(ident), - _ => None, - }, + let token = self.uninterpolate(); + match token.kind { + Lifetime(name) => Some(ast::Ident::new(name, token.span)), _ => None, } } diff --git a/src/librustc_ast/tokenstream.rs b/src/librustc_ast/tokenstream.rs index 03e8fff247b39..916a5ff6f46f4 100644 --- a/src/librustc_ast/tokenstream.rs +++ b/src/librustc_ast/tokenstream.rs @@ -116,6 +116,13 @@ impl TokenTree { pub fn close_tt(span: DelimSpan, delim: DelimToken) -> TokenTree { TokenTree::token(token::CloseDelim(delim), span.close) } + + pub fn uninterpolate(self) -> TokenTree { + match self { + TokenTree::Token(token) => TokenTree::Token(token.uninterpolate().into_owned()), + tt => tt, + } + } } impl HashStable for TokenStream diff --git a/src/librustc_ast/util/literal.rs b/src/librustc_ast/util/literal.rs index ecf17efc4e0d1..d1757394f3a1d 100644 --- a/src/librustc_ast/util/literal.rs +++ b/src/librustc_ast/util/literal.rs @@ -191,23 +191,16 @@ impl Lit { /// /// Keep this in sync with `Token::can_begin_literal_or_bool`. pub fn from_token(token: &Token) -> Result { - let lit = match token.kind { + let lit = match token.uninterpolate().kind { token::Ident(name, false) if name.is_bool_lit() => { token::Lit::new(token::Bool, name, None) } token::Literal(lit) => lit, token::Interpolated(ref nt) => { - match &**nt { - token::NtIdent(ident, false) if ident.name.is_bool_lit() => { - let lit = token::Lit::new(token::Bool, ident.name, None); - return Lit::from_lit_token(lit, ident.span); + if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { + if let ast::ExprKind::Lit(lit) = &expr.kind { + return Ok(lit.clone()); } - token::NtExpr(expr) | token::NtLiteral(expr) => { - if let ast::ExprKind::Lit(lit) = &expr.kind { - return Ok(lit.clone()); - } - } - _ => {} } return Err(LitError::NotLiteral); } diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index f7cfb028a7a66..d28b9cd968218 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -50,7 +50,6 @@ macro_rules! maybe_whole_expr { AttrVec::new(), )); } - // N.B., `NtIdent(ident)` is normalized to `Ident` in `fn bump`. _ => {} }; } @@ -482,7 +481,7 @@ impl<'a> Parser<'a> { } fn is_mistaken_not_ident_negation(&self) -> bool { - let token_cannot_continue_expr = |t: &Token| match t.kind { + let token_cannot_continue_expr = |t: &Token| match t.uninterpolate().kind { // These tokens can start an expression after `!`, but // can't continue an expression after an ident token::Ident(name, is_raw) => token::ident_can_begin_expr(name, t.span, is_raw), diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index 08d71f03976d6..bf612bfc0e4a1 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -1544,6 +1544,8 @@ impl<'a> Parser<'a> { let is_name_required = match self.token.kind { token::DotDotDot => false, + // FIXME: Consider using interpolated token for this edition check, + // it should match the intent of edition hygiene better. _ => req_name(self.token.uninterpolate().span.edition()), }; let (pat, ty) = if is_name_required || self.is_named_param() { diff --git a/src/librustc_parse/parser/pat.rs b/src/librustc_parse/parser/pat.rs index 4c041fd669d67..f52a91ff5989d 100644 --- a/src/librustc_parse/parser/pat.rs +++ b/src/librustc_parse/parser/pat.rs @@ -151,7 +151,7 @@ impl<'a> Parser<'a> { /// Note that there are more tokens such as `@` for which we know that the `|` /// is an illegal parse. However, the user's intent is less clear in that case. fn recover_trailing_vert(&mut self, lo: Option) -> bool { - let is_end_ahead = self.look_ahead(1, |token| match &token.kind { + let is_end_ahead = self.look_ahead(1, |token| match &token.uninterpolate().kind { token::FatArrow // e.g. `a | => 0,`. | token::Ident(kw::If, false) // e.g. `a | if expr`. | token::Eq // e.g. `let a | = 0`. From 7a30bb1676690596f73659d18959877d993510ae Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Mon, 9 Mar 2020 12:42:33 +0300 Subject: [PATCH 6/6] Address review comments --- src/librustc_ast/token.rs | 19 ++++++++++++++----- src/librustc_expand/mbe/macro_parser.rs | 5 +---- src/librustc_parse/parser/expr.rs | 13 ++++++------- src/librustc_parse/parser/item.rs | 2 +- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/librustc_ast/token.rs b/src/librustc_ast/token.rs index b022d969deccb..b67b7d346f756 100644 --- a/src/librustc_ast/token.rs +++ b/src/librustc_ast/token.rs @@ -225,8 +225,15 @@ pub enum TokenKind { /* Literals */ Literal(Lit), - /* Name components */ + /// Identifier token. + /// Do not forget about `NtIdent` when you want to match on identifiers. + /// It's recommended to use `Token::(ident,uninterpolate,uninterpolated_span)` to + /// treat regular and interpolated identifiers in the same way. Ident(ast::Name, /* is_raw */ bool), + /// Lifetime identifier token. + /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers. + /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to + /// treat regular and interpolated lifetime identifiers in the same way. Lifetime(ast::Name), Interpolated(Lrc), @@ -328,11 +335,12 @@ impl Token { mem::replace(self, Token::dummy()) } - /// For interpolated tokens returns a span of the fragment to which the interpolated - /// token refers, for all other tokens this is just a regular span. + /// For interpolated tokens, returns a span of the fragment to which the interpolated + /// token refers. For all other tokens this is just a regular span. /// It is particularly important to use this for identifiers and lifetimes - /// for which spans affect name resolution. This also includes edition checks - /// for edition-specific keyword identifiers. + /// for which spans affect name resolution and edition checks. + /// Note that keywords are also identifiers, so they should use this + /// if they keep spans or perform edition checks. pub fn uninterpolated_span(&self) -> Span { match &self.kind { Interpolated(nt) => nt.span(), @@ -453,6 +461,7 @@ impl Token { } } + // A convenience function for matching on identifiers during parsing. // Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token // into the regular identifier or lifetime token it refers to, // otherwise returns the original token. diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index efba3a8ccb1e6..6d4d7f5b4f394 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -751,10 +751,7 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na /// The token is an identifier, but not `_`. /// We prohibit passing `_` to macros expecting `ident` for now. fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> { - match token.ident() { - Some((ident, is_raw)) if ident.name != kw::Underscore => Some((ident, is_raw)), - _ => None, - } + token.ident().filter(|(ident, _)| ident.name != kw::Underscore) } /// Checks whether a non-terminal may begin with a particular token. diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index d28b9cd968218..bfca9f07f05d5 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -97,9 +97,8 @@ impl<'a> Parser<'a> { match self.parse_expr() { Ok(expr) => Ok(expr), Err(mut err) => match self.token.ident() { - Some((ident, false)) - if ident.name == kw::Underscore - && self.look_ahead(1, |t| t == &token::Comma) => + Some((Ident { name: kw::Underscore, .. }, false)) + if self.look_ahead(1, |t| t == &token::Comma) => { // Special-case handling of `foo(_, _, _)` err.emit(); @@ -333,13 +332,13 @@ impl<'a> Parser<'a> { fn check_assoc_op(&self) -> Option> { let (op, span) = match (AssocOp::from_token(&self.token), self.token.ident()) { (Some(op), _) => (op, self.token.span), - (None, Some((ident, false))) if ident.name == sym::and => { + (None, Some((Ident { name: sym::and, span }, false))) => { self.error_bad_logical_op("and", "&&", "conjunction"); - (AssocOp::LAnd, ident.span) + (AssocOp::LAnd, span) } - (None, Some((ident, false))) if ident.name == sym::or => { + (None, Some((Ident { name: sym::or, span }, false))) => { self.error_bad_logical_op("or", "||", "disjunction"); - (AssocOp::LOr, ident.span) + (AssocOp::LOr, span) } _ => return None, }; diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index bf612bfc0e4a1..126686c8defbf 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -751,7 +751,7 @@ impl<'a> Parser<'a> { fn parse_ident_or_underscore(&mut self) -> PResult<'a, ast::Ident> { match self.token.ident() { - Some((ident, false)) if ident.name == kw::Underscore => { + Some((ident @ Ident { name: kw::Underscore, .. }, false)) => { self.bump(); Ok(ident) }