Skip to content

Commit

Permalink
Auto merge of #97251 - petrochenkov:eqtokens, r=nnethercote
Browse files Browse the repository at this point in the history
rustc_parse: Move AST -> TokenStream conversion logic to rustc_ast

In the past falling back to reparsing pretty-printed strings was common, so some of this logic had to live in `rustc_parse`, but now the reparsing fallback is only used in two corner cases so we can move this logic to `rustc_ast` which makes many things simpler.

It also helps to fix `MacArgs::inner_tokens` for `MacArgs::Eq` with non-literal expressions, which is done in the second commit.
r? `@nnethercote`
  • Loading branch information
bors committed May 22, 2022
2 parents 6534637 + 09b4c7c commit 4bb4dc4
Show file tree
Hide file tree
Showing 14 changed files with 127 additions and 300 deletions.
22 changes: 4 additions & 18 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ pub use GenericArgs::*;
pub use UnsafeSource::*;

use crate::ptr::P;
use crate::token::{self, CommentKind, Delimiter, Token, TokenKind};
use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};
use crate::token::{self, CommentKind, Delimiter};
use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stack::ensure_sufficient_stack;
Expand Down Expand Up @@ -444,8 +444,7 @@ impl Default for Generics {
pub struct WhereClause {
/// `true` if we ate a `where` token: this can happen
/// if we parsed no predicates (e.g. `struct Foo where {}`).
/// This allows us to accurately pretty-print
/// in `nt_to_tokenstream`
/// This allows us to pretty-print accurately.
pub has_where_token: bool,
pub predicates: Vec<WherePredicate>,
pub span: Span,
Expand Down Expand Up @@ -1571,20 +1570,7 @@ impl MacArgs {
match self {
MacArgs::Empty => TokenStream::default(),
MacArgs::Delimited(.., tokens) => tokens.clone(),
MacArgs::Eq(_, MacArgsEq::Ast(expr)) => {
// Currently only literals are allowed here. If more complex expression kinds are
// allowed in the future, then `nt_to_tokenstream` should be used to extract the
// token stream. This will require some cleverness, perhaps with a function
// pointer, because `nt_to_tokenstream` is not directly usable from this crate.
// It will also require changing the `parse_expr` call in `parse_mac_args_common`
// to `parse_expr_force_collect`.
if let ExprKind::Lit(lit) = &expr.kind {
let token = Token::new(TokenKind::Literal(lit.token), lit.span);
TokenTree::Token(token).into()
} else {
unreachable!("couldn't extract literal when getting inner tokens: {:?}", expr)
}
}
MacArgs::Eq(_, MacArgsEq::Ast(expr)) => TokenStream::from_ast(expr),
MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
unreachable!("in literal form when getting inner tokens: {:?}", lit)
}
Expand Down
8 changes: 7 additions & 1 deletion compiler/rustc_ast/src/ast_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,20 @@ macro_rules! impl_has_span {
};
}

impl_has_span!(AssocItem, Expr, ForeignItem, Item, Stmt);
impl_has_span!(AssocItem, Block, Expr, ForeignItem, Item, Pat, Path, Stmt, Ty, Visibility);

impl<T: AstDeref<Target: HasSpan>> HasSpan for T {
fn span(&self) -> Span {
self.ast_deref().span()
}
}

impl HasSpan for AttrItem {
fn span(&self) -> Span {
self.span()
}
}

/// A trait for AST nodes having (or not having) collected tokens.
pub trait HasTokens {
fn tokens(&self) -> Option<&LazyTokenStream>;
Expand Down
93 changes: 86 additions & 7 deletions compiler/rustc_ast/src/tokenstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
//! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
//! ownership of the original.

use crate::token::{self, Delimiter, Token, TokenKind};
use crate::ast::StmtKind;
use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
use crate::AttrVec;

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
Expand Down Expand Up @@ -45,12 +47,6 @@ pub enum TokenTree {
Delimited(DelimSpan, Delimiter, TokenStream),
}

#[derive(Copy, Clone)]
pub enum CanSynthesizeMissingTokens {
Yes,
No,
}

// Ensure all fields of `TokenTree` is `Send` and `Sync`.
#[cfg(parallel_compiler)]
fn _dummy()
Expand Down Expand Up @@ -471,6 +467,89 @@ impl TokenStream {
.collect(),
))
}

fn opt_from_ast(node: &(impl HasAttrs + HasTokens)) -> Option<TokenStream> {
let tokens = node.tokens()?;
let attrs = node.attrs();
let attr_annotated = if attrs.is_empty() {
tokens.create_token_stream()
} else {
let attr_data = AttributesData { attrs: attrs.to_vec().into(), tokens: tokens.clone() };
AttrAnnotatedTokenStream::new(vec![(
AttrAnnotatedTokenTree::Attributes(attr_data),
Spacing::Alone,
)])
};
Some(attr_annotated.to_tokenstream())
}

pub fn from_ast(node: &(impl HasAttrs + HasSpan + HasTokens + fmt::Debug)) -> TokenStream {
TokenStream::opt_from_ast(node)
.unwrap_or_else(|| panic!("missing tokens for node at {:?}: {:?}", node.span(), node))
}

pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
match nt {
Nonterminal::NtIdent(ident, is_raw) => {
TokenTree::token(token::Ident(ident.name, *is_raw), ident.span).into()
}
Nonterminal::NtLifetime(ident) => {
TokenTree::token(token::Lifetime(ident.name), ident.span).into()
}
Nonterminal::NtItem(item) => TokenStream::from_ast(item),
Nonterminal::NtBlock(block) => TokenStream::from_ast(block),
Nonterminal::NtStmt(stmt) if let StmtKind::Empty = stmt.kind => {
// FIXME: Properly collect tokens for empty statements.
TokenTree::token(token::Semi, stmt.span).into()
}
Nonterminal::NtStmt(stmt) => TokenStream::from_ast(stmt),
Nonterminal::NtPat(pat) => TokenStream::from_ast(pat),
Nonterminal::NtTy(ty) => TokenStream::from_ast(ty),
Nonterminal::NtMeta(attr) => TokenStream::from_ast(attr),
Nonterminal::NtPath(path) => TokenStream::from_ast(path),
Nonterminal::NtVis(vis) => TokenStream::from_ast(vis),
Nonterminal::NtExpr(expr) | Nonterminal::NtLiteral(expr) => TokenStream::from_ast(expr),
}
}

fn flatten_token(token: &Token) -> TokenTree {
match &token.kind {
token::Interpolated(nt) if let token::NtIdent(ident, is_raw) = **nt => {
TokenTree::token(token::Ident(ident.name, is_raw), ident.span)
}
token::Interpolated(nt) => TokenTree::Delimited(
DelimSpan::from_single(token.span),
Delimiter::Invisible,
TokenStream::from_nonterminal_ast(&nt).flattened(),
),
_ => TokenTree::Token(token.clone()),
}
}

fn flatten_token_tree(tree: &TokenTree) -> TokenTree {
match tree {
TokenTree::Token(token) => TokenStream::flatten_token(token),
TokenTree::Delimited(span, delim, tts) => {
TokenTree::Delimited(*span, *delim, tts.flattened())
}
}
}

#[must_use]
pub fn flattened(&self) -> TokenStream {
fn can_skip(stream: &TokenStream) -> bool {
stream.trees().all(|tree| match tree {
TokenTree::Token(token) => !matches!(token.kind, token::Interpolated(_)),
TokenTree::Delimited(_, _, inner) => can_skip(inner),
})
}

if can_skip(self) {
return self.clone();
}

self.trees().map(|tree| TokenStream::flatten_token_tree(tree)).collect()
}
}

// 99.5%+ of the time we have 1 or 2 elements in this vector.
Expand Down
3 changes: 0 additions & 3 deletions compiler/rustc_ast_lowering/src/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use rustc_hir::def::{DefKind, Res};
use rustc_hir::def_id::{LocalDefId, CRATE_DEF_ID};
use rustc_hir::PredicateOrigin;
use rustc_index::vec::{Idx, IndexVec};
use rustc_session::utils::NtToTokenstream;
use rustc_session::Session;
use rustc_span::source_map::DesugaringKind;
use rustc_span::symbol::{kw, sym, Ident};
Expand All @@ -27,7 +26,6 @@ use std::iter;
pub(super) struct ItemLowerer<'a, 'hir> {
pub(super) sess: &'a Session,
pub(super) resolver: &'a mut dyn ResolverAstLowering,
pub(super) nt_to_tokenstream: NtToTokenstream,
pub(super) arena: &'hir Arena<'hir>,
pub(super) ast_index: &'a IndexVec<LocalDefId, AstOwner<'a>>,
pub(super) owners: &'a mut IndexVec<LocalDefId, hir::MaybeOwner<&'hir hir::OwnerInfo<'hir>>>,
Expand Down Expand Up @@ -63,7 +61,6 @@ impl<'a, 'hir> ItemLowerer<'a, 'hir> {
// Pseudo-globals.
sess: &self.sess,
resolver: self.resolver,
nt_to_tokenstream: self.nt_to_tokenstream,
arena: self.arena,

// HirId handling.
Expand Down
38 changes: 3 additions & 35 deletions compiler/rustc_ast_lowering/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
#![recursion_limit = "256"]
#![allow(rustc::potential_query_instability)]

use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, TokenStream};
use rustc_ast::visit;
use rustc_ast::{self as ast, *};
use rustc_ast_pretty::pprust;
Expand All @@ -56,7 +55,6 @@ use rustc_hir::{ConstArg, GenericArg, ItemLocalId, ParamName, TraitCandidate};
use rustc_index::vec::{Idx, IndexVec};
use rustc_query_system::ich::StableHashingContext;
use rustc_session::parse::feature_err;
use rustc_session::utils::{FlattenNonterminals, NtToTokenstream};
use rustc_session::Session;
use rustc_span::hygiene::{ExpnId, MacroKind};
use rustc_span::source_map::DesugaringKind;
Expand Down Expand Up @@ -89,11 +87,6 @@ struct LoweringContext<'a, 'hir: 'a> {

resolver: &'a mut dyn ResolverAstLowering,

/// HACK(Centril): there is a cyclic dependency between the parser and lowering
/// if we don't have this function pointer. To avoid that dependency so that
/// `rustc_middle` is independent of the parser, we use dynamic dispatch here.
nt_to_tokenstream: NtToTokenstream,

/// Used to allocate HIR nodes.
arena: &'hir Arena<'hir>,

Expand Down Expand Up @@ -436,7 +429,6 @@ pub fn lower_crate<'a, 'hir>(
sess: &'a Session,
krate: &'a Crate,
resolver: &'a mut dyn ResolverAstLowering,
nt_to_tokenstream: NtToTokenstream,
arena: &'hir Arena<'hir>,
) -> &'hir hir::Crate<'hir> {
let _prof_timer = sess.prof.verbose_generic_activity("hir_lowering");
Expand All @@ -447,15 +439,8 @@ pub fn lower_crate<'a, 'hir>(
IndexVec::from_fn_n(|_| hir::MaybeOwner::Phantom, resolver.definitions().def_index_count());

for def_id in ast_index.indices() {
item::ItemLowerer {
sess,
resolver,
nt_to_tokenstream,
arena,
ast_index: &ast_index,
owners: &mut owners,
}
.lower_node(def_id);
item::ItemLowerer { sess, resolver, arena, ast_index: &ast_index, owners: &mut owners }
.lower_node(def_id);
}

let hir_hash = compute_hir_hash(resolver, &owners);
Expand Down Expand Up @@ -875,11 +860,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
// ```
//
// In both cases, we don't want to synthesize any tokens
MacArgs::Delimited(
dspan,
delim,
self.lower_token_stream(tokens.clone(), CanSynthesizeMissingTokens::No),
)
MacArgs::Delimited(dspan, delim, tokens.flattened())
}
// This is an inert key-value attribute - it will never be visible to macros
// after it gets lowered to HIR. Therefore, we can extract literals to handle
Expand All @@ -904,19 +885,6 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
}
}

fn lower_token_stream(
&self,
tokens: TokenStream,
synthesize_tokens: CanSynthesizeMissingTokens,
) -> TokenStream {
FlattenNonterminals {
parse_sess: &self.sess.parse_sess,
synthesize_tokens,
nt_to_tokenstream: self.nt_to_tokenstream,
}
.process_token_stream(tokens)
}

/// Given an associated type constraint like one of these:
///
/// ```ignore (illustrative)
Expand Down
32 changes: 1 addition & 31 deletions compiler/rustc_ast_pretty/src/pprust/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,12 @@ mod tests;
pub mod state;
pub use state::{print_crate, AnnNode, Comments, PpAnn, PrintState, State};

use rustc_ast as ast;
use rustc_ast::token::{Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::{self as ast, AstDeref};

use std::borrow::Cow;

pub trait AstPrettyPrint {
fn pretty_print(&self) -> String;
}

impl<T: AstDeref<Target: AstPrettyPrint>> AstPrettyPrint for T {
fn pretty_print(&self) -> String {
self.ast_deref().pretty_print()
}
}

macro_rules! impl_ast_pretty_print {
($($T:ty => $method:ident),+ $(,)?) => {
$(
impl AstPrettyPrint for $T {
fn pretty_print(&self) -> String {
State::new().$method(self)
}
}
)+
};
}

impl_ast_pretty_print! {
ast::Item => item_to_string,
ast::AssocItem => assoc_item_to_string,
ast::ForeignItem => foreign_item_to_string,
ast::Expr => expr_to_string,
ast::Stmt => stmt_to_string,
}

pub fn nonterminal_to_string(nt: &Nonterminal) -> String {
State::new().nonterminal_to_string(nt)
}
Expand Down
11 changes: 1 addition & 10 deletions compiler/rustc_builtin_macros/src/cfg_eval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use crate::util::{check_builtin_macro_attribute, warn_on_duplicate_attribute};
use rustc_ast as ast;
use rustc_ast::mut_visit::MutVisitor;
use rustc_ast::ptr::P;
use rustc_ast::tokenstream::CanSynthesizeMissingTokens;
use rustc_ast::visit::Visitor;
use rustc_ast::NodeId;
use rustc_ast::{mut_visit, visit};
Expand All @@ -13,7 +12,6 @@ use rustc_expand::config::StripUnconfigured;
use rustc_expand::configure;
use rustc_feature::Features;
use rustc_parse::parser::{ForceCollect, Parser};
use rustc_session::utils::FlattenNonterminals;
use rustc_session::Session;
use rustc_span::symbol::sym;
use rustc_span::Span;
Expand Down Expand Up @@ -174,8 +172,6 @@ impl CfgEval<'_, '_> {
_ => unreachable!(),
};

let mut orig_tokens = annotatable.to_tokens(&self.cfg.sess.parse_sess);

// 'Flatten' all nonterminals (i.e. `TokenKind::Interpolated`)
// to `None`-delimited groups containing the corresponding tokens. This
// is normally delayed until the proc-macro server actually needs to
Expand All @@ -189,12 +185,7 @@ impl CfgEval<'_, '_> {
// where `$item` is `#[cfg_attr] struct Foo {}`. We want to make
// sure to evaluate *all* `#[cfg]` and `#[cfg_attr]` attributes - the simplest
// way to do this is to do a single parse of a stream without any nonterminals.
let mut flatten = FlattenNonterminals {
nt_to_tokenstream: rustc_parse::nt_to_tokenstream,
parse_sess: &self.cfg.sess.parse_sess,
synthesize_tokens: CanSynthesizeMissingTokens::No,
};
orig_tokens = flatten.process_token_stream(orig_tokens);
let orig_tokens = annotatable.to_tokens().flattened();

// Re-parse the tokens, setting the `capture_cfg` flag to save extra information
// to the captured `AttrAnnotatedTokenStream` (specifically, we capture
Expand Down
Loading

0 comments on commit 4bb4dc4

Please sign in to comment.