From 69f6145ad82e931f730cca81f12a369d894121a6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 13 Jul 2024 11:15:40 +1000 Subject: [PATCH 1/3] Change `look` from a macro to a function. Using `#[track_caller]` works if the assertion is moved outside of the closure. --- compiler/rustc_parse/src/parser/tests.rs | 125 ++++++++++++----------- 1 file changed, 63 insertions(+), 62 deletions(-) diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index 5b2d119e42b45..ee2d39b03bd7a 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -1376,12 +1376,13 @@ fn ttdelim_span() { }); } -// Uses a macro rather than a function so that failure messages mention the -// correct line in the test function. -macro_rules! look { - ($p:ident, $dist:literal, $kind:expr) => { - $p.look_ahead($dist, |tok| assert_eq!($kind, tok.kind)); - }; +#[track_caller] +fn look(p: &Parser<'_>, dist: usize, kind: rustc_ast::token::TokenKind) { + // Do the `assert_eq` outside the closure so that `track_caller` works. + // (`#![feature(closure_track_caller)]` + `#[track_caller]` on the closure + // doesn't give the line number in the test below if the assertion fails.) + let tok = p.look_ahead(dist, |tok| tok.clone()); + assert_eq!(kind, tok.kind); } #[test] @@ -1397,63 +1398,63 @@ fn look_ahead() { let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string()); // Current position is the `fn`. - look!(p, 0, token::Ident(kw::Fn, raw_no)); - look!(p, 1, token::Ident(sym_f, raw_no)); - look!(p, 2, token::OpenDelim(Delimiter::Parenthesis)); - look!(p, 3, token::Ident(sym_x, raw_no)); - look!(p, 4, token::Colon); - look!(p, 5, token::Ident(sym::u32, raw_no)); - look!(p, 6, token::CloseDelim(Delimiter::Parenthesis)); - look!(p, 7, token::OpenDelim(Delimiter::Brace)); - look!(p, 8, token::Ident(sym_x, raw_no)); - look!(p, 9, token::CloseDelim(Delimiter::Brace)); - look!(p, 10, token::Ident(kw::Struct, raw_no)); - look!(p, 11, token::Ident(sym_S, raw_no)); - look!(p, 12, token::Semi); + look(&p, 0, token::Ident(kw::Fn, raw_no)); + look(&p, 1, token::Ident(sym_f, raw_no)); + look(&p, 2, token::OpenDelim(Delimiter::Parenthesis)); + look(&p, 3, token::Ident(sym_x, raw_no)); + look(&p, 4, token::Colon); + look(&p, 5, token::Ident(sym::u32, raw_no)); + look(&p, 6, token::CloseDelim(Delimiter::Parenthesis)); + look(&p, 7, token::OpenDelim(Delimiter::Brace)); + look(&p, 8, token::Ident(sym_x, raw_no)); + look(&p, 9, token::CloseDelim(Delimiter::Brace)); + look(&p, 10, token::Ident(kw::Struct, raw_no)); + look(&p, 11, token::Ident(sym_S, raw_no)); + look(&p, 12, token::Semi); // Any lookahead past the end of the token stream returns `Eof`. - look!(p, 13, token::Eof); - look!(p, 14, token::Eof); - look!(p, 15, token::Eof); - look!(p, 100, token::Eof); + look(&p, 13, token::Eof); + look(&p, 14, token::Eof); + look(&p, 15, token::Eof); + look(&p, 100, token::Eof); // Move forward to the first `x`. for _ in 0..3 { p.bump(); } - look!(p, 0, token::Ident(sym_x, raw_no)); - look!(p, 1, token::Colon); - look!(p, 2, token::Ident(sym::u32, raw_no)); - look!(p, 3, token::CloseDelim(Delimiter::Parenthesis)); - look!(p, 4, token::OpenDelim(Delimiter::Brace)); - look!(p, 5, token::Ident(sym_x, raw_no)); - look!(p, 6, token::CloseDelim(Delimiter::Brace)); - look!(p, 7, token::Ident(kw::Struct, raw_no)); - look!(p, 8, token::Ident(sym_S, raw_no)); - look!(p, 9, token::Semi); - look!(p, 10, token::Eof); - look!(p, 11, token::Eof); - look!(p, 100, token::Eof); + look(&p, 0, token::Ident(sym_x, raw_no)); + look(&p, 1, token::Colon); + look(&p, 2, token::Ident(sym::u32, raw_no)); + look(&p, 3, token::CloseDelim(Delimiter::Parenthesis)); + look(&p, 4, token::OpenDelim(Delimiter::Brace)); + look(&p, 5, token::Ident(sym_x, raw_no)); + look(&p, 6, token::CloseDelim(Delimiter::Brace)); + look(&p, 7, token::Ident(kw::Struct, raw_no)); + look(&p, 8, token::Ident(sym_S, raw_no)); + look(&p, 9, token::Semi); + look(&p, 10, token::Eof); + look(&p, 11, token::Eof); + look(&p, 100, token::Eof); // Move forward to the `;`. for _ in 0..9 { p.bump(); } - look!(p, 0, token::Semi); + look(&p, 0, token::Semi); // Any lookahead past the end of the token stream returns `Eof`. - look!(p, 1, token::Eof); - look!(p, 100, token::Eof); + look(&p, 1, token::Eof); + look(&p, 100, token::Eof); // Move one past the `;`, i.e. past the end of the token stream. p.bump(); - look!(p, 0, token::Eof); - look!(p, 1, token::Eof); - look!(p, 100, token::Eof); + look(&p, 0, token::Eof); + look(&p, 1, token::Eof); + look(&p, 100, token::Eof); // Bumping after Eof is idempotent. p.bump(); - look!(p, 0, token::Eof); - look!(p, 1, token::Eof); - look!(p, 100, token::Eof); + look(&p, 0, token::Eof); + look(&p, 1, token::Eof); + look(&p, 100, token::Eof); }); } @@ -1476,24 +1477,24 @@ fn look_ahead_non_outermost_stream() { for _ in 0..3 { p.bump(); } - look!(p, 0, token::Ident(kw::Fn, raw_no)); - look!(p, 1, token::Ident(sym_f, raw_no)); - look!(p, 2, token::OpenDelim(Delimiter::Parenthesis)); - look!(p, 3, token::Ident(sym_x, raw_no)); - look!(p, 4, token::Colon); - look!(p, 5, token::Ident(sym::u32, raw_no)); - look!(p, 6, token::CloseDelim(Delimiter::Parenthesis)); - look!(p, 7, token::OpenDelim(Delimiter::Brace)); - look!(p, 8, token::Ident(sym_x, raw_no)); - look!(p, 9, token::CloseDelim(Delimiter::Brace)); - look!(p, 10, token::Ident(kw::Struct, raw_no)); - look!(p, 11, token::Ident(sym_S, raw_no)); - look!(p, 12, token::Semi); - look!(p, 13, token::CloseDelim(Delimiter::Brace)); + look(&p, 0, token::Ident(kw::Fn, raw_no)); + look(&p, 1, token::Ident(sym_f, raw_no)); + look(&p, 2, token::OpenDelim(Delimiter::Parenthesis)); + look(&p, 3, token::Ident(sym_x, raw_no)); + look(&p, 4, token::Colon); + look(&p, 5, token::Ident(sym::u32, raw_no)); + look(&p, 6, token::CloseDelim(Delimiter::Parenthesis)); + look(&p, 7, token::OpenDelim(Delimiter::Brace)); + look(&p, 8, token::Ident(sym_x, raw_no)); + look(&p, 9, token::CloseDelim(Delimiter::Brace)); + look(&p, 10, token::Ident(kw::Struct, raw_no)); + look(&p, 11, token::Ident(sym_S, raw_no)); + look(&p, 12, token::Semi); + look(&p, 13, token::CloseDelim(Delimiter::Brace)); // Any lookahead past the end of the token stream returns `Eof`. - look!(p, 14, token::Eof); - look!(p, 15, token::Eof); - look!(p, 100, token::Eof); + look(&p, 14, token::Eof); + look(&p, 15, token::Eof); + look(&p, 100, token::Eof); }); } From cf2dfb2ced7862077ceff468c679f44d1e6d56cc Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 13 Jul 2024 13:17:14 +1000 Subject: [PATCH 2/3] Add a test for `Parser::debug_lookahead`. That method is currently badly broken, and the test output reflects this. The obtained tokens list is always empty, except in the case where we go two `bump`s past the final token, whereupon it will produce as many `Eof` tokens as asked for. --- compiler/rustc_parse/src/parser/tests.rs | 149 +++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index ee2d39b03bd7a..8703d5b849053 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -1498,6 +1498,155 @@ fn look_ahead_non_outermost_stream() { }); } +// FIXME(nnethercote) All the output is currently wrong. +#[test] +fn debug_lookahead() { + create_default_session_globals_then(|| { + let psess = psess(); + let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string()); + + // Current position is the `fn`. + assert_eq!( + &format!("{:#?}", p.debug_lookahead(0)), + "Parser { + prev_token: Token { + kind: Question, + span: Span { + lo: BytePos( + 0, + ), + hi: BytePos( + 0, + ), + ctxt: #0, + }, + }, + tokens: [], + approx_token_stream_pos: 1, + .. +}" + ); + assert_eq!( + &format!("{:#?}", p.debug_lookahead(7)), + "Parser { + prev_token: Token { + kind: Question, + span: Span { + lo: BytePos( + 0, + ), + hi: BytePos( + 0, + ), + ctxt: #0, + }, + }, + tokens: [], + approx_token_stream_pos: 1, + .. +}" + ); + assert_eq!( + &format!("{:#?}", p.debug_lookahead(15)), + "Parser { + prev_token: Token { + kind: Question, + span: Span { + lo: BytePos( + 0, + ), + hi: BytePos( + 0, + ), + ctxt: #0, + }, + }, + tokens: [], + approx_token_stream_pos: 1, + .. +}" + ); + + // Move forward to the second `x`. + for _ in 0..8 { + p.bump(); + } + assert_eq!( + &format!("{:#?}", p.debug_lookahead(1)), + "Parser { + prev_token: Token { + kind: OpenDelim( + Brace, + ), + span: Span { + lo: BytePos( + 13, + ), + hi: BytePos( + 14, + ), + ctxt: #0, + }, + }, + tokens: [], + approx_token_stream_pos: 9, + .. +}" + ); + assert_eq!( + &format!("{:#?}", p.debug_lookahead(4)), + "Parser { + prev_token: Token { + kind: OpenDelim( + Brace, + ), + span: Span { + lo: BytePos( + 13, + ), + hi: BytePos( + 14, + ), + ctxt: #0, + }, + }, + tokens: [], + approx_token_stream_pos: 9, + .. +}" + ); + + // Move two past the final token (the `;`). + for _ in 0..6 { + p.bump(); + } + assert_eq!( + &format!("{:#?}", p.debug_lookahead(3)), + "Parser { + prev_token: Token { + kind: Eof, + span: Span { + lo: BytePos( + 27, + ), + hi: BytePos( + 28, + ), + ctxt: #0, + }, + }, + tokens: [ + Eof, + Eof, + Eof, + ], + approx_token_stream_pos: 15, + .. +}" + ); + }); +} + // This tests that when parsing a string (rather than a file) we don't try // and read in a file for a module declaration and just parse a stub. // See `recurse_into_file_modules` in the parser. From aa0e8e147588c75fbb6ad158140c890582886027 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 13 Jul 2024 15:53:31 +1000 Subject: [PATCH 3/3] Fix `DebugParser`. It currently doesn't work at all. This commit changes it to a simpler imperative style that produces a valid `tokens` vec. (An aside: I find `Iterator::scan` to be a pretty wretched function, that produces code which is very hard to understand. Probably why this is just one of two uses of it in the entire compiler.) --- compiler/rustc_parse/src/parser/mod.rs | 18 ++-- compiler/rustc_parse/src/parser/tests.rs | 100 +++++++++++++++++++++-- 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index ef9b3aabc61ca..6586baae00a22 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1537,14 +1537,16 @@ impl<'a> Parser<'a> { // we don't need N spans, but we want at least one, so print all of prev_token dbg_fmt.field("prev_token", &parser.prev_token); - // make it easier to peek farther ahead by taking TokenKinds only until EOF - let tokens = (0..*lookahead) - .map(|i| parser.look_ahead(i, |tok| tok.kind.clone())) - .scan(parser.prev_token == TokenKind::Eof, |eof, tok| { - let current = eof.then_some(tok.clone()); // include a trailing EOF token - *eof |= &tok == &TokenKind::Eof; - current - }); + let mut tokens = vec![]; + for i in 0..*lookahead { + let tok = parser.look_ahead(i, |tok| tok.kind.clone()); + let is_eof = tok == TokenKind::Eof; + tokens.push(tok); + if is_eof { + // Don't look ahead past EOF. + break; + } + } dbg_fmt.field_with("tokens", |field| field.debug_list().entries(tokens).finish()); dbg_fmt.field("approx_token_stream_pos", &parser.num_bump_calls); diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index 8703d5b849053..cf791d332a2fc 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -1541,11 +1541,36 @@ fn debug_lookahead() { ctxt: #0, }, }, - tokens: [], + tokens: [ + Ident( + \"fn\", + No, + ), + Ident( + \"f\", + No, + ), + OpenDelim( + Parenthesis, + ), + Ident( + \"x\", + No, + ), + Colon, + Ident( + \"u32\", + No, + ), + CloseDelim( + Parenthesis, + ), + ], approx_token_stream_pos: 1, .. }" ); + // There are 13 tokens. We request 15, get 14; the last one is `Eof`. assert_eq!( &format!("{:#?}", p.debug_lookahead(15)), "Parser { @@ -1561,7 +1586,51 @@ fn debug_lookahead() { ctxt: #0, }, }, - tokens: [], + tokens: [ + Ident( + \"fn\", + No, + ), + Ident( + \"f\", + No, + ), + OpenDelim( + Parenthesis, + ), + Ident( + \"x\", + No, + ), + Colon, + Ident( + \"u32\", + No, + ), + CloseDelim( + Parenthesis, + ), + OpenDelim( + Brace, + ), + Ident( + \"x\", + No, + ), + CloseDelim( + Brace, + ), + Ident( + \"struct\", + No, + ), + Ident( + \"S\", + No, + ), + Semi, + Eof, + ], approx_token_stream_pos: 1, .. }" @@ -1588,7 +1657,12 @@ fn debug_lookahead() { ctxt: #0, }, }, - tokens: [], + tokens: [ + Ident( + \"x\", + No, + ), + ], approx_token_stream_pos: 9, .. }" @@ -1610,7 +1684,23 @@ fn debug_lookahead() { ctxt: #0, }, }, - tokens: [], + tokens: [ + Ident( + \"x\", + No, + ), + CloseDelim( + Brace, + ), + Ident( + \"struct\", + No, + ), + Ident( + \"S\", + No, + ), + ], approx_token_stream_pos: 9, .. }" @@ -1637,8 +1727,6 @@ fn debug_lookahead() { }, tokens: [ Eof, - Eof, - Eof, ], approx_token_stream_pos: 15, ..