From 69f6145ad82e931f730cca81f12a369d894121a6 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Sat, 13 Jul 2024 11:15:40 +1000
Subject: [PATCH 1/3] Change `look` from a macro to a function.

Using `#[track_caller]` works if the assertion is moved outside of the
closure.
---
 compiler/rustc_parse/src/parser/tests.rs | 125 ++++++++++++-----------
 1 file changed, 63 insertions(+), 62 deletions(-)

diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs
index 5b2d119e42b45..ee2d39b03bd7a 100644
--- a/compiler/rustc_parse/src/parser/tests.rs
+++ b/compiler/rustc_parse/src/parser/tests.rs
@@ -1376,12 +1376,13 @@ fn ttdelim_span() {
     });
 }
 
-// Uses a macro rather than a function so that failure messages mention the
-// correct line in the test function.
-macro_rules! look {
-    ($p:ident, $dist:literal, $kind:expr) => {
-        $p.look_ahead($dist, |tok| assert_eq!($kind, tok.kind));
-    };
+#[track_caller]
+fn look(p: &Parser<'_>, dist: usize, kind: rustc_ast::token::TokenKind) {
+    // Do the `assert_eq` outside the closure so that `track_caller` works.
+    // (`#![feature(closure_track_caller)]` + `#[track_caller]` on the closure
+    // doesn't give the line number in the test below if the assertion fails.)
+    let tok = p.look_ahead(dist, |tok| tok.clone());
+    assert_eq!(kind, tok.kind);
 }
 
 #[test]
@@ -1397,63 +1398,63 @@ fn look_ahead() {
         let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string());
 
         // Current position is the `fn`.
-        look!(p, 0, token::Ident(kw::Fn, raw_no));
-        look!(p, 1, token::Ident(sym_f, raw_no));
-        look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
-        look!(p, 3, token::Ident(sym_x, raw_no));
-        look!(p, 4, token::Colon);
-        look!(p, 5, token::Ident(sym::u32, raw_no));
-        look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
-        look!(p, 7, token::OpenDelim(Delimiter::Brace));
-        look!(p, 8, token::Ident(sym_x, raw_no));
-        look!(p, 9, token::CloseDelim(Delimiter::Brace));
-        look!(p, 10, token::Ident(kw::Struct, raw_no));
-        look!(p, 11, token::Ident(sym_S, raw_no));
-        look!(p, 12, token::Semi);
+        look(&p, 0, token::Ident(kw::Fn, raw_no));
+        look(&p, 1, token::Ident(sym_f, raw_no));
+        look(&p, 2, token::OpenDelim(Delimiter::Parenthesis));
+        look(&p, 3, token::Ident(sym_x, raw_no));
+        look(&p, 4, token::Colon);
+        look(&p, 5, token::Ident(sym::u32, raw_no));
+        look(&p, 6, token::CloseDelim(Delimiter::Parenthesis));
+        look(&p, 7, token::OpenDelim(Delimiter::Brace));
+        look(&p, 8, token::Ident(sym_x, raw_no));
+        look(&p, 9, token::CloseDelim(Delimiter::Brace));
+        look(&p, 10, token::Ident(kw::Struct, raw_no));
+        look(&p, 11, token::Ident(sym_S, raw_no));
+        look(&p, 12, token::Semi);
         // Any lookahead past the end of the token stream returns `Eof`.
-        look!(p, 13, token::Eof);
-        look!(p, 14, token::Eof);
-        look!(p, 15, token::Eof);
-        look!(p, 100, token::Eof);
+        look(&p, 13, token::Eof);
+        look(&p, 14, token::Eof);
+        look(&p, 15, token::Eof);
+        look(&p, 100, token::Eof);
 
         // Move forward to the first `x`.
         for _ in 0..3 {
             p.bump();
         }
-        look!(p, 0, token::Ident(sym_x, raw_no));
-        look!(p, 1, token::Colon);
-        look!(p, 2, token::Ident(sym::u32, raw_no));
-        look!(p, 3, token::CloseDelim(Delimiter::Parenthesis));
-        look!(p, 4, token::OpenDelim(Delimiter::Brace));
-        look!(p, 5, token::Ident(sym_x, raw_no));
-        look!(p, 6, token::CloseDelim(Delimiter::Brace));
-        look!(p, 7, token::Ident(kw::Struct, raw_no));
-        look!(p, 8, token::Ident(sym_S, raw_no));
-        look!(p, 9, token::Semi);
-        look!(p, 10, token::Eof);
-        look!(p, 11, token::Eof);
-        look!(p, 100, token::Eof);
+        look(&p, 0, token::Ident(sym_x, raw_no));
+        look(&p, 1, token::Colon);
+        look(&p, 2, token::Ident(sym::u32, raw_no));
+        look(&p, 3, token::CloseDelim(Delimiter::Parenthesis));
+        look(&p, 4, token::OpenDelim(Delimiter::Brace));
+        look(&p, 5, token::Ident(sym_x, raw_no));
+        look(&p, 6, token::CloseDelim(Delimiter::Brace));
+        look(&p, 7, token::Ident(kw::Struct, raw_no));
+        look(&p, 8, token::Ident(sym_S, raw_no));
+        look(&p, 9, token::Semi);
+        look(&p, 10, token::Eof);
+        look(&p, 11, token::Eof);
+        look(&p, 100, token::Eof);
 
         // Move forward to the `;`.
         for _ in 0..9 {
             p.bump();
         }
-        look!(p, 0, token::Semi);
+        look(&p, 0, token::Semi);
         // Any lookahead past the end of the token stream returns `Eof`.
-        look!(p, 1, token::Eof);
-        look!(p, 100, token::Eof);
+        look(&p, 1, token::Eof);
+        look(&p, 100, token::Eof);
 
         // Move one past the `;`, i.e. past the end of the token stream.
         p.bump();
-        look!(p, 0, token::Eof);
-        look!(p, 1, token::Eof);
-        look!(p, 100, token::Eof);
+        look(&p, 0, token::Eof);
+        look(&p, 1, token::Eof);
+        look(&p, 100, token::Eof);
 
         // Bumping after Eof is idempotent.
         p.bump();
-        look!(p, 0, token::Eof);
-        look!(p, 1, token::Eof);
-        look!(p, 100, token::Eof);
+        look(&p, 0, token::Eof);
+        look(&p, 1, token::Eof);
+        look(&p, 100, token::Eof);
     });
 }
 
@@ -1476,24 +1477,24 @@ fn look_ahead_non_outermost_stream() {
         for _ in 0..3 {
             p.bump();
         }
-        look!(p, 0, token::Ident(kw::Fn, raw_no));
-        look!(p, 1, token::Ident(sym_f, raw_no));
-        look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
-        look!(p, 3, token::Ident(sym_x, raw_no));
-        look!(p, 4, token::Colon);
-        look!(p, 5, token::Ident(sym::u32, raw_no));
-        look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
-        look!(p, 7, token::OpenDelim(Delimiter::Brace));
-        look!(p, 8, token::Ident(sym_x, raw_no));
-        look!(p, 9, token::CloseDelim(Delimiter::Brace));
-        look!(p, 10, token::Ident(kw::Struct, raw_no));
-        look!(p, 11, token::Ident(sym_S, raw_no));
-        look!(p, 12, token::Semi);
-        look!(p, 13, token::CloseDelim(Delimiter::Brace));
+        look(&p, 0, token::Ident(kw::Fn, raw_no));
+        look(&p, 1, token::Ident(sym_f, raw_no));
+        look(&p, 2, token::OpenDelim(Delimiter::Parenthesis));
+        look(&p, 3, token::Ident(sym_x, raw_no));
+        look(&p, 4, token::Colon);
+        look(&p, 5, token::Ident(sym::u32, raw_no));
+        look(&p, 6, token::CloseDelim(Delimiter::Parenthesis));
+        look(&p, 7, token::OpenDelim(Delimiter::Brace));
+        look(&p, 8, token::Ident(sym_x, raw_no));
+        look(&p, 9, token::CloseDelim(Delimiter::Brace));
+        look(&p, 10, token::Ident(kw::Struct, raw_no));
+        look(&p, 11, token::Ident(sym_S, raw_no));
+        look(&p, 12, token::Semi);
+        look(&p, 13, token::CloseDelim(Delimiter::Brace));
         // Any lookahead past the end of the token stream returns `Eof`.
-        look!(p, 14, token::Eof);
-        look!(p, 15, token::Eof);
-        look!(p, 100, token::Eof);
+        look(&p, 14, token::Eof);
+        look(&p, 15, token::Eof);
+        look(&p, 100, token::Eof);
     });
 }
 

From cf2dfb2ced7862077ceff468c679f44d1e6d56cc Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Sat, 13 Jul 2024 13:17:14 +1000
Subject: [PATCH 2/3] Add a test for `Parser::debug_lookahead`.

That method is currently badly broken, and the test output reflects
this. The obtained tokens list is always empty, except in the case where
we go two `bump`s past the final token, whereupon it will produce as
many `Eof` tokens as asked for.
---
 compiler/rustc_parse/src/parser/tests.rs | 149 +++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs
index ee2d39b03bd7a..8703d5b849053 100644
--- a/compiler/rustc_parse/src/parser/tests.rs
+++ b/compiler/rustc_parse/src/parser/tests.rs
@@ -1498,6 +1498,155 @@ fn look_ahead_non_outermost_stream() {
     });
 }
 
+// FIXME(nnethercote) All the output is currently wrong.
+#[test]
+fn debug_lookahead() {
+    create_default_session_globals_then(|| {
+        let psess = psess();
+        let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string());
+
+        // Current position is the `fn`.
+        assert_eq!(
+            &format!("{:#?}", p.debug_lookahead(0)),
+            "Parser {
+    prev_token: Token {
+        kind: Question,
+        span: Span {
+            lo: BytePos(
+                0,
+            ),
+            hi: BytePos(
+                0,
+            ),
+            ctxt: #0,
+        },
+    },
+    tokens: [],
+    approx_token_stream_pos: 1,
+    ..
+}"
+        );
+        assert_eq!(
+            &format!("{:#?}", p.debug_lookahead(7)),
+            "Parser {
+    prev_token: Token {
+        kind: Question,
+        span: Span {
+            lo: BytePos(
+                0,
+            ),
+            hi: BytePos(
+                0,
+            ),
+            ctxt: #0,
+        },
+    },
+    tokens: [],
+    approx_token_stream_pos: 1,
+    ..
+}"
+        );
+        assert_eq!(
+            &format!("{:#?}", p.debug_lookahead(15)),
+            "Parser {
+    prev_token: Token {
+        kind: Question,
+        span: Span {
+            lo: BytePos(
+                0,
+            ),
+            hi: BytePos(
+                0,
+            ),
+            ctxt: #0,
+        },
+    },
+    tokens: [],
+    approx_token_stream_pos: 1,
+    ..
+}"
+        );
+
+        // Move forward to the second `x`.
+        for _ in 0..8 {
+            p.bump();
+        }
+        assert_eq!(
+            &format!("{:#?}", p.debug_lookahead(1)),
+            "Parser {
+    prev_token: Token {
+        kind: OpenDelim(
+            Brace,
+        ),
+        span: Span {
+            lo: BytePos(
+                13,
+            ),
+            hi: BytePos(
+                14,
+            ),
+            ctxt: #0,
+        },
+    },
+    tokens: [],
+    approx_token_stream_pos: 9,
+    ..
+}"
+        );
+        assert_eq!(
+            &format!("{:#?}", p.debug_lookahead(4)),
+            "Parser {
+    prev_token: Token {
+        kind: OpenDelim(
+            Brace,
+        ),
+        span: Span {
+            lo: BytePos(
+                13,
+            ),
+            hi: BytePos(
+                14,
+            ),
+            ctxt: #0,
+        },
+    },
+    tokens: [],
+    approx_token_stream_pos: 9,
+    ..
+}"
+        );
+
+        // Move two past the final token (the `;`).
+        for _ in 0..6 {
+            p.bump();
+        }
+        assert_eq!(
+            &format!("{:#?}", p.debug_lookahead(3)),
+            "Parser {
+    prev_token: Token {
+        kind: Eof,
+        span: Span {
+            lo: BytePos(
+                27,
+            ),
+            hi: BytePos(
+                28,
+            ),
+            ctxt: #0,
+        },
+    },
+    tokens: [
+        Eof,
+        Eof,
+        Eof,
+    ],
+    approx_token_stream_pos: 15,
+    ..
+}"
+        );
+    });
+}
+
 // This tests that when parsing a string (rather than a file) we don't try
 // and read in a file for a module declaration and just parse a stub.
 // See `recurse_into_file_modules` in the parser.

From aa0e8e147588c75fbb6ad158140c890582886027 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Sat, 13 Jul 2024 15:53:31 +1000
Subject: [PATCH 3/3] Fix `DebugParser`.

It currently doesn't work at all. This commit changes it to a simpler
imperative style that produces a valid `tokens` vec.

(An aside: I find `Iterator::scan` to be a pretty wretched function,
that produces code which is very hard to understand. Probably why this
is just one of two uses of it in the entire compiler.)
---
 compiler/rustc_parse/src/parser/mod.rs   |  18 ++--
 compiler/rustc_parse/src/parser/tests.rs | 100 +++++++++++++++++++++--
 2 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index ef9b3aabc61ca..6586baae00a22 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -1537,14 +1537,16 @@ impl<'a> Parser<'a> {
 
                 // we don't need N spans, but we want at least one, so print all of prev_token
                 dbg_fmt.field("prev_token", &parser.prev_token);
-                // make it easier to peek farther ahead by taking TokenKinds only until EOF
-                let tokens = (0..*lookahead)
-                    .map(|i| parser.look_ahead(i, |tok| tok.kind.clone()))
-                    .scan(parser.prev_token == TokenKind::Eof, |eof, tok| {
-                        let current = eof.then_some(tok.clone()); // include a trailing EOF token
-                        *eof |= &tok == &TokenKind::Eof;
-                        current
-                    });
+                let mut tokens = vec![];
+                for i in 0..*lookahead {
+                    let tok = parser.look_ahead(i, |tok| tok.kind.clone());
+                    let is_eof = tok == TokenKind::Eof;
+                    tokens.push(tok);
+                    if is_eof {
+                        // Don't look ahead past EOF.
+                        break;
+                    }
+                }
                 dbg_fmt.field_with("tokens", |field| field.debug_list().entries(tokens).finish());
                 dbg_fmt.field("approx_token_stream_pos", &parser.num_bump_calls);
 
diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs
index 8703d5b849053..cf791d332a2fc 100644
--- a/compiler/rustc_parse/src/parser/tests.rs
+++ b/compiler/rustc_parse/src/parser/tests.rs
@@ -1541,11 +1541,36 @@ fn debug_lookahead() {
             ctxt: #0,
         },
     },
-    tokens: [],
+    tokens: [
+        Ident(
+            \"fn\",
+            No,
+        ),
+        Ident(
+            \"f\",
+            No,
+        ),
+        OpenDelim(
+            Parenthesis,
+        ),
+        Ident(
+            \"x\",
+            No,
+        ),
+        Colon,
+        Ident(
+            \"u32\",
+            No,
+        ),
+        CloseDelim(
+            Parenthesis,
+        ),
+    ],
     approx_token_stream_pos: 1,
     ..
 }"
         );
+        // There are 13 tokens. We request 15, get 14; the last one is `Eof`.
         assert_eq!(
             &format!("{:#?}", p.debug_lookahead(15)),
             "Parser {
@@ -1561,7 +1586,51 @@ fn debug_lookahead() {
             ctxt: #0,
         },
     },
-    tokens: [],
+    tokens: [
+        Ident(
+            \"fn\",
+            No,
+        ),
+        Ident(
+            \"f\",
+            No,
+        ),
+        OpenDelim(
+            Parenthesis,
+        ),
+        Ident(
+            \"x\",
+            No,
+        ),
+        Colon,
+        Ident(
+            \"u32\",
+            No,
+        ),
+        CloseDelim(
+            Parenthesis,
+        ),
+        OpenDelim(
+            Brace,
+        ),
+        Ident(
+            \"x\",
+            No,
+        ),
+        CloseDelim(
+            Brace,
+        ),
+        Ident(
+            \"struct\",
+            No,
+        ),
+        Ident(
+            \"S\",
+            No,
+        ),
+        Semi,
+        Eof,
+    ],
     approx_token_stream_pos: 1,
     ..
 }"
@@ -1588,7 +1657,12 @@ fn debug_lookahead() {
             ctxt: #0,
         },
     },
-    tokens: [],
+    tokens: [
+        Ident(
+            \"x\",
+            No,
+        ),
+    ],
     approx_token_stream_pos: 9,
     ..
 }"
@@ -1610,7 +1684,23 @@ fn debug_lookahead() {
             ctxt: #0,
         },
     },
-    tokens: [],
+    tokens: [
+        Ident(
+            \"x\",
+            No,
+        ),
+        CloseDelim(
+            Brace,
+        ),
+        Ident(
+            \"struct\",
+            No,
+        ),
+        Ident(
+            \"S\",
+            No,
+        ),
+    ],
     approx_token_stream_pos: 9,
     ..
 }"
@@ -1637,8 +1727,6 @@ fn debug_lookahead() {
     },
     tokens: [
         Eof,
-        Eof,
-        Eof,
     ],
     approx_token_stream_pos: 15,
     ..