diff --git a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
index 0994d0e3b3d88..81c89fad8e8a0 100644
--- a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
@@ -2,12 +2,12 @@ use std::convert::Infallible;
 use std::marker::PhantomData;
 
 use rustc_type_ir::inherent::*;
-use rustc_type_ir::search_graph::{self, CycleKind, UsageKind};
+use rustc_type_ir::search_graph::{self, PathKind};
 use rustc_type_ir::solve::{CanonicalInput, Certainty, QueryResult};
 use rustc_type_ir::Interner;
 
 use super::inspect::ProofTreeBuilder;
-use super::FIXPOINT_STEP_LIMIT;
+use super::{has_no_inference_or_external_constraints, FIXPOINT_STEP_LIMIT};
 use crate::delegate::SolverDelegate;
 
 /// This type is never constructed. We only use it to implement `search_graph::Delegate`
@@ -23,10 +23,11 @@ where
 {
     type Cx = D::Interner;
 
+    const ENABLE_PROVISIONAL_CACHE: bool = true;
     type ValidationScope = Infallible;
     fn enter_validation_scope(
         _cx: Self::Cx,
-        _input: <Self::Cx as search_graph::Cx>::Input,
+        _input: CanonicalInput<I>,
     ) -> Option<Self::ValidationScope> {
         None
     }
@@ -38,39 +39,32 @@ where
         inspect.is_noop()
     }
 
+    const DIVIDE_AVAILABLE_DEPTH_ON_OVERFLOW: usize = 4;
     fn recursion_limit(cx: I) -> usize {
         cx.recursion_limit()
     }
 
     fn initial_provisional_result(
         cx: I,
-        kind: CycleKind,
+        kind: PathKind,
         input: CanonicalInput<I>,
     ) -> QueryResult<I> {
         match kind {
-            CycleKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes),
-            CycleKind::Inductive => response_no_constraints(cx, input, Certainty::overflow(false)),
+            PathKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes),
+            PathKind::Inductive => response_no_constraints(cx, input, Certainty::overflow(false)),
         }
     }
 
-    fn reached_fixpoint(
-        cx: I,
-        kind: UsageKind,
+    fn is_initial_provisional_result(
+        cx: Self::Cx,
+        kind: PathKind,
         input: CanonicalInput<I>,
-        provisional_result: Option<QueryResult<I>>,
         result: QueryResult<I>,
     ) -> bool {
-        if let Some(r) = provisional_result {
-            r == result
-        } else {
-            match kind {
-                UsageKind::Single(CycleKind::Coinductive) => {
-                    response_no_constraints(cx, input, Certainty::Yes) == result
-                }
-                UsageKind::Single(CycleKind::Inductive) => {
-                    response_no_constraints(cx, input, Certainty::overflow(false)) == result
-                }
-                UsageKind::Mixed => false,
+        match kind {
+            PathKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes) == result,
+            PathKind::Inductive => {
+                response_no_constraints(cx, input, Certainty::overflow(false)) == result
             }
         }
     }
@@ -88,6 +82,22 @@ where
         response_no_constraints(cx, input, Certainty::overflow(false))
     }
 
+    fn is_ambiguous_result(result: QueryResult<I>) -> bool {
+        result.is_ok_and(|response| {
+            has_no_inference_or_external_constraints(response)
+                && matches!(response.value.certainty, Certainty::Maybe(_))
+        })
+    }
+
+    fn propagate_ambiguity(
+        cx: I,
+        for_input: CanonicalInput<I>,
+        from_result: QueryResult<I>,
+    ) -> QueryResult<I> {
+        let certainty = from_result.unwrap().value.certainty;
+        response_no_constraints(cx, for_input, certainty)
+    }
+
     fn step_is_coinductive(cx: I, input: CanonicalInput<I>) -> bool {
         input.value.goal.predicate.is_coinductive(cx)
    }
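The `Delegate` changes above are easier to see in isolation. Here is a minimal standalone sketch of the intended semantics of `initial_provisional_result` and `is_initial_provisional_result`; the enum and result type are illustrative stand-ins, not the solver's real `QueryResult`:

```rust
// Illustrative stand-ins for the solver's types; only the shape of the
// logic mirrors the diff above.
#[derive(Clone, Copy, PartialEq, Eq)]
enum PathKind {
    Coinductive,
    Inductive,
}

#[derive(Clone, Copy, PartialEq, Eq)]
enum Response {
    Success,  // models `Certainty::Yes` with no constraints
    Overflow, // models `Certainty::overflow(false)` with no constraints
}

// Coinductive cycles start out as success and inductive cycles as overflow;
// the fixpoint iteration then reruns the cycle head until its result stops
// changing.
fn initial_provisional_result(kind: PathKind) -> Response {
    match kind {
        PathKind::Coinductive => Response::Success,
        PathKind::Inductive => Response::Overflow,
    }
}

// With the provisional-result comparison moved into the search graph itself,
// the delegate is only asked whether a result *equals* the initial one for
// its path kind.
fn is_initial_provisional_result(kind: PathKind, result: Response) -> bool {
    initial_provisional_result(kind) == result
}
```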
diff --git a/compiler/rustc_type_ir/src/search_graph/global_cache.rs b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
index d63a8d16bea73..47f7cefac6ad1 100644
--- a/compiler/rustc_type_ir/src/search_graph/global_cache.rs
+++ b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
@@ -1,12 +1,17 @@
 use derive_where::derive_where;
-use rustc_index::IndexVec;
 
-use super::{AvailableDepth, Cx, StackDepth, StackEntry};
-use crate::data_structures::{HashMap, HashSet};
+use super::{AvailableDepth, Cx, NestedGoals};
+use crate::data_structures::HashMap;
 
 struct Success<X: Cx> {
-    result: X::Tracked<X::Result>,
     additional_depth: usize,
+    nested_goals: NestedGoals<X>,
+    result: X::Tracked<X::Result>,
+}
+
+struct WithOverflow<X: Cx> {
+    nested_goals: NestedGoals<X>,
+    result: X::Tracked<X::Result>,
 }
 
 /// The cache entry for a given input.
@@ -17,12 +22,7 @@ struct Success<X: Cx> {
 #[derive_where(Default; X: Cx)]
 struct CacheEntry<X: Cx> {
     success: Option<Success<X>>,
-    /// We have to be careful when caching roots of cycles.
-    ///
-    /// See the doc comment of `StackEntry::cycle_participants` for more
-    /// details.
-    nested_goals: HashSet<X::Input>,
-    with_overflow: HashMap<usize, X::Tracked<X::Result>>,
+    with_overflow: HashMap<usize, WithOverflow<X>>,
 }
 
 #[derive_where(Debug; X: Cx)]
@@ -30,10 +30,7 @@ pub(super) struct CacheData<'a, X: Cx> {
     pub(super) result: X::Result,
     pub(super) additional_depth: usize,
     pub(super) encountered_overflow: bool,
-    // FIXME: This is currently unused, but impacts the design
-    // by requiring a closure for `Cx::with_global_cache`.
-    #[allow(dead_code)]
-    pub(super) nested_goals: &'a HashSet<X::Input>,
+    pub(super) nested_goals: &'a NestedGoals<X>,
 }
 #[derive_where(Default; X: Cx)]
 pub struct GlobalCache<X: Cx> {
@@ -52,15 +49,17 @@ impl<X: Cx> GlobalCache<X> {
         additional_depth: usize,
         encountered_overflow: bool,
-        nested_goals: &HashSet<X::Input>,
+        nested_goals: NestedGoals<X>,
     ) {
         let result = cx.mk_tracked(result, dep_node);
         let entry = self.map.entry(input).or_default();
-        entry.nested_goals.extend(nested_goals);
         if encountered_overflow {
-            entry.with_overflow.insert(additional_depth, result);
+            let with_overflow = WithOverflow { nested_goals, result };
+            let prev = entry.with_overflow.insert(additional_depth, with_overflow);
+            assert!(prev.is_none());
         } else {
-            entry.success = Some(Success { result, additional_depth });
+            let prev = entry.success.replace(Success { additional_depth, nested_goals, result });
+            assert!(prev.is_none());
         }
     }
 
@@ -72,30 +71,37 @@ impl<X: Cx> GlobalCache<X> {
         &'a self,
         cx: X,
         input: X::Input,
-        stack: &IndexVec<StackDepth, StackEntry<X>>,
         available_depth: AvailableDepth,
+        mut candidate_is_applicable: impl FnMut(&NestedGoals<X>) -> bool,
     ) -> Option<CacheData<'a, X>> {
         let entry = self.map.get(&input)?;
-        if stack.iter().any(|e| entry.nested_goals.contains(&e.input)) {
-            return None;
+        if let Some(Success { additional_depth, ref nested_goals, ref result }) = entry.success {
+            if available_depth.cache_entry_is_applicable(additional_depth)
+                && candidate_is_applicable(nested_goals)
+            {
+                return Some(CacheData {
+                    result: cx.get_tracked(&result),
+                    additional_depth,
+                    encountered_overflow: false,
+                    nested_goals,
+                });
+            }
         }
 
-        if let Some(ref success) = entry.success {
-            if available_depth.cache_entry_is_applicable(success.additional_depth) {
+        let additional_depth = available_depth.0;
+        if let Some(WithOverflow { nested_goals, result }) =
+            entry.with_overflow.get(&additional_depth)
+        {
+            if candidate_is_applicable(nested_goals) {
                 return Some(CacheData {
-                    result: cx.get_tracked(&success.result),
-                    additional_depth: success.additional_depth,
-                    encountered_overflow: false,
-                    nested_goals: &entry.nested_goals,
+                    result: cx.get_tracked(result),
+                    additional_depth,
+                    encountered_overflow: true,
+                    nested_goals,
                 });
             }
         }
 
-        entry.with_overflow.get(&available_depth.0).map(|e| CacheData {
-            result: cx.get_tracked(e),
-            additional_depth: available_depth.0,
-            encountered_overflow: true,
-            nested_goals: &entry.nested_goals,
-        })
+        None
     }
 }
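The key API change here is that `get` no longer walks the stack itself; the caller passes a closure deciding whether an entry's nested goals make it usable. A minimal sketch of that control flow, assuming simplified string inputs and a plain `HashMap` instead of the real tracked storage:

```rust
use std::collections::HashMap;

// Simplified stand-in for a global cache entry: the result plus the nested
// goals its computation depended on.
struct Entry<R> {
    nested_goals: Vec<String>,
    result: R,
}

struct GlobalCache<R> {
    map: HashMap<String, Entry<R>>,
}

impl<R: Clone> GlobalCache<R> {
    // Mirrors the shape of `GlobalCache::get` above: the search graph passes
    // a closure which checks the entry's nested goals against its current
    // stack and provisional cache, so a cache hit can never hide a cycle.
    fn get(
        &self,
        input: &str,
        mut candidate_is_applicable: impl FnMut(&[String]) -> bool,
    ) -> Option<R> {
        let entry = self.map.get(input)?;
        if candidate_is_applicable(&entry.nested_goals) {
            Some(entry.result.clone())
        } else {
            None
        }
    }
}
```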
diff --git a/compiler/rustc_type_ir/src/search_graph/mod.rs b/compiler/rustc_type_ir/src/search_graph/mod.rs
index a1aa63c2e7d75..d47c9e725f350 100644
--- a/compiler/rustc_type_ir/src/search_graph/mod.rs
+++ b/compiler/rustc_type_ir/src/search_graph/mod.rs
@@ -1,19 +1,32 @@
+/// The search graph is responsible for caching and cycle detection in the trait
+/// solver. Making sure that caching doesn't result in soundness bugs or unstable
+/// query results is very challenging and makes this one of the most-involved
+/// self-contained components of the compiler.
+///
+/// We added fuzzing support to test its correctness. The fuzzers used to verify
+/// the current implementation can be found in https://github.com/lcnr/search_graph_fuzz.
+///
+/// This is just a quick overview of the general design, please check out the relevant
+/// [rustc-dev-guide chapter](https://rustc-dev-guide.rust-lang.org/solve/caching.html) for
+/// more details. Caching is split between a global cache and the per-cycle `provisional_cache`.
+/// The global cache has to be completely unobservable, while the per-cycle cache may impact
+/// behavior as long as the resulting behavior is still correct.
+use std::cmp::Ordering;
+use std::collections::BTreeSet;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::marker::PhantomData;
-use std::mem;
 
 use derive_where::derive_where;
 use rustc_index::{Idx, IndexVec};
 use tracing::debug;
 
-use crate::data_structures::{HashMap, HashSet};
+use crate::data_structures::HashMap;
 use crate::solve::SolverMode;
 
 mod global_cache;
 use global_cache::CacheData;
 pub use global_cache::GlobalCache;
-mod validate;
 
 /// The search graph does not simply use `Interner` directly
 /// to enable its fuzzing without having to stub the rest of
@@ -44,6 +57,9 @@ pub trait Cx: Copy {
 pub trait Delegate {
     type Cx: Cx;
+    /// Whether to use the provisional cache. Set to `false` by a fuzzer when
+    /// validating the search graph.
+    const ENABLE_PROVISIONAL_CACHE: bool;
     type ValidationScope;
     /// Returning `Some` disables the global cache for the current goal.
     ///
@@ -62,18 +78,18 @@ pub trait Delegate {
     type ProofTreeBuilder;
     fn inspect_is_noop(inspect: &mut Self::ProofTreeBuilder) -> bool;
 
+    const DIVIDE_AVAILABLE_DEPTH_ON_OVERFLOW: usize;
     fn recursion_limit(cx: Self::Cx) -> usize;
 
     fn initial_provisional_result(
         cx: Self::Cx,
-        kind: CycleKind,
+        kind: PathKind,
         input: <Self::Cx as Cx>::Input,
     ) -> <Self::Cx as Cx>::Result;
-    fn reached_fixpoint(
+    fn is_initial_provisional_result(
         cx: Self::Cx,
-        kind: UsageKind,
+        kind: PathKind,
         input: <Self::Cx as Cx>::Input,
-        provisional_result: Option<<Self::Cx as Cx>::Result>,
         result: <Self::Cx as Cx>::Result,
     ) -> bool;
     fn on_stack_overflow(
@@ -86,6 +102,13 @@ pub trait Delegate {
         input: <Self::Cx as Cx>::Input,
     ) -> <Self::Cx as Cx>::Result;
 
+    fn is_ambiguous_result(result: <Self::Cx as Cx>::Result) -> bool;
+    fn propagate_ambiguity(
+        cx: Self::Cx,
+        for_input: <Self::Cx as Cx>::Input,
+        from_result: <Self::Cx as Cx>::Result,
+    ) -> <Self::Cx as Cx>::Result;
+
     fn step_is_coinductive(cx: Self::Cx, input: <Self::Cx as Cx>::Input) -> bool;
 }
 
@@ -93,14 +116,14 @@ pub trait Delegate {
 /// result. In the case we return an initial provisional result depending
 /// on the kind of cycle.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum CycleKind {
+pub enum PathKind {
     Coinductive,
     Inductive,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum UsageKind {
-    Single(CycleKind),
+    Single(PathKind),
     Mixed,
 }
 impl UsageKind {
@@ -116,11 +139,9 @@ impl UsageKind {
             }
         }
     }
-}
-
-enum StepResult<X: Cx> {
-    Done(StackEntry<X>, X::Result),
-    HasChanged,
+    fn and_merge(&mut self, other: Self) {
+        *self = self.merge(other);
+    }
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -142,7 +163,7 @@ impl AvailableDepth {
         }
 
         Some(if last.encountered_overflow {
-            AvailableDepth(last.available_depth.0 / 2)
+            AvailableDepth(last.available_depth.0 / D::DIVIDE_AVAILABLE_DEPTH_ON_OVERFLOW)
         } else {
             AvailableDepth(last.available_depth.0 - 1)
         })
@@ -158,94 +179,181 @@ impl AvailableDepth {
     }
 }
 
+/// All cycle heads a given goal depends on, ordered by their stack depth.
+///
+/// We therefore pop the cycle heads from highest to lowest.
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+struct CycleHeads {
+    heads: BTreeSet<StackDepth>,
+}
+
+impl CycleHeads {
+    fn is_empty(&self) -> bool {
+        self.heads.is_empty()
+    }
+
+    fn highest_cycle_head(&self) -> StackDepth {
+        *self.heads.last().unwrap()
+    }
+
+    fn opt_highest_cycle_head(&self) -> Option<StackDepth> {
+        self.heads.last().copied()
+    }
+
+    fn opt_lowest_cycle_head(&self) -> Option<StackDepth> {
+        self.heads.first().copied()
+    }
+
+    fn remove_highest_cycle_head(&mut self) {
+        let last = self.heads.pop_last();
+        debug_assert_ne!(last, None);
+    }
+
+    fn insert(&mut self, head: StackDepth) {
+        self.heads.insert(head);
+    }
+
+    fn merge(&mut self, heads: &CycleHeads) {
+        for &head in heads.heads.iter() {
+            self.insert(head);
+        }
+    }
+
+    /// Update the cycle heads of a goal at depth `this` given the cycle heads
+    /// of a nested goal. This merges the heads after filtering the parent goal
+    /// itself.
+    fn extend_from_child(&mut self, this: StackDepth, child: &CycleHeads) {
+        for &head in child.heads.iter() {
+            match head.cmp(&this) {
+                Ordering::Less => {}
+                Ordering::Equal => continue,
+                Ordering::Greater => unreachable!(),
+            }
+
+            self.insert(head);
+        }
+    }
+}
+
+/// The nested goals of each stack entry and the path from the
+/// stack entry to that nested goal.
+///
+/// We only start tracking nested goals once we've either encountered
+/// overflow or a solver cycle. This is a performance optimization to
+/// avoid tracking nested goals on the happy path.
+///
+/// We use nested goals for two reasons:
+/// - when rebasing provisional cache entries
+/// - when checking whether we have to ignore a global cache entry as reevaluating
+///   it would encounter a cycle or use a provisional cache entry.
+///
+/// We need to disable the global cache if using it would hide a cycle, as
+/// cycles can impact behavior. The cycle ABA may have different final
+/// results from the cycle BAB depending on the cycle root.
+#[derive_where(Debug, Default; X: Cx)]
+struct NestedGoals<X: Cx> {
+    nested_goals: HashMap<X::Input, UsageKind>,
+}
+impl<X: Cx> NestedGoals<X> {
+    fn is_empty(&self) -> bool {
+        self.nested_goals.is_empty()
+    }
+
+    fn insert(&mut self, input: X::Input, path_from_entry: UsageKind) {
+        self.nested_goals.entry(input).or_insert(path_from_entry).and_merge(path_from_entry);
+    }
+
+    fn merge(&mut self, nested_goals: &NestedGoals<X>) {
+        #[allow(rustc::potential_query_instability)]
+        for (input, path_from_entry) in nested_goals.iter() {
+            self.insert(input, path_from_entry);
+        }
+    }
+
+    /// Adds the nested goals of a nested goal, given the path `step_kind` from this goal
+    /// to that nested goal.
+    ///
+    /// If the path from this goal to the nested goal is inductive, the paths from this goal
+    /// to all nested goals of that nested goal are also inductive. Otherwise the paths are
+    /// the same as for the child.
+    fn extend_from_child(&mut self, step_kind: PathKind, nested_goals: &NestedGoals<X>) {
+        #[allow(rustc::potential_query_instability)]
+        for (input, path_from_entry) in nested_goals.iter() {
+            let path_from_entry = match step_kind {
+                PathKind::Coinductive => path_from_entry,
+                PathKind::Inductive => UsageKind::Single(PathKind::Inductive),
+            };
+            self.insert(input, path_from_entry);
+        }
+    }
+
+    #[rustc_lint_query_instability]
+    #[allow(rustc::potential_query_instability)]
+    fn iter(&self) -> impl Iterator<Item = (X::Input, UsageKind)> + '_ {
+        self.nested_goals.iter().map(|(i, p)| (*i, *p))
+    }
+
+    fn get(&self, input: X::Input) -> Option<UsageKind> {
+        self.nested_goals.get(&input).copied()
+    }
+
+    fn contains(&self, input: X::Input) -> bool {
+        self.nested_goals.contains_key(&input)
+    }
+}
+
 rustc_index::newtype_index! {
     #[orderable]
     #[gate_rustc_only]
     pub struct StackDepth {}
 }
 
+/// Stack entries of the evaluation stack. Its fields tend to be lazily updated
+/// when popping a child goal or completely immutable.
 #[derive_where(Debug; X: Cx)]
 struct StackEntry<X: Cx> {
     input: X::Input,
 
+    /// The available depth of a given goal, immutable.
     available_depth: AvailableDepth,
 
     /// The maximum depth reached by this stack entry, only up-to date
     /// for the top of the stack and lazily updated for the rest.
     reached_depth: StackDepth,
 
-    /// Whether this entry is a non-root cycle participant.
-    ///
-    /// We must not move the result of non-root cycle participants to the
-    /// global cache. We store the highest stack depth of a head of a cycle
-    /// this goal is involved in. This necessary to soundly cache its
-    /// provisional result.
-    non_root_cycle_participant: Option<StackDepth>,
+    /// All cycle heads this goal depends on. Lazily updated and only
+    /// up-to date for the top of the stack.
+    heads: CycleHeads,
 
+    /// Whether evaluating this goal encountered overflow. Lazily updated.
     encountered_overflow: bool,
 
+    /// Whether this goal has been used as the root of a cycle. This gets
+    /// eagerly updated when encountering a cycle.
     has_been_used: Option<UsageKind>,
 
-    /// We put only the root goal of a coinductive cycle into the global cache.
-    ///
-    /// If we were to use that result when later trying to prove another cycle
-    /// participant, we can end up with unstable query results.
-    ///
-    /// See tests/ui/next-solver/coinduction/incompleteness-unstable-result.rs for
-    /// an example of where this is needed.
-    ///
-    /// There can be multiple roots on the same stack, so we need to track
-    /// cycle participants per root:
-    /// ```plain
-    /// A :- B
-    /// B :- A, C
-    /// C :- D
-    /// D :- C
-    /// ```
-    nested_goals: HashSet<X::Input>,
+    /// The nested goals of this goal, see the doc comment of the type.
+    nested_goals: NestedGoals<X>,
+
     /// Starts out as `None` and gets set when rerunning this
     /// goal in case we encounter a cycle.
     provisional_result: Option<X::Result>,
 }
 
-/// The provisional result for a goal which is not on the stack.
-#[derive(Debug)]
-struct DetachedEntry<X: Cx> {
-    /// The head of the smallest non-trivial cycle involving this entry.
-    ///
-    /// Given the following rules, when proving `A` the head for
-    /// the provisional entry of `C` would be `B`.
-    /// ```plain
-    /// A :- B
-    /// B :- C
-    /// C :- A + B + C
-    /// ```
-    head: StackDepth,
-    result: X::Result,
-}
-
-/// Stores the provisional result of already computed results for goals which
-/// depend on other goals still on the stack.
-///
-/// The provisional result may depend on whether the stack above it is inductive
-/// or coinductive. Because of this, we store separate provisional results for
-/// each case. If an provisional entry is not applicable, it may be the case
-/// that we already have provisional result while computing a goal. In this case
-/// we prefer the provisional result to potentially avoid fixpoint iterations.
-/// See tests/ui/traits/next-solver/cycles/mixed-cycles-2.rs for an example.
-///
-/// The provisional cache can theoretically result in changes to the observable behavior,
-/// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs.
-#[derive_where(Default; X: Cx)]
+/// A provisional result of an already computed goal which depends on other
+/// goals still on the stack.
+#[derive_where(Debug; X: Cx)]
 struct ProvisionalCacheEntry<X: Cx> {
-    with_inductive_stack: Option<DetachedEntry<X>>,
-    with_coinductive_stack: Option<DetachedEntry<X>>,
-}
-
-impl<X: Cx> ProvisionalCacheEntry<X> {
-    fn is_empty(&self) -> bool {
-        self.with_inductive_stack.is_none() && self.with_coinductive_stack.is_none()
-    }
+    /// Whether evaluating the goal encountered overflow. This is used to
+    /// disable the cache entry except if the last goal on the stack is
+    /// already involved in this cycle.
+    encountered_overflow: bool,
+    /// All cycle heads this cache entry depends on.
+    heads: CycleHeads,
+    /// The path from the highest cycle head to this goal.
+    path_from_head: PathKind,
+    nested_goals: NestedGoals<X>,
+    result: X::Result,
 }
 
 pub struct SearchGraph<D: Delegate<Cx = X>, X: Cx = <D as Delegate>::Cx> {
@@ -254,7 +362,11 @@ pub struct SearchGraph<D: Delegate<Cx = X>, X: Cx = <D as Delegate>::Cx> {
     ///
     /// An element is *deeper* in the stack if its index is *lower*.
     stack: IndexVec<StackDepth, StackEntry<X>>,
-    provisional_cache: HashMap<X::Input, ProvisionalCacheEntry<X>>,
+    /// The provisional cache contains entries for already computed goals which
+    /// still depend on goals higher-up in the stack. We don't move them to the
+    /// global cache and track them locally instead. A provisional cache entry
+    /// is only valid until the result of one of its cycle heads changes.
+    provisional_cache: HashMap<X::Input, Vec<ProvisionalCacheEntry<X>>>,
     _marker: PhantomData<D>,
 }
 
@@ -273,67 +385,66 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         self.mode
     }
 
-    fn update_parent_goal(&mut self, reached_depth: StackDepth, encountered_overflow: bool) {
-        if let Some(parent) = self.stack.raw.last_mut() {
+    /// Lazily update the stack entry for the parent goal.
+    /// This behavior is shared between actually evaluating goals
+    /// and using existing global cache entries to make sure they
+    /// have the same impact on the remaining evaluation.
+    fn update_parent_goal(
+        cx: X,
+        stack: &mut IndexVec<StackDepth, StackEntry<X>>,
+        reached_depth: StackDepth,
+        heads: &CycleHeads,
+        encountered_overflow: bool,
+        nested_goals: &NestedGoals<X>,
+    ) {
+        if let Some(parent_index) = stack.last_index() {
+            let parent = &mut stack[parent_index];
             parent.reached_depth = parent.reached_depth.max(reached_depth);
             parent.encountered_overflow |= encountered_overflow;
+
+            parent.heads.extend_from_child(parent_index, heads);
+            let step_kind = Self::step_kind(cx, parent.input);
+            parent.nested_goals.extend_from_child(step_kind, nested_goals);
+            // Once we've got goals which encountered overflow or a cycle,
+            // we track all goals whose behavior may depend on these
+            // goals as this change may cause them to now depend on additional
+            // goals, resulting in new cycles. See the dev-guide for examples.
+            if !nested_goals.is_empty() {
+                parent.nested_goals.insert(parent.input, UsageKind::Single(PathKind::Coinductive))
+            }
         }
     }
 
     pub fn is_empty(&self) -> bool {
-        self.stack.is_empty()
+        if self.stack.is_empty() {
+            debug_assert!(self.provisional_cache.is_empty());
+            true
+        } else {
+            false
+        }
     }
 
-    fn stack_coinductive_from(
-        cx: X,
-        stack: &IndexVec<StackDepth, StackEntry<X>>,
-        head: StackDepth,
-    ) -> bool {
-        stack.raw[head.index()..].iter().all(|entry| D::step_is_coinductive(cx, entry.input))
-    }
-
-    // When encountering a solver cycle, the result of the current goal
-    // depends on goals lower on the stack.
-    //
-    // We have to therefore be careful when caching goals. Only the final result
-    // of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
-    // is moved to the global cache while all others are stored in a provisional cache.
-    //
-    // We update both the head of this cycle to rerun its evaluation until
-    // we reach a fixpoint and all other cycle participants to make sure that
-    // their result does not get moved to the global cache.
-    fn tag_cycle_participants(stack: &mut IndexVec<StackDepth, StackEntry<X>>, head: StackDepth) {
-        // The current root of these cycles. Note that this may not be the final
-        // root in case a later goal depends on a goal higher up the stack.
-        let mut current_root = head;
-        while let Some(parent) = stack[current_root].non_root_cycle_participant {
-            current_root = parent;
-            debug_assert!(stack[current_root].has_been_used.is_some());
-        }
+    /// The number of goals currently in the search graph. This should only be
+    /// used for debugging purposes.
+    pub fn debug_current_depth(&self) -> usize {
+        self.stack.len()
+    }
 
-        let (stack, cycle_participants) = stack.raw.split_at_mut(head.index() + 1);
-        let current_cycle_root = &mut stack[current_root.as_usize()];
-        for entry in cycle_participants {
-            entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head));
-            current_cycle_root.nested_goals.insert(entry.input);
-            current_cycle_root.nested_goals.extend(mem::take(&mut entry.nested_goals));
-        }
+    fn step_kind(cx: X, input: X::Input) -> PathKind {
+        if D::step_is_coinductive(cx, input) { PathKind::Coinductive } else { PathKind::Inductive }
     }
 
-    fn clear_dependent_provisional_results(
-        provisional_cache: &mut HashMap<X::Input, ProvisionalCacheEntry<X>>,
+    /// Whether the path from `head` to the current stack entry is inductive or coinductive.
+    fn stack_path_kind(
+        cx: X,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
         head: StackDepth,
-    ) {
-        #[allow(rustc::potential_query_instability)]
-        provisional_cache.retain(|_, entry| {
-            if entry.with_coinductive_stack.as_ref().is_some_and(|p| p.head == head) {
-                entry.with_coinductive_stack.take();
-            }
-            if entry.with_inductive_stack.as_ref().is_some_and(|p| p.head == head) {
-                entry.with_inductive_stack.take();
-            }
-            !entry.is_empty()
-        });
+    ) -> PathKind {
+        if stack.raw[head.index()..].iter().all(|entry| D::step_is_coinductive(cx, entry.input)) {
+            PathKind::Coinductive
+        } else {
+            PathKind::Inductive
+        }
     }
 
     /// Probably the most involved method of the whole solver.
@@ -345,20 +456,27 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         cx: X,
         input: X::Input,
         inspect: &mut D::ProofTreeBuilder,
-        mut prove_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+        mut evaluate_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
    ) -> X::Result {
-        self.check_invariants();
-
         // Check for overflow.
         let Some(available_depth) = AvailableDepth::allowed_depth_for_nested::<D>(cx, &self.stack)
         else {
-            if let Some(last) = self.stack.raw.last_mut() {
-                last.encountered_overflow = true;
-            }
-
-            debug!("encountered stack overflow");
-            return D::on_stack_overflow(cx, inspect, input);
+            return self.handle_overflow(cx, input, inspect);
         };
 
+        // We check the provisional cache before checking the global cache. This simplifies
+        // the implementation as we can avoid worrying about cases where both the global and
+        // provisional cache may apply, e.g. consider the following example
+        //
+        // - xxBA overflow
+        // - A
+        //   - BA cycle
+        //   - CB :x:
+        if let Some(result) = self.lookup_provisional_cache(cx, input) {
+            return result;
+        }
+
+        // Lookup the global cache unless we're building proof trees or are currently
+        // fuzzing.
         let validate_cache = if !D::inspect_is_noop(inspect) {
             None
         } else if let Some(scope) = D::enter_validation_scope(cx, input) {
@@ -375,20 +493,22 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
             None
         };
 
-        if let Some(result) = self.lookup_provisional_cache(cx, input) {
-            return result;
-        }
-
+        // Detect cycles on the stack. We do this after the global cache lookup to
+        // avoid iterating over the stack in case a goal has already been computed.
+        // This may not have an actual performance impact and we could reorder them
+        // as it may reduce the number of `nested_goals` we need to track.
         if let Some(result) = self.check_cycle_on_stack(cx, input) {
+            debug_assert!(validate_cache.is_none(), "global cache and cycle on stack");
             return result;
         }
 
+        // Unfortunate, it looks like we actually have to compute this goal.
         let depth = self.stack.next_index();
         let entry = StackEntry {
             input,
             available_depth,
             reached_depth: depth,
-            non_root_cycle_participant: None,
+            heads: Default::default(),
             encountered_overflow: false,
             has_been_used: None,
             nested_goals: Default::default(),
@@ -403,37 +523,23 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         // must not be added to the global cache. Notably, this is the case for
         // trait solver cycles participants.
         let ((final_entry, result), dep_node) = cx.with_cached_task(|| {
-            for _ in 0..D::FIXPOINT_STEP_LIMIT {
-                match self.fixpoint_step_in_task(cx, input, inspect, &mut prove_goal) {
-                    StepResult::Done(final_entry, result) => return (final_entry, result),
-                    StepResult::HasChanged => {}
-                }
-            }
-
-            debug!("canonical cycle overflow");
-            let current_entry = self.stack.pop().unwrap();
-            debug_assert!(current_entry.has_been_used.is_none());
-            let result = D::on_fixpoint_overflow(cx, input);
-            (current_entry, result)
+            self.evaluate_goal_in_task(cx, input, inspect, &mut evaluate_goal)
         });
 
-        self.update_parent_goal(final_entry.reached_depth, final_entry.encountered_overflow);
-
-        // We're now done with this goal. In case this goal is involved in a larger cycle
-        // do not remove it from the provisional cache and update its provisional result.
-        // We only add the root of cycles to the global cache.
-        if let Some(head) = final_entry.non_root_cycle_participant {
-            debug_assert!(validate_cache.is_none());
-            let coinductive_stack = Self::stack_coinductive_from(cx, &self.stack, head);
+        // We've finished computing the goal and have popped it from the stack,
+        // lazily update its parent goal.
+        Self::update_parent_goal(
+            cx,
+            &mut self.stack,
+            final_entry.reached_depth,
+            &final_entry.heads,
+            final_entry.encountered_overflow,
+            &final_entry.nested_goals,
+        );
 
-            let entry = self.provisional_cache.entry(input).or_default();
-            if coinductive_stack {
-                entry.with_coinductive_stack = Some(DetachedEntry { head, result });
-            } else {
-                entry.with_inductive_stack = Some(DetachedEntry { head, result });
-            }
-        } else {
-            self.provisional_cache.remove(&input);
+        // We're now done with this goal. We only add the root of cycles to the global cache.
+        // In case this goal is involved in a larger cycle add it to the provisional cache.
+        if final_entry.heads.is_empty() {
             if let Some((_scope, expected)) = validate_cache {
                 // Do not try to move a goal into the cache again if we're testing
                 // the global cache.
@@ -441,11 +547,278 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
             } else if D::inspect_is_noop(inspect) {
                 self.insert_global_cache(cx, input, final_entry, result, dep_node)
             }
+        } else if D::ENABLE_PROVISIONAL_CACHE {
+            debug_assert!(validate_cache.is_none());
+            let entry = self.provisional_cache.entry(input).or_default();
+            let StackEntry { heads, nested_goals, encountered_overflow, .. } = final_entry;
+            let path_from_head = Self::stack_path_kind(cx, &self.stack, heads.highest_cycle_head());
+            entry.push(ProvisionalCacheEntry {
+                encountered_overflow,
+                heads,
+                path_from_head,
+                nested_goals,
+                result,
+            });
+        } else {
+            debug_assert!(validate_cache.is_none());
         }
 
         result
     }
 
+    fn handle_overflow(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        inspect: &mut D::ProofTreeBuilder,
+    ) -> X::Result {
+        if let Some(last) = self.stack.raw.last_mut() {
+            last.encountered_overflow = true;
+            // If computing a goal `B` depends on another goal `A` and
+            // `A` has a nested goal which overflows, then computing `B`
+            // at the same depth, but with `A` already on the stack,
+            // would encounter a solver cycle instead, potentially
+            // changing the result.
+            //
+            // We must therefore not use the global cache entry for `B` in that case.
+            // See tests/ui/traits/next-solver/cycles/hidden-by-overflow.rs
+            last.nested_goals.insert(last.input, UsageKind::Single(PathKind::Coinductive));
+        }
+
+        debug!("encountered stack overflow");
+        D::on_stack_overflow(cx, inspect, input)
+    }
+
+    /// When reevaluating a goal with a changed provisional result, all provisional cache
+    /// entries which depend on this goal get invalidated.
+    fn clear_dependent_provisional_results(&mut self) {
+        let head = self.stack.next_index();
+        #[allow(rustc::potential_query_instability)]
+        self.provisional_cache.retain(|_, entries| {
+            entries.retain(|entry| entry.heads.highest_cycle_head() != head);
+            !entries.is_empty()
+        });
+    }
+
+    /// A necessary optimization to handle complex solver cycles. A provisional cache entry
+    /// relies on a set of cycle heads and the path towards these heads. When popping a cycle
+    /// head from the stack after we've finished computing it, we can't be sure that the
+    /// provisional cache entry is still applicable. We need to keep the cache entries to
+    /// prevent hangs.
+    ///
+    /// What we therefore do is check whether the cycle kind of all cycles the goal of a
+    /// provisional cache entry is involved in would stay the same when computing the
+    /// goal without its cycle head on the stack. For more details, see the relevant
+    /// [rustc-dev-guide chapter](https://rustc-dev-guide.rust-lang.org/solve/caching.html).
+    ///
+    /// This can be thought of as rotating the sub-tree of this provisional result and changing
+    /// its entry point while making sure that all paths through this sub-tree stay the same.
+    ///
+    /// In case the popped cycle head failed to reach a fixpoint anything which depends on
+    /// its provisional result is invalid. Actually discarding provisional cache entries in
+    /// this case would cause hangs, so we instead change the result of dependent provisional
+    /// cache entries to also be ambiguous. This causes some undesirable ambiguity for nested
+    /// goals whose result doesn't actually depend on this cycle head, but that's acceptable
+    /// to me.
+    fn rebase_provisional_cache_entries(
+        &mut self,
+        cx: X,
+        stack_entry: &StackEntry<X>,
+        mut mutate_result: impl FnMut(X::Input, X::Result) -> X::Result,
+    ) {
+        let head = self.stack.next_index();
+        #[allow(rustc::potential_query_instability)]
+        self.provisional_cache.retain(|&input, entries| {
+            entries.retain_mut(|entry| {
+                let ProvisionalCacheEntry {
+                    encountered_overflow: _,
+                    heads,
+                    path_from_head,
+                    nested_goals,
+                    result,
+                } = entry;
+                if heads.highest_cycle_head() != head {
+                    return true;
+                }
+
+                // We don't try rebasing if the path from the current head
+                // to the cache entry is not coinductive or if the path from
+                // the cache entry to the current head is not coinductive.
+                //
+                // Both of these constraints could be weakened, but by only
+                // accepting coinductive paths we don't have to worry about
+                // changing the cycle kind of the remaining cycles. We can
+                // extend this in the future once there's a known issue
+                // caused by it.
+                if *path_from_head != PathKind::Coinductive
+                    || nested_goals.get(stack_entry.input).unwrap()
+                        != UsageKind::Single(PathKind::Coinductive)
+                {
+                    return false;
+                }
+
+                // Merge the cycle heads of the provisional cache entry and the
+                // popped head. If the popped cycle head was a root, discard all
+                // provisional cache entries which depend on it.
+                heads.remove_highest_cycle_head();
+                heads.merge(&stack_entry.heads);
+                let Some(head) = heads.opt_highest_cycle_head() else {
+                    return false;
+                };
+
+                // As we've made sure that the path from the new highest cycle
+                // head to the uses of the popped cycle head are fully coinductive,
+                // we can be sure that the paths to all nested goals of the popped
+                // cycle head remain the same. We can simply merge them.
+                nested_goals.merge(&stack_entry.nested_goals);
+                // We now care about the path from the next highest cycle head to the
+                // provisional cache entry.
+                *path_from_head = Self::stack_path_kind(cx, &self.stack, head);
+                // Mutate the result of the provisional cache entry in case we did
+                // not reach a fixpoint.
+                *result = mutate_result(input, *result);
+                true
+            });
+            !entries.is_empty()
+        });
+    }
+
+    fn lookup_provisional_cache(&mut self, cx: X, input: X::Input) -> Option<X::Result> {
+        if !D::ENABLE_PROVISIONAL_CACHE {
+            return None;
+        }
+
+        let entries = self.provisional_cache.get(&input)?;
+        for &ProvisionalCacheEntry {
+            encountered_overflow,
+            ref heads,
+            path_from_head,
+            ref nested_goals,
+            result,
+        } in entries
+        {
+            let head = heads.highest_cycle_head();
+            if encountered_overflow {
+                // This check is overly strict and very subtle. We need to make sure that if
+                // a global cache entry depends on some goal without adding it to its
+                // `nested_goals`, that goal must never have an applicable provisional
+                // cache entry to avoid incorrectly applying the cache entry.
+                //
+                // As we'd have to otherwise track literally all nested goals, we only
+                // apply provisional cache entries which encountered overflow once the
+                // current goal is already part of the same cycle. This check could be
+                // improved but seems to be good enough for now.
+                let last = self.stack.raw.last().unwrap();
+                if !last.heads.opt_lowest_cycle_head().is_some_and(|lowest| lowest <= head) {
+                    continue;
+                }
+            }
+
+            // A provisional cache entry is only valid if the current path from its
+            // highest cycle head to the goal is the same.
+            if path_from_head == Self::stack_path_kind(cx, &self.stack, head) {
+                // While we don't have to track the full depth of the provisional cache entry,
+                // we do have to increment the required depth by one as we'd have already failed
+                // with overflow otherwise.
+                let next_index = self.stack.next_index();
+                let last = &mut self.stack.raw.last_mut().unwrap();
+                let path_from_entry = Self::step_kind(cx, last.input);
+                last.nested_goals.insert(input, UsageKind::Single(path_from_entry));
+
+                Self::update_parent_goal(
+                    cx,
+                    &mut self.stack,
+                    next_index,
+                    heads,
+                    false,
+                    nested_goals,
+                );
+                debug_assert!(self.stack[head].has_been_used.is_some());
+                debug!(?head, ?path_from_head, "provisional cache hit");
+                return Some(result);
+            }
+        }
+
+        None
+    }
+
+    /// Even if there is a global cache entry for a given goal, we need to make sure
+    /// evaluating this entry would not have ended up depending on either a goal
+    /// already on the stack or a provisional cache entry.
+    fn candidate_is_applicable(
+        cx: X,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
+        provisional_cache: &HashMap<X::Input, Vec<ProvisionalCacheEntry<X>>>,
+        nested_goals: &NestedGoals<X>,
+    ) -> bool {
+        // If the global cache entry didn't depend on any nested goals, it always
+        // applies.
+        if nested_goals.is_empty() {
+            return true;
+        }
+
+        // If a nested goal of the global cache entry is on the stack, we would
+        // definitely encounter a cycle.
+        if stack.iter().any(|e| nested_goals.contains(e.input)) {
+            debug!("cache entry not applicable due to stack");
+            return false;
+        }
+
+        // The global cache entry is also invalid if there's a provisional cache entry
+        // that would apply for any of its nested goals.
+        #[allow(rustc::potential_query_instability)]
+        for (input, path_from_global_entry) in nested_goals.iter() {
+            let Some(entries) = provisional_cache.get(&input) else {
+                continue;
+            };
+
+            debug!(?input, ?path_from_global_entry, ?entries, "candidate_is_applicable");
+            // A provisional cache entry is applicable if the path to
+            // its highest cycle head is equal to the expected path.
+            for &ProvisionalCacheEntry {
+                encountered_overflow,
+                ref heads,
+                path_from_head,
+                nested_goals: _,
+                result: _,
+            } in entries.iter()
+            {
+                // We don't have to worry about provisional cache entries which encountered
+                // overflow, see the relevant comment in `lookup_provisional_cache`.
+                if encountered_overflow {
+                    continue;
+                }
+
+                // A provisional cache entry only applies if the path from its highest head
+                // matches the path when encountering the goal.
+                let head = heads.highest_cycle_head();
+                let full_path = match Self::stack_path_kind(cx, stack, head) {
+                    PathKind::Coinductive => path_from_global_entry,
+                    PathKind::Inductive => UsageKind::Single(PathKind::Inductive),
+                };
+
+                match (full_path, path_from_head) {
+                    (UsageKind::Mixed, _)
+                    | (UsageKind::Single(PathKind::Coinductive), PathKind::Coinductive)
+                    | (UsageKind::Single(PathKind::Inductive), PathKind::Inductive) => {
+                        debug!(
+                            ?full_path,
+                            ?path_from_head,
+                            "cache entry not applicable due to matching paths"
+                        );
+                        return false;
+                    }
+                    _ => debug!(?full_path, ?path_from_head, "paths don't match"),
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Used when fuzzing the global cache. Accesses the global cache without
+    /// updating the state of the search graph.
     fn lookup_global_cache_untracked(
         &self,
         cx: X,
@@ -453,7 +826,16 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         available_depth: AvailableDepth,
     ) -> Option<X::Result> {
         cx.with_global_cache(self.mode, |cache| {
-            cache.get(cx, input, &self.stack, available_depth).map(|c| c.result)
+            cache
+                .get(cx, input, available_depth, |nested_goals| {
+                    Self::candidate_is_applicable(
+                        cx,
+                        &self.stack,
+                        &self.provisional_cache,
+                        nested_goals,
+                    )
+                })
+                .map(|c| c.result)
         })
     }
 
@@ -467,46 +849,37 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         available_depth: AvailableDepth,
     ) -> Option<X::Result> {
         cx.with_global_cache(self.mode, |cache| {
-            let CacheData {
-                result,
-                additional_depth,
-                encountered_overflow,
-                nested_goals: _, // FIXME: consider nested goals here.
-            } = cache.get(cx, input, &self.stack, available_depth)?;
+            let CacheData { result, additional_depth, encountered_overflow, nested_goals } = cache
+                .get(cx, input, available_depth, |nested_goals| {
+                    Self::candidate_is_applicable(
+                        cx,
+                        &self.stack,
+                        &self.provisional_cache,
+                        nested_goals,
+                    )
+                })?;
 
             // Update the reached depth of the current goal to make sure
             // its state is the same regardless of whether we've used the
             // global cache or not.
             let reached_depth = self.stack.next_index().plus(additional_depth);
-            self.update_parent_goal(reached_depth, encountered_overflow);
+            // We don't move cycle participants to the global cache, so the
+            // cycle heads are always empty.
+            let heads = Default::default();
+            Self::update_parent_goal(
+                cx,
+                &mut self.stack,
+                reached_depth,
+                &heads,
+                encountered_overflow,
+                nested_goals,
+            );
 
             debug!(?additional_depth, "global cache hit");
             Some(result)
         })
     }
 
-    fn lookup_provisional_cache(&mut self, cx: X, input: X::Input) -> Option<X::Result> {
-        let cache_entry = self.provisional_cache.get(&input)?;
-        let &DetachedEntry { head, result } = cache_entry
-            .with_coinductive_stack
-            .as_ref()
-            .filter(|p| Self::stack_coinductive_from(cx, &self.stack, p.head))
-            .or_else(|| {
-                cache_entry
-                    .with_inductive_stack
-                    .as_ref()
-                    .filter(|p| !Self::stack_coinductive_from(cx, &self.stack, p.head))
-            })?;
-
-        debug!("provisional cache hit");
-        // We have a nested goal which is already in the provisional cache, use
-        // its result. We do not provide any usage kind as that should have been
-        // already set correctly while computing the cache entry.
-        Self::tag_cycle_participants(&mut self.stack, head);
-        debug_assert!(self.stack[head].has_been_used.is_some());
-        Some(result)
-    }
-
     fn check_cycle_on_stack(&mut self, cx: X, input: X::Input) -> Option<X::Result> {
         let (head, _stack_entry) = self.stack.iter_enumerated().find(|(_, e)| e.input == input)?;
         debug!("encountered cycle with depth {head:?}");
@@ -516,20 +889,48 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         //
         // Finally we can return either the provisional response or the initial response
         // in case we're in the first fixpoint iteration for this goal.
-        let is_coinductive_cycle = Self::stack_coinductive_from(cx, &self.stack, head);
-        let cycle_kind =
-            if is_coinductive_cycle { CycleKind::Coinductive } else { CycleKind::Inductive };
-        let usage_kind = UsageKind::Single(cycle_kind);
+        let path_kind = Self::stack_path_kind(cx, &self.stack, head);
+        let usage_kind = UsageKind::Single(path_kind);
         self.stack[head].has_been_used =
             Some(self.stack[head].has_been_used.map_or(usage_kind, |prev| prev.merge(usage_kind)));
-        Self::tag_cycle_participants(&mut self.stack, head);
+
+        // Subtle: when encountering a cyclic goal, we still first checked for overflow,
+        // so we have to update the reached depth.
+        let next_index = self.stack.next_index();
+        let last_index = self.stack.last_index().unwrap();
+        let last = &mut self.stack[last_index];
+        last.reached_depth = last.reached_depth.max(next_index);
+
+        let path_from_entry = Self::step_kind(cx, last.input);
+        last.nested_goals.insert(input, UsageKind::Single(path_from_entry));
+        last.nested_goals.insert(last.input, UsageKind::Single(PathKind::Coinductive));
+        if last_index != head {
+            last.heads.insert(head);
+        }
 
         // Return the provisional result or, if we're in the first iteration,
         // start with no constraints.
         if let Some(result) = self.stack[head].provisional_result {
             Some(result)
         } else {
-            Some(D::initial_provisional_result(cx, cycle_kind, input))
+            Some(D::initial_provisional_result(cx, path_kind, input))
+        }
+    }
+
+    /// Whether we've reached a fixpoint when evaluating a cycle head.
+    fn reached_fixpoint(
+        &mut self,
+        cx: X,
+        stack_entry: &StackEntry<X>,
+        usage_kind: UsageKind,
+        result: X::Result,
+    ) -> bool {
+        if let Some(prev) = stack_entry.provisional_result {
+            prev == result
+        } else if let UsageKind::Single(kind) = usage_kind {
+            D::is_initial_provisional_result(cx, kind, stack_entry.input, result)
+        } else {
+            false
        }
     }
 
@@ -538,55 +939,83 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
     /// of this we continuously recompute the cycle until the result
     /// of the previous iteration is equal to the final result, at which
     /// point we are done.
-    fn fixpoint_step_in_task<F>(
+    fn evaluate_goal_in_task(
         &mut self,
         cx: X,
         input: X::Input,
         inspect: &mut D::ProofTreeBuilder,
-        prove_goal: &mut F,
-    ) -> StepResult<X>
-    where
-        F: FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
-    {
-        let result = prove_goal(self, inspect);
-        let stack_entry = self.stack.pop().unwrap();
-        debug_assert_eq!(stack_entry.input, input);
-
-        // If the current goal is not the root of a cycle, we are done.
-        let Some(usage_kind) = stack_entry.has_been_used else {
-            return StepResult::Done(stack_entry, result);
-        };
+        mut evaluate_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+    ) -> (StackEntry<X>, X::Result) {
+        let mut i = 0;
+        loop {
+            let result = evaluate_goal(self, inspect);
+            let stack_entry = self.stack.pop().unwrap();
+            debug_assert_eq!(stack_entry.input, input);
+
+            // If the current goal is not the root of a cycle, we are done.
+            //
+            // There are no provisional cache entries which depend on this goal.
+            let Some(usage_kind) = stack_entry.has_been_used else {
+                return (stack_entry, result);
+            };
+
+            // If it is a cycle head, we have to keep trying to prove it until
+            // we reach a fixpoint. We need to do so for all cycle heads,
+            // not only for the root.
+            //
+            // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
+            // for an example.
+            //
+            // Check whether we reached a fixpoint, either because the final result
+            // is equal to the provisional result of the previous iteration, or because
+            // this was only the root of either coinductive or inductive cycles, and the
+            // final result is equal to the initial response for that case.
+            if self.reached_fixpoint(cx, &stack_entry, usage_kind, result) {
+                self.rebase_provisional_cache_entries(cx, &stack_entry, |_, result| result);
+                return (stack_entry, result);
+            }
 
-        // If it is a cycle head, we have to keep trying to prove it until
-        // we reach a fixpoint. We need to do so for all cycle heads,
-        // not only for the root.
-        //
-        // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
-        // for an example.
-
-        // Start by clearing all provisional cache entries which depend on this
-        // the current goal.
-        Self::clear_dependent_provisional_results(
-            &mut self.provisional_cache,
-            self.stack.next_index(),
-        );
+            // If computing this goal results in ambiguity with no constraints,
+            // we do not rerun it. It's incredibly difficult to get a different
+            // response in the next iteration in this case. These changes would
+            // likely either be caused by incompleteness or can change the `Maybe`
+            // cause from ambiguity to overflow. Returning ambiguity always
+            // preserves soundness and completeness even if the goal is known
+            // to succeed or fail.
+            //
+            // This prevents exponential blowup affecting multiple major crates.
+            // As we only get to this branch if we haven't yet reached a fixpoint,
+            // we also taint all provisional cache entries which depend on the
+            // current goal.
+            if D::is_ambiguous_result(result) {
+                self.rebase_provisional_cache_entries(cx, &stack_entry, |input, _| {
+                    D::propagate_ambiguity(cx, input, result)
+                });
+                return (stack_entry, result);
+            };
+
+            // If we've reached the fixpoint step limit, we bail with overflow and taint all
+            // provisional cache entries which depend on the current goal.
+            i += 1;
+            if i >= D::FIXPOINT_STEP_LIMIT {
+                debug!("canonical cycle overflow");
+                let result = D::on_fixpoint_overflow(cx, input);
+                self.rebase_provisional_cache_entries(cx, &stack_entry, |input, _| {
+                    D::on_fixpoint_overflow(cx, input)
+                });
+                return (stack_entry, result);
+            }
+
+            // Clear all provisional cache entries which depend on a previous provisional
+            // result of this goal and rerun.
+            self.clear_dependent_provisional_results();
 
-        // Check whether we reached a fixpoint, either because the final result
-        // is equal to the provisional result of the previous iteration, or because
-        // this was only the root of either coinductive or inductive cycles, and the
-        // final result is equal to the initial response for that case.
-        //
-        // If we did not reach a fixpoint, update the provisional result and reevaluate.
-        if D::reached_fixpoint(cx, usage_kind, input, stack_entry.provisional_result, result) {
-            StepResult::Done(stack_entry, result)
-        } else {
             debug!(?result, "fixpoint changed provisional results");
             self.stack.push(StackEntry {
                 has_been_used: None,
                 provisional_result: Some(result),
                 ..stack_entry
             });
-            StepResult::HasChanged
         }
     }
 
@@ -616,7 +1045,7 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
                 dep_node,
                 additional_depth,
                 final_entry.encountered_overflow,
-                &final_entry.nested_goals,
+                final_entry.nested_goals,
             )
         })
     }
diff --git a/compiler/rustc_type_ir/src/search_graph/validate.rs b/compiler/rustc_type_ir/src/search_graph/validate.rs
deleted file mode 100644
index b4802811b0f57..0000000000000
--- a/compiler/rustc_type_ir/src/search_graph/validate.rs
+++ /dev/null
@@ -1,63 +0,0 @@
-use super::*;
-
-impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
-    #[allow(rustc::potential_query_instability)]
-    pub(super) fn check_invariants(&self) {
-        if !cfg!(debug_assertions) {
-            return;
-        }
-
-        let SearchGraph { mode: _, stack, provisional_cache, _marker } = self;
-        if stack.is_empty() {
-            assert!(provisional_cache.is_empty());
-        }
-
-        for (depth, entry) in stack.iter_enumerated() {
-            let StackEntry {
-                input,
-                available_depth: _,
-                reached_depth: _,
-                non_root_cycle_participant,
-                encountered_overflow: _,
-                has_been_used,
-                ref nested_goals,
-                provisional_result,
-            } = *entry;
-            if let Some(head) = non_root_cycle_participant {
-                assert!(head < depth);
-                assert!(nested_goals.is_empty());
-                assert_ne!(stack[head].has_been_used, None);
-
-                let mut current_root = head;
-                while let Some(parent) = stack[current_root].non_root_cycle_participant {
-                    current_root = parent;
-                }
-                assert!(stack[current_root].nested_goals.contains(&input));
-            }
-
-            if !nested_goals.is_empty() {
-                assert!(provisional_result.is_some() || has_been_used.is_some());
-                for entry in stack.iter().take(depth.as_usize()) {
-                    assert_eq!(nested_goals.get(&entry.input), None);
-                }
-            }
-        }
-
-        for (&_input, entry) in &self.provisional_cache {
-            let ProvisionalCacheEntry { with_coinductive_stack, with_inductive_stack } = entry;
-            assert!(with_coinductive_stack.is_some() || with_inductive_stack.is_some());
-            let check_detached = |detached_entry: &DetachedEntry<X>| {
-                let DetachedEntry { head, result: _ } = *detached_entry;
-                assert_ne!(stack[head].has_been_used, None);
-            };
-
-            if let Some(with_coinductive_stack) = with_coinductive_stack {
-                check_detached(with_coinductive_stack);
-            }
-
-            if let Some(with_inductive_stack) = with_inductive_stack {
-                check_detached(with_inductive_stack);
-            }
-        }
-    }
-}
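For reference, the rewritten `evaluate_goal_in_task` boils down to the following loop. This is a minimal standalone model under the assumption of a simple `PartialEq` result type; the real implementation additionally rebases or clears provisional cache entries between iterations and special-cases ambiguous results:

```rust
// Assumed step limit for illustration only; the real value comes from
// `D::FIXPOINT_STEP_LIMIT`.
const FIXPOINT_STEP_LIMIT: usize = 8;

fn evaluate_cycle_head<R: Copy + PartialEq>(
    initial: R,
    overflow: R,
    mut evaluate: impl FnMut(R) -> R,
) -> R {
    // The first iteration uses the initial provisional result for this
    // cycle's path kind (see `initial_provisional_result` earlier).
    let mut provisional = initial;
    for _ in 0..FIXPOINT_STEP_LIMIT {
        let result = evaluate(provisional);
        // Fixpoint: reevaluating with this provisional result reproduces it,
        // so it is the final result of the cycle head.
        if result == provisional {
            return result;
        }
        // Otherwise update the provisional result and rerun the goal.
        provisional = result;
    }
    // No fixpoint within the step limit: bail with overflow, like
    // `D::on_fixpoint_overflow` above.
    overflow
}
```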