From 63ff3a1da0bc701e539233712c37e2dd44fd120c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 13 Jan 2023 16:43:36 -0800 Subject: [PATCH 1/2] JIT: fix remaining phases that were rebuilding pred edges Loop canonicalization now maintains pred edges. GC poll insertion was already maintaining the edges but was rebuilding them anyways. Now pred lists are never rebuilt. Also revise `fgUpdateChangedFlowGraph` so that it no longer has the ability to remove or rebuild. Fixes #49030. --- src/coreclr/jit/compiler.h | 26 ++++++++++++--- src/coreclr/jit/fgopt.cpp | 26 +++++++-------- src/coreclr/jit/flowgraph.cpp | 57 +++++++-------------------------- src/coreclr/jit/loopcloning.cpp | 3 +- src/coreclr/jit/optimizer.cpp | 38 +++++++++++++--------- 5 files changed, 69 insertions(+), 81 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 7ed0ae94c48fb..2fef349114d3e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1511,6 +1511,24 @@ enum API_ICorJitInfo_Names API_COUNT }; +enum class FlowGraphUpdates +{ + COMPUTE_BASICS = 0, // renumber blocks, reachability, etc + COMPUTE_DOMS = 1 << 0, // recompute dominators + COMPUTE_RETURNS = 1 << 1, // recompute return blocks + COMPUTE_LOOPS = 1 << 2, // recompute loop table +}; + +inline constexpr FlowGraphUpdates operator|(FlowGraphUpdates a, FlowGraphUpdates b) +{ + return (FlowGraphUpdates)((unsigned int)a | (unsigned int)b); +} + +inline constexpr FlowGraphUpdates operator&(FlowGraphUpdates a, FlowGraphUpdates b) +{ + return (FlowGraphUpdates)((unsigned int)a & (unsigned int)b); +} + //--------------------------------------------------------------- // Compilation time. // @@ -5140,12 +5158,10 @@ class Compiler // && postOrder(A) >= postOrder(B) making the computation O(1). void fgNumberDomTree(DomTreeNode* domTree); - // When the flow graph changes, we need to update the block numbers, predecessor lists, reachability sets, + // When the flow graph changes, we need to update the block numbers, reachability sets, // dominators, and possibly loops. - void fgUpdateChangedFlowGraph(const bool computePreds = true, - const bool computeDoms = true, - const bool computeReturnBlocks = false, - const bool computeLoops = false); + // + void fgUpdateChangedFlowGraph(FlowGraphUpdates updates); public: // Compute the predecessors of the blocks in the control flow graph. diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 1c5c9758313a6..f26a27ce30b0f 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -174,16 +174,19 @@ bool Compiler::fgReachable(BasicBlock* b1, BasicBlock* b2) // to be recomputed if they know certain things do NOT need to be recomputed. // // Arguments: -// computePreds -- `true` if we should recompute predecessors -// computeDoms -- `true` if we should recompute dominators -// computeReturnBlocks -- `true` if we should recompute the list of return blocks -// computeLoops -- `true` if we should recompute the loop table -// -void Compiler::fgUpdateChangedFlowGraph(const bool computePreds, - const bool computeDoms, - const bool computeReturnBlocks, - const bool computeLoops) +// updates -- enum flag set indicating what to update +// +// Notes: +// Always renumbers, computes enter blocks, and computes reachability. +// Optionally rebuilds dominators, return blocks, and computes loop information. +// +void Compiler::fgUpdateChangedFlowGraph(FlowGraphUpdates updates) { + const bool computeDoms = ((updates & FlowGraphUpdates::COMPUTE_DOMS) == FlowGraphUpdates::COMPUTE_DOMS); + const bool computeReturnBlocks = + ((updates & FlowGraphUpdates::COMPUTE_RETURNS) == FlowGraphUpdates::COMPUTE_RETURNS); + const bool computeLoops = ((updates & FlowGraphUpdates::COMPUTE_LOOPS) == FlowGraphUpdates::COMPUTE_LOOPS); + // We need to clear this so we don't hit an assert calling fgRenumberBlocks(). fgDomsComputed = false; @@ -194,11 +197,6 @@ void Compiler::fgUpdateChangedFlowGraph(const bool computePreds, JITDUMP("\nRenumbering the basic blocks for fgUpdateChangeFlowGraph\n"); fgRenumberBlocks(); - - if (computePreds) // This condition is only here until all phases don't require it. - { - fgComputePreds(); - } fgComputeEnterBlocksSet(); fgComputeReachabilitySets(); if (computeDoms) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 3c6f329055704..1e66f5d580b2d 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -78,19 +78,9 @@ PhaseStatus Compiler::fgInsertGCPolls() bool createdPollBlocks = false; -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgInsertGCPolls() for %s\n", info.compFullName); - fgDispBasicBlocks(false); - printf("\n"); - } -#endif // DEBUG - - BasicBlock* block; - // Walk through the blocks and hunt for a block that needs a GC Poll - for (block = fgFirstBB; block != nullptr; block = block->bbNext) + // + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) { compCurBB = block; @@ -112,56 +102,36 @@ PhaseStatus Compiler::fgInsertGCPolls() GCPollType pollType = GCPOLL_INLINE; - // We'd like to inset an inline poll. Below is the list of places where we + // We'd like to insert an inline poll. Below is the list of places where we // can't or don't want to emit an inline poll. Check all of those. If after all of that we still // have INLINE, then emit an inline check. if (opts.OptimizationDisabled()) { -#ifdef DEBUG - if (verbose) - { - printf("Selecting CALL poll in block " FMT_BB " because of debug/minopts\n", block->bbNum); - } -#endif // DEBUG - // Don't split blocks and create inlined polls unless we're optimizing. + // + JITDUMP("Selecting CALL poll in block " FMT_BB " because of debug/minopts\n", block->bbNum); pollType = GCPOLL_CALL; } else if (genReturnBB == block) { -#ifdef DEBUG - if (verbose) - { - printf("Selecting CALL poll in block " FMT_BB " because it is the single return block\n", block->bbNum); - } -#endif // DEBUG - // we don't want to split the single return block + // + JITDUMP("Selecting CALL poll in block " FMT_BB " because it is the single return block\n", block->bbNum); pollType = GCPOLL_CALL; } else if (BBJ_SWITCH == block->bbJumpKind) { -#ifdef DEBUG - if (verbose) - { - printf("Selecting CALL poll in block " FMT_BB " because it is a SWITCH block\n", block->bbNum); - } -#endif // DEBUG - // We don't want to deal with all the outgoing edges of a switch block. + // + JITDUMP("Selecting CALL poll in block " FMT_BB " because it is a SWITCH block\n", block->bbNum); pollType = GCPOLL_CALL; } else if ((block->bbFlags & BBF_COLD) != 0) { -#ifdef DEBUG - if (verbose) - { - printf("Selecting CALL poll in block " FMT_BB " because it is a cold block\n", block->bbNum); - } -#endif // DEBUG - // We don't want to split a cold block. + // + JITDUMP("Selecting CALL poll in block " FMT_BB " because it is a cold block\n", block->bbNum); pollType = GCPOLL_CALL; } @@ -176,9 +146,7 @@ PhaseStatus Compiler::fgInsertGCPolls() { noway_assert(opts.OptimizationEnabled()); fgReorderBlocks(/* useProfileData */ false); - constexpr bool computePreds = true; - constexpr bool computeDoms = false; - fgUpdateChangedFlowGraph(computePreds, computeDoms); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); } return result; @@ -194,7 +162,6 @@ PhaseStatus Compiler::fgInsertGCPolls() // Return Value: // If new basic blocks are inserted, the last inserted block; otherwise, the input block. // - BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block) { bool createdPollBlocks; diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 4d7351d2011b7..0e9be15826878 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -3286,9 +3286,8 @@ PhaseStatus Compiler::optCloneLoops() if (optLoopsCloned > 0) { JITDUMP("Recompute reachability and dominators after loop cloning\n"); - constexpr bool computePreds = false; // TODO: recompute the loop table, to include the slow loop path in the table? - fgUpdateChangedFlowGraph(computePreds); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } #ifdef DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 95db592e4ac1d..69b49134df178 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2684,8 +2684,7 @@ void Compiler::optFindNaturalLoops() } if (mod) { - constexpr bool computePreds = true; - fgUpdateChangedFlowGraph(computePreds); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } if (false /* pre-header stress */) @@ -2698,10 +2697,7 @@ void Compiler::optFindNaturalLoops() if (fgModified) { - // The predecessors were maintained in fgCreateLoopPreHeader; don't rebuild them. - constexpr bool computePreds = false; - constexpr bool computeDoms = true; - fgUpdateChangedFlowGraph(computePreds, computeDoms); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } } @@ -3082,6 +3078,10 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) BasicBlock* const newH = fgNewBBbefore(BBJ_NONE, t, /*extendRegion*/ true); + fgRemoveRefPred(t, h); + fgAddRefPred(t, newH); + fgAddRefPred(newH, h); + // Anything that flows into sibling will flow here. // So we use sibling.H as our best guess for weight. // @@ -3259,6 +3259,10 @@ bool Compiler::optCanonicalizeLoopCore(unsigned char loopInd, LoopCanonicalizati const bool extendRegion = BasicBlock::sameTryRegion(t, b); BasicBlock* const newT = fgNewBBbefore(BBJ_NONE, t, extendRegion); + fgRemoveRefPred(t, h); + fgAddRefPred(t, newT); + fgAddRefPred(newT, h); + // Initially give newT the same weight as t; we will subtract from // this for each edge that does not move from t to newT. // @@ -3308,7 +3312,7 @@ bool Compiler::optCanonicalizeLoopCore(unsigned char loopInd, LoopCanonicalizati JITDUMP("in optCanonicalizeLoop (current): redirect bottom->top backedge " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", topPredBlock->bbNum, t->bbNum, topPredBlock->bbNum, newT->bbNum); - optRedirectBlock(b, blockMap); + optRedirectBlock(b, blockMap, RedirectBlockOption::UpdatePredLists); } } else if (option == LoopCanonicalizationOption::Outer) @@ -3340,7 +3344,7 @@ bool Compiler::optCanonicalizeLoopCore(unsigned char loopInd, LoopCanonicalizati " -> " FMT_BB "\n", topPredBlock == h ? "head" : "nonloop", topPredBlock == h ? "" : "back", topPredBlock->bbNum, t->bbNum, topPredBlock->bbNum, newT->bbNum); - optRedirectBlock(topPredBlock, blockMap); + optRedirectBlock(topPredBlock, blockMap, RedirectBlockOption::UpdatePredLists); } } else @@ -4464,7 +4468,7 @@ PhaseStatus Compiler::optUnrollLoops() optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists); } - // We fall into to this unroll iteration from the bottom block (first iteration) + // We fall into this unroll iteration from the bottom block (first iteration) // or from the previous unroll clone of the bottom block (subsequent iterations). // After doing this, all the newly cloned blocks now have proper flow and pred lists. // @@ -4639,12 +4643,16 @@ PhaseStatus Compiler::optUnrollLoops() // If we unrolled any nested loops, we rebuild the loop table (including recomputing the // return blocks list). - - constexpr bool computePreds = false; - constexpr bool computeDoms = true; - const bool computeReturnBlocks = anyNestedLoopsUnrolled; - const bool computeLoops = anyNestedLoopsUnrolled; - fgUpdateChangedFlowGraph(computePreds, computeDoms, computeReturnBlocks, computeLoops); + // + if (anyNestedLoopsUnrolled) + { + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS | FlowGraphUpdates::COMPUTE_RETURNS | + FlowGraphUpdates::COMPUTE_LOOPS); + } + else + { + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); + } DBEXEC(verbose, fgDispBasicBlocks()); } From ca77fc5073f3ba87e60b97389acb57106b3fb1cd Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 18 Jan 2023 08:48:09 -0800 Subject: [PATCH 2/2] also fix #80772 --- src/coreclr/jit/optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 69b49134df178..acb63030e71c5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4515,7 +4515,7 @@ PhaseStatus Compiler::optUnrollLoops() // Scrub all pred list references to block, except for bottom-> bottom->bbNext. // - for (BasicBlock* succ : block->Succs()) + for (BasicBlock* succ : block->Succs(this)) { if ((block == bottom) && (succ == bottom->bbNext)) {