Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inliner: Extend IL limit for profiled call-sites, allow inlining for switches. #55478

Merged
merged 21 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6272,6 +6272,9 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
// a potential inline candidate.
InlineResult prejitResult(this, methodHnd, "prejit");

// Profile data allows us to avoid early "too many IL bytes" outs.
prejitResult.NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveSufficientProfileData());

// Do the initial inline screen.
impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult);

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5814,6 +5814,7 @@ class Compiler
void WalkSpanningTree(SpanningTreeVisitor* visitor);
void fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t weight);
void fgApplyProfileScale();
bool fgHaveSufficientProfileData();

// fgIsUsingProfileWeights - returns true if we have real profile data for this method
// or if we have some fake profile data for the stress mode
Expand Down
65 changes: 51 additions & 14 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -818,8 +818,24 @@ class FgStack
return false;
}
const unsigned argNum = value - SLOT_ARGUMENT;
assert(argNum < info->argCnt);
return info->inlArgInfo[argNum].argIsInvariant;
if (argNum < info->argCnt)
{
return info->inlArgInfo[argNum].argIsInvariant;
}
return false;
}
// IsExactArgument: report whether a stack slot refers to an inlinee argument
// whose InlArgInfo entry has argIsExact set.
//
// Arguments:
//    value - the stack slot to inspect
//    info  - inline info for the current inline attempt; may be nullptr
//
// Returns:
//    true only when info is non-null, the slot is an argument slot, the
//    derived argument index is below info->argCnt, and that argument's
//    argIsExact bit is set. Returns false in every other case.
//
static bool IsExactArgument(FgSlot value, InlineInfo* info)
{
    // Without inline info, or for a non-argument slot, there is nothing to look up.
    const bool isArgSlot = (info != nullptr) && IsArgument(value);
    if (!isArgSlot)
    {
        return false;
    }

    // Argument slots are numbered starting at SLOT_ARGUMENT.
    const unsigned index = value - SLOT_ARGUMENT;

    // An index outside the recorded argument count is treated as "not exact".
    return (index < info->argCnt) && info->inlArgInfo[index].argIsExact;
}
static unsigned SlotTypeToArgNum(FgSlot value)
{
Expand Down Expand Up @@ -867,15 +883,17 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
unsigned retBlocks = 0;
int prefixFlags = 0;
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
const bool resolveTokens = preciseScan;

if (makeInlineObservations)
{
// Set default values for profile (to avoid NoteFailed in CALLEE_IL_CODE_SIZE's handler)
// these will be overridden later.
compInlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, true);
compInlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, 1.0);
// Observe force inline state and code size.
compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);
Expand Down Expand Up @@ -1031,7 +1049,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
if (makeInlineObservations)
{
FgStack::FgSlot slot = pushedStack.Top();
if (FgStack::IsConstantOrConstArg(slot, impInlineInfo))
if (FgStack::IsConstantOrConstArg(slot, impInlineInfo) ||
FgStack::IsExactArgument(slot, impInlineInfo))
{
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR_UN);
handled = true; // and keep argument in the pushedStack
Expand Down Expand Up @@ -1338,39 +1357,53 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
FgStack::FgSlot arg0 = pushedStack.Top(1);
FgStack::FgSlot arg1 = pushedStack.Top(0);

if ((FgStack::IsConstant(arg0) && FgStack::IsConstArgument(arg1, impInlineInfo)) ||
(FgStack::IsConstant(arg1) && FgStack::IsConstArgument(arg0, impInlineInfo)) ||
(FgStack::IsConstArgument(arg0, impInlineInfo) &&
FgStack::IsConstArgument(arg1, impInlineInfo)))
// Const op ConstArg -> ConstArg
if (FgStack::IsConstant(arg0) && FgStack::IsConstArgument(arg1, impInlineInfo))
{
// keep stack unchanged
handled = true;
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR);
}
if ((FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1)) ||
(FgStack::IsConstant(arg1) && FgStack::IsConstant(arg0)))
// ConstArg op Const -> ConstArg
// ConstArg op ConstArg -> ConstArg
else if (FgStack::IsConstArgument(arg0, impInlineInfo) &&
FgStack::IsConstantOrConstArg(arg1, impInlineInfo))
{
if (FgStack::IsConstant(arg1))
{
pushedStack.Push(arg0);
}
handled = true;
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR);
}
// Const op Const -> Const
else if (FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1))
{
// both are constants, but we're mostly interested in cases where a const arg leads to
// a foldable expression.
handled = true;
}
// Arg op ConstArg
// Arg op Const
else if (FgStack::IsArgument(arg0) && FgStack::IsConstantOrConstArg(arg1, impInlineInfo))
{
// "Arg op CNS" --> keep arg0 in the stack for the next ops
handled = true;
compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS);
}
// ConstArg op Arg
// Const op Arg
else if (FgStack::IsArgument(arg1) && FgStack::IsConstantOrConstArg(arg0, impInlineInfo))
{
// "CNS op ARG" --> keep arg1 in the stack for the next ops
pushedStack.Push(arg1);
handled = true;
compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS);
}

// X / ConstArg
// X % ConstArg
if (FgStack::IsConstArgument(arg1, impInlineInfo))
{
// Special case: "X / ConstArg" or "X % ConstArg"
if ((opcode == CEE_DIV) || (opcode == CEE_DIV_UN) || (opcode == CEE_REM) ||
(opcode == CEE_REM_UN))
{
Expand Down Expand Up @@ -1583,6 +1616,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
if (makeInlineObservations)
{
compInlineResult->Note(InlineObservation::CALLEE_HAS_SWITCH);
if (FgStack::IsConstantOrConstArg(pushedStack.Top(), impInlineInfo))
{
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_SWITCH);
}

// Fail fast, if we're inlining and can't handle this.
if (isInlining && compInlineResult->IsFailure())
Expand Down
25 changes: 25 additions & 0 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@ bool Compiler::fgHaveProfileData()
return (fgPgoSchema != nullptr);
}

//------------------------------------------------------------------------
// fgHaveSufficientProfileData: check if profile data is available
// and is sufficient to be trusted.
//
// Returns:
// true if so
//
// Note:
// See notes for fgHaveProfileData.
//
bool Compiler::fgHaveSufficientProfileData()
{
if (!fgHaveProfileData())
{
return false;
}

if ((fgFirstBB != nullptr) && (fgPgoSource == ICorJitInfo::PgoSource::Static))
{
const BasicBlock::weight_t sufficientSamples = 5000;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This used to be 1000 -- I assume you're increasing this to keep prejit image size small?

If so, you should add a comment describing how this value influences prejit size.
If not, you might comment on what the impact of changing this would be.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1000 samples feels like plenty of evidence that something is hot.

Copy link
Member Author

@EgorBo EgorBo Jul 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted to 1000. Yes, this change was supposed to decrease the prejitted size; I was using this histogram:
image
(weights in SPC, the right column starts at 50000).

However, I don't need to save some space with it anymore as I've found an unrelated issue that bloated size for no reason (binary expressions like "arg op cns" used to leave "cns" on top of the pushed stack so there were lots of false-positive foldable-branches/switches).

return fgFirstBB->bbWeight > sufficientSamples;
}
return true;
}

//------------------------------------------------------------------------
// fgApplyProfileScale: scale inlinee counts by appropriate scale factor
//
Expand Down
41 changes: 22 additions & 19 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13258,8 +13258,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
goto COND_JUMP;

case CEE_SWITCH:
assert(!compIsForInlining());

if (tiVerificationNeeded)
{
Verify(impStackTop().seTypeInfo.IsType(TI_INT), "Bad switch val");
Expand Down Expand Up @@ -19071,33 +19069,30 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
inlineResult->NoteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast<int>(frequency));
inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, (int)(weight));

bool hasProfile = false;
double profileFreq = 0.0;

// If the call site has profile data, report the relative frequency of the site.
//
if ((pInlineInfo != nullptr) && rootCompiler->fgHaveProfileData() && pInlineInfo->iciBlock->hasProfileWeight())
if ((pInlineInfo != nullptr) && rootCompiler->fgHaveSufficientProfileData())
{
BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight;
BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight;
BasicBlock::weight_t profileFreq = entryWeight == 0.0f ? 0.0f : callSiteWeight / entryWeight;
const BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight;
const BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight;
profileFreq = entryWeight == 0.0f ? 0.0 : callSiteWeight / entryWeight;
hasProfile = true;

assert(callSiteWeight >= 0);
assert(entryWeight >= 0);

BasicBlock::weight_t sufficientSamples = 1000.0f;

if (!rootCompiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) ||
((callSiteWeight + entryWeight) > sufficientSamples))
{
// Let's not report profiles for methods with insufficient samples during prejitting.
inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, true);
inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, profileFreq);
}
}
else if ((pInlineInfo == nullptr) && rootCompiler->fgHaveProfileData())
else if (pInlineInfo == nullptr)
{
// Simulate a hot callsite for PrejitRoot mode.
inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, true);
inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, 1.0);
hasProfile = true;
profileFreq = 1.0;
}

inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, hasProfile);
inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, profileFreq);
}

/*****************************************************************************
Expand Down Expand Up @@ -19243,6 +19238,10 @@ void Compiler::impCheckCanInline(GenTreeCall* call,
goto _exit;
}

// Profile data allows us to avoid early "too many IL bytes" outs.
pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE,
pParam->pThis->fgHaveSufficientProfileData());

bool forceInline;
forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE);

Expand Down Expand Up @@ -19465,6 +19464,10 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo,
}
}

bool isExact = false;
bool isNonNull = false;
inlCurArgInfo->argIsExact = (gtGetClassHandle(curArgVal, &isExact, &isNonNull) != NO_CLASS_HANDLE) && isExact;

// If the arg is a local that is address-taken, we can't safely
// directly substitute it into the inlinee.
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/inline.def
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ INLINE_OBSERVATION(FOLDABLE_INTRINSIC, int, "foldable intrinsic",
INLINE_OBSERVATION(FOLDABLE_EXPR, int, "foldable binary expression", INFORMATION, CALLSITE)
INLINE_OBSERVATION(FOLDABLE_EXPR_UN, int, "foldable unary expression", INFORMATION, CALLSITE)
INLINE_OBSERVATION(FOLDABLE_BRANCH, int, "foldable branch", INFORMATION, CALLSITE)
INLINE_OBSERVATION(FOLDABLE_SWITCH, int, "foldable switch", INFORMATION, CALLSITE)
INLINE_OBSERVATION(DIV_BY_CNS, int, "dividy by const", INFORMATION, CALLSITE)
INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE)
INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,7 @@ struct InlArgInfo
unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument?
unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
// field in them?
unsigned argIsExact : 1; // Is this arg of an exact class?
};

// InlLclVarInfo describes inline candidate argument and local variable properties.
Expand Down
Loading