Skip to content

Commit

Permalink
[JIT] x86/x64 - improved localloc codegen in some cases (#76851)
Browse files Browse the repository at this point in the history
  • Loading branch information
TIHan committed Nov 1, 2022
1 parent 5ec2e87 commit 2fb0a8c
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 71 deletions.
11 changes: 7 additions & 4 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1430,13 +1430,16 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

void genReturn(GenTree* treeNode);

#ifdef TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments);
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta);
#else // !TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp);

#if defined(TARGET_XARCH)
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp);
#endif // defined(TARGET_XARCH)
#endif // !TARGET_XARCH

void genLclHeap(GenTree* tree);

Expand Down
80 changes: 35 additions & 45 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2216,36 +2216,32 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
//
// Arguments:
// spDelta - the value to add to SP. Must be negative or zero.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// trackSpAdjustments - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// None.
//
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp)
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments)
{
assert(spDelta < 0);

// We assert that the SP change is less than one page. If it's greater, you should have called a
// function that does a probe, which will in turn call this function.
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize());

#ifdef TARGET_X86
if (regTmp != REG_NA)
#ifdef TARGET_AMD64
// We always track the SP adjustment on X64.
trackSpAdjustments = true;
#endif // TARGET_AMD64

if (trackSpAdjustments)
{
// For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment
// to ESP. So do the work in the count register.
// TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
// creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
// track".
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
}
else
#endif // TARGET_X86
{
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
// For x86, some cases don't want to track the adjustment to SP.
inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
}
}

Expand All @@ -2257,16 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm
// Arguments:
// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens,
// but the stack pointer doesn't move.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// trackSpAdjustments - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// None.
//
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp)
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments)
{
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
genStackPointerConstantAdjustment(spDelta, regTmp);
genStackPointerConstantAdjustment(spDelta, trackSpAdjustments);
}

//------------------------------------------------------------------------
Expand All @@ -2280,13 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum
//
// Arguments:
// spDelta - the value to add to SP. Must be negative.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// trackSpAdjustments - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// Offset in bytes from SP to last probed address.
//
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp)
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments)
{
assert(spDelta < 0);

Expand All @@ -2296,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
do
{
ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, trackSpAdjustments);
spRemainingDelta -= spOneDelta;
} while (spRemainingDelta < 0);

Expand All @@ -2323,21 +2317,18 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer,
// and probe the stack as appropriate.
//
// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently,
// requires a temporary register and extra code.
// We hide the ESP adjustment from the emitter.
//
// Arguments:
// regSpDelta - the register value to add to SP. The value in this register must be negative.
// This register might be trashed.
// regTmp - an available temporary register. Will be trashed.
//
// Return Value:
// None.
//
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp)
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta)
{
assert(regSpDelta != REG_NA);
assert(regTmp != REG_NA);

// Tickle the pages to ensure that ESP is always valid and is
// in sync with the "stack guard page". Note that in the worst
Expand All @@ -2356,9 +2347,7 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// xor regSpDelta, regSpDelta // Overflow, pick lowest possible number
// loop:
// test ESP, [ESP+0] // tickle the page
// mov regTmp, ESP
// sub regTmp, eeGetPageSize()
// mov ESP, regTmp
// sub ESP, eeGetPageSize()
// cmp ESP, regSpDelta
// jae loop
// mov ESP, regSpDelta
Expand All @@ -2376,11 +2365,8 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// be on the guard page. It is OK to leave the final value of ESP on the guard page.
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);

// Subtract a page from ESP. This is a trick to avoid the emitter trying to track the
// decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly.
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
// Subtract a page from ESP and hide the adjustment.
inst_RV_IV(INS_sub_hide, REG_SPBASE, compiler->eeGetPageSize(), EA_PTRSIZE);

inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL);
inst_JMP(EJ_jae, loop);
Expand Down Expand Up @@ -2470,7 +2456,7 @@ void CodeGen::genLclHeap(GenTree* tree)
}
else
{
regCnt = tree->ExtractTempReg();
regCnt = tree->GetSingleTempReg();

// Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true);
Expand Down Expand Up @@ -2533,7 +2519,8 @@ void CodeGen::genLclHeap(GenTree* tree)

if ((amount > 0) && !initMemOrLargeAlloc)
{
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, REG_NA);
lastTouchDelta =
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ true);
stackAdjustment = 0;
locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize;
goto ALLOC_DONE;
Expand Down Expand Up @@ -2584,7 +2571,7 @@ void CodeGen::genLclHeap(GenTree* tree)
}
else
{
regCnt = tree->ExtractTempReg();
regCnt = tree->GetSingleTempReg();
}
}

Expand All @@ -2595,7 +2582,8 @@ void CodeGen::genLclHeap(GenTree* tree)
// the alloc, not after.

assert(amount < compiler->eeGetPageSize()); // must be < not <=
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt);
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount,
/* trackSpAdjustments */ regCnt == REG_NA);
goto ALLOC_DONE;
}

Expand All @@ -2611,6 +2599,9 @@ void CodeGen::genLclHeap(GenTree* tree)
instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
}

// We should not have any temp registers at this point.
assert(tree->AvailableTempRegCount() == 0);

if (compiler->info.compInitMem)
{
// At this point 'regCnt' is set to the number of loop iterations for this loop, if each
Expand Down Expand Up @@ -2646,8 +2637,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// adds to ESP).

inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
regNumber regTmp = tree->GetSingleTempReg();
genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp);
genStackPointerDynamicAdjustmentWithProbe(regCnt);

// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
Expand All @@ -2667,11 +2657,11 @@ void CodeGen::genLclHeap(GenTree* tree)
(stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
compiler->eeGetPageSize()))
{
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA);
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true);
}
else
{
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA);
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true);
}
}

Expand Down Expand Up @@ -7862,7 +7852,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) ||
compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
{
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA);
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* trackSpAdjustments */ true);
}
else
{
Expand Down
18 changes: 10 additions & 8 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12066,7 +12066,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

case IF_RRW_ARD:
// Mark the destination register as holding a GCT_BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
break;

Expand All @@ -12083,7 +12083,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

case IF_ARW_RRD:
case IF_ARW_CNS:
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
break;

default:
Expand Down Expand Up @@ -12513,7 +12513,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

// reg could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break;

Expand Down Expand Up @@ -12973,7 +12973,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
case IF_RRW_MRD:

assert(id->idGCref() == GCT_BYREF);
assert(ins == INS_add || ins == INS_sub);
assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);

// Mark it as holding a GCT_BYREF
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
Expand Down Expand Up @@ -13539,6 +13539,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)

case INS_add:
case INS_sub:
case INS_sub_hide:
assert(id->idGCref() == GCT_BYREF);

#if 0
Expand All @@ -13561,7 +13562,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// r1/r2 could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)));
#endif // DEBUG
#endif // 0

Expand Down Expand Up @@ -14042,7 +14043,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
}
if (emitThisByrefRegs & regMask)
{
assert(ins == INS_add || ins == INS_sub);
assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);
}
#endif
// Mark it as holding a GCT_BYREF
Expand Down Expand Up @@ -15666,7 +15667,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

case INS_sub:
// Check for "sub ESP, icon"
if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
Expand All @@ -15675,7 +15676,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

case INS_add:
// Check for "add ESP, icon"
if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
Expand Down Expand Up @@ -16155,6 +16156,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins

case INS_add:
case INS_sub:
case INS_sub_hide:
case INS_and:
case INS_or:
case INS_xor:
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ INST4(adc, "adc", IUM_RW, 0x000010, 0x001080,
INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
// Does not affect the stack tracking in the emitter
INST4(sub_hide, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )

INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit )
Expand Down
Loading

0 comments on commit 2fb0a8c

Please sign in to comment.