Skip to content

Commit

Permalink
add support for genReg1/genReg2->SIMD8 store on x86 windows. (#52581)
Browse files Browse the repository at this point in the history
* add a standard header for inst_RV_RV

* add support for EAX/EDX->SIMD8 store on x86 windows.

* fix the header.
  • Loading branch information
Sergey Andreenko committed May 13, 2021
1 parent 2fb544c commit d77854a
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 16 deletions.
50 changes: 38 additions & 12 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1902,7 +1902,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
//
void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
{
#ifdef UNIX_AMD64_ABI
assert(varTypeIsSIMD(lclNode));

regNumber dst = lclNode->GetRegNum();
GenTree* op1 = lclNode->gtGetOp1();
GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
Expand All @@ -1920,15 +1921,10 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);

assert(regCount == 2);
assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));

// This is a case where the two 8-bytes that comprise the operand are in
// two different xmm registers and need to be assembled into a single
// xmm register.
regNumber targetReg = lclNode->GetRegNum();
regNumber reg0 = call->GetRegNumByIdx(0);
regNumber reg1 = call->GetRegNumByIdx(1);

regNumber reg0 = call->GetRegNumByIdx(0);
regNumber reg1 = call->GetRegNumByIdx(1);

if (op1->IsCopyOrReload())
{
Expand All @@ -1947,6 +1943,13 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
}
}

#ifdef UNIX_AMD64_ABI
assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));

// This is a case where the two 8-bytes that comprise the operand are in
// two different xmm registers and need to be assembled into a single
// xmm register.
if (targetReg != reg0 && targetReg != reg1)
{
// targetReg = reg0;
Expand Down Expand Up @@ -1979,9 +1982,32 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
}
genProduceReg(lclNode);
#else // !UNIX_AMD64_ABI
assert(!"Multireg store to SIMD reg not supported on Windows");
#endif // !UNIX_AMD64_ABI
#elif defined(TARGET_X86) && defined(TARGET_WINDOWS)
assert(varTypeIsIntegral(retTypeDesc->GetReturnRegType(0)));
assert(varTypeIsIntegral(retTypeDesc->GetReturnRegType(1)));
assert(lclNode->TypeIs(TYP_SIMD8));

// This is the case where a SIMD8 struct is returned as [EAX, EDX]
// and needs to be assembled into a single xmm register.
// Note that we can't check reg0 == EAX and reg1 == EDX because they could have already been moved.

inst_RV_RV(ins_Copy(reg0, TYP_FLOAT), targetReg, reg0, TYP_INT);
const emitAttr size = emitTypeSize(TYP_SIMD8);
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1);
}
else
{
regNumber tempXmm = lclNode->GetSingleTempReg();
inst_RV_RV(ins_Copy(reg1, TYP_FLOAT), tempXmm, reg1, TYP_INT);
GetEmitter()->emitIns_SIMD_R_R_R(INS_punpckldq, size, targetReg, targetReg, tempXmm);
}
#elif defined(TARGET_WINDOWS) && defined(TARGET_AMD64)
assert(!"Multireg store to SIMD reg not supported on Windows x64");
#else
#error Unsupported or unset target architecture
#endif
}
#endif // FEATURE_SIMD

Expand Down
15 changes: 13 additions & 2 deletions src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,11 +399,22 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s
* Generate a "op reg1, reg2" instruction.
*/

//------------------------------------------------------------------------
// inst_RV_RV: Generate a "op reg1, reg2" instruction.
//
// Arguments:
// ins - the instruction to generate;
// reg1 - the first register to use, the dst for most instructions;
// reg2 - the second register to use, the src for most instructions;
// type - the type used to derive the size attribute when size is not given, usually the type of the reg2 operand;
// size - the size attribute, the type arg is ignored if this arg is provided with an actual value;
// flags - whether flags are set for arm32.
//
void CodeGen::inst_RV_RV(instruction ins,
regNumber reg1,
regNumber reg2,
var_types type,
emitAttr size,
var_types type /* = TYP_I_IMPL */,
emitAttr size /* = EA_UNKNOWN */,
insFlags flags /* = INS_FLAGS_DONT_CARE */)
{
if (size == EA_UNKNOWN)
Expand Down
13 changes: 11 additions & 2 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3338,12 +3338,11 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)

// First, define internal registers.
#ifdef FEATURE_SIMD
RefPosition* internalFloatDef = nullptr;
if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
{
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be float for x86.
internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
}
#endif // FEATURE_SIMD

Expand All @@ -3360,6 +3359,16 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
{
BuildUse(op1, RBM_NONE, i);
}
#if defined(FEATURE_SIMD) && defined(TARGET_X86) && defined(TARGET_WINDOWS)
if (!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
if (varTypeIsSIMD(storeLoc) && op1->IsCall())
{
// Need an additional register to create a SIMD8 from EAX/EDX without SSE4.1.
buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
}
}
#endif // FEATURE_SIMD && TARGET_X86 && TARGET_WINDOWS
}
else if (op1->isContained() && op1->OperIs(GT_BITCAST))
{
Expand Down

0 comments on commit d77854a

Please sign in to comment.