diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 71ce12f63d2b6..37adbf75707f3 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1902,7 +1902,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) // void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) { -#ifdef UNIX_AMD64_ABI + assert(varTypeIsSIMD(lclNode)); + regNumber dst = lclNode->GetRegNum(); GenTree* op1 = lclNode->gtGetOp1(); GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); @@ -1920,15 +1921,10 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT); assert(regCount == 2); - assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0))); - assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1))); - - // This is a case where the two 8-bytes that comprise the operand are in - // two different xmm registers and need to be assembled into a single - // xmm register. regNumber targetReg = lclNode->GetRegNum(); - regNumber reg0 = call->GetRegNumByIdx(0); - regNumber reg1 = call->GetRegNumByIdx(1); + + regNumber reg0 = call->GetRegNumByIdx(0); + regNumber reg1 = call->GetRegNumByIdx(1); if (op1->IsCopyOrReload()) { @@ -1947,6 +1943,13 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) } } +#ifdef UNIX_AMD64_ABI + assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0))); + assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1))); + + // This is a case where the two 8-bytes that comprise the operand are in + // two different xmm registers and need to be assembled into a single + // xmm register. 
if (targetReg != reg0 && targetReg != reg1) { // targetReg = reg0; @@ -1979,9 +1982,35 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01); } genProduceReg(lclNode); -#else // !UNIX_AMD64_ABI - assert(!"Multireg store to SIMD reg not supported on Windows"); -#endif // !UNIX_AMD64_ABI +#elif defined(TARGET_X86) && defined(TARGET_WINDOWS) + assert(varTypeIsIntegral(retTypeDesc->GetReturnRegType(0))); + assert(varTypeIsIntegral(retTypeDesc->GetReturnRegType(1))); + assert(lclNode->TypeIs(TYP_SIMD8)); + + // This is a case where a SIMD8 struct is returned as [EAX, EDX] + // and needs to be assembled into a single xmm register. + // Note we can't check reg0=EAX, reg1=EDX because they could have been moved already. + + inst_RV_RV(ins_Copy(reg0, TYP_FLOAT), targetReg, reg0, TYP_INT); + const emitAttr size = emitTypeSize(TYP_SIMD8); + if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1); + } + else + { + regNumber tempXmm = lclNode->GetSingleTempReg(); + assert(tempXmm != targetReg); // targetReg already holds reg0's value when tempXmm is written below. + inst_RV_RV(ins_Copy(reg1, TYP_FLOAT), tempXmm, reg1, TYP_INT); + GetEmitter()->emitIns_SIMD_R_R_R(INS_punpckldq, size, targetReg, targetReg, tempXmm); + } + // Finish the store the same way the UNIX_AMD64_ABI path above does. + genProduceReg(lclNode); +#elif defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + assert(!"Multireg store to SIMD reg not supported on Windows x64"); +#else +#error Unsupported or unset target architecture +#endif } #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index a1135a347f1b2..fbe7e754707fc 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -399,11 +399,22 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s * Generate a "op reg1, reg2" instruction. */ +//------------------------------------------------------------------------ +// inst_RV_RV: Generate a "op reg1, reg2" instruction. 
+// +// Arguments: +// ins - the instruction to generate; +// reg1 - the first register to use, the dst for most instructions; +// reg2 - the second register to use, the src for most instructions; +// type - the type used to get the size attribute if not given, usually type of the reg2 operand; +// size - the size attribute, the type arg is ignored if this arg is provided with an actual value; +// flags - whether flags are set for arm32. +// void CodeGen::inst_RV_RV(instruction ins, regNumber reg1, regNumber reg2, - var_types type, - emitAttr size, + var_types type /* = TYP_I_IMPL */, + emitAttr size /* = EA_UNKNOWN */, insFlags flags /* = INS_FLAGS_DONT_CARE */) { if (size == EA_UNKNOWN) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f66a5a18dc801..83999c3589b2a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3338,12 +3338,11 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) // First, define internal registers. #ifdef FEATURE_SIMD - RefPosition* internalFloatDef = nullptr; if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12)) { // Need an additional register to extract upper 4 bytes of Vector3, // it has to be float for x86. - internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs()); + buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs()); } #endif // FEATURE_SIMD @@ -3360,6 +3359,16 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) { BuildUse(op1, RBM_NONE, i); } +#if defined(FEATURE_SIMD) && defined(TARGET_X86) && defined(TARGET_WINDOWS) + if (!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + if (varTypeIsSIMD(storeLoc) && op1->IsCall()) + { + // Need an additional register to create a SIMD8 from EAX/EDX without SSE4.1. 
+ buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs()); + } + } +#endif // FEATURE_SIMD && TARGET_X86 && TARGET_WINDOWS } else if (op1->isContained() && op1->OperIs(GT_BITCAST)) {