Skip to content

Commit

Permalink
JIT and VM implementation for SHA instructions (#3)
Browse files Browse the repository at this point in the history
* JIT implementation for SHA instructions

* Fix flags

* Add `cpuid` check for SHA (CPUID leaf 7, EBX bit 29)

* Add EnableSHA config value

* Add incomplete CodeGen method called `genSHAIntrinsic`
  • Loading branch information
deeprobin authored Jan 3, 2022
1 parent d9953f7 commit 89c89dd
Show file tree
Hide file tree
Showing 12 changed files with 226 additions and 70 deletions.
106 changes: 72 additions & 34 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,27 @@ enum CORINFO_InstructionSet
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_AVXVNNI=19,
InstructionSet_X86Base_X64=20,
InstructionSet_SSE_X64=21,
InstructionSet_SSE2_X64=22,
InstructionSet_SSE3_X64=23,
InstructionSet_SSSE3_X64=24,
InstructionSet_SSE41_X64=25,
InstructionSet_SSE42_X64=26,
InstructionSet_AVX_X64=27,
InstructionSet_AVX2_X64=28,
InstructionSet_AES_X64=29,
InstructionSet_BMI1_X64=30,
InstructionSet_BMI2_X64=31,
InstructionSet_FMA_X64=32,
InstructionSet_LZCNT_X64=33,
InstructionSet_PCLMULQDQ_X64=34,
InstructionSet_POPCNT_X64=35,
InstructionSet_AVXVNNI_X64=36,
InstructionSet_Sha1=20,
InstructionSet_Sha256=21,
InstructionSet_SHA=22,
InstructionSet_X86Base_X64=23,
InstructionSet_SSE_X64=24,
InstructionSet_SSE2_X64=25,
InstructionSet_SSE3_X64=26,
InstructionSet_SSSE3_X64=27,
InstructionSet_SSE41_X64=28,
InstructionSet_SSE42_X64=29,
InstructionSet_AVX_X64=30,
InstructionSet_AVX2_X64=31,
InstructionSet_AES_X64=32,
InstructionSet_BMI1_X64=33,
InstructionSet_BMI2_X64=34,
InstructionSet_FMA_X64=35,
InstructionSet_LZCNT_X64=36,
InstructionSet_PCLMULQDQ_X64=37,
InstructionSet_POPCNT_X64=38,
InstructionSet_AVXVNNI_X64=39,
InstructionSet_SHA_X64=40,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand All @@ -95,23 +99,27 @@ enum CORINFO_InstructionSet
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_AVXVNNI=19,
InstructionSet_X86Base_X64=20,
InstructionSet_SSE_X64=21,
InstructionSet_SSE2_X64=22,
InstructionSet_SSE3_X64=23,
InstructionSet_SSSE3_X64=24,
InstructionSet_SSE41_X64=25,
InstructionSet_SSE42_X64=26,
InstructionSet_AVX_X64=27,
InstructionSet_AVX2_X64=28,
InstructionSet_AES_X64=29,
InstructionSet_BMI1_X64=30,
InstructionSet_BMI2_X64=31,
InstructionSet_FMA_X64=32,
InstructionSet_LZCNT_X64=33,
InstructionSet_PCLMULQDQ_X64=34,
InstructionSet_POPCNT_X64=35,
InstructionSet_AVXVNNI_X64=36,
InstructionSet_Sha1=20,
InstructionSet_Sha256=21,
InstructionSet_SHA=22,
InstructionSet_X86Base_X64=23,
InstructionSet_SSE_X64=24,
InstructionSet_SSE2_X64=25,
InstructionSet_SSE3_X64=26,
InstructionSet_SSSE3_X64=27,
InstructionSet_SSE41_X64=28,
InstructionSet_SSE42_X64=29,
InstructionSet_AVX_X64=30,
InstructionSet_AVX2_X64=31,
InstructionSet_AES_X64=32,
InstructionSet_BMI1_X64=33,
InstructionSet_BMI2_X64=34,
InstructionSet_FMA_X64=35,
InstructionSet_LZCNT_X64=36,
InstructionSet_PCLMULQDQ_X64=37,
InstructionSet_POPCNT_X64=38,
InstructionSet_AVXVNNI_X64=39,
InstructionSet_SHA_X64=40,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -211,6 +219,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_POPCNT_X64);
if (HasInstructionSet(InstructionSet_AVXVNNI))
AddInstructionSet(InstructionSet_AVXVNNI_X64);
if (HasInstructionSet(InstructionSet_SHA))
AddInstructionSet(InstructionSet_SHA_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -352,6 +362,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64);
if (resultflags.HasInstructionSet(InstructionSet_SHA) && !resultflags.HasInstructionSet(InstructionSet_SHA_X64))
resultflags.RemoveInstructionSet(InstructionSet_SHA);
if (resultflags.HasInstructionSet(InstructionSet_SHA_X64) && !resultflags.HasInstructionSet(InstructionSet_SHA))
resultflags.RemoveInstructionSet(InstructionSet_SHA_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -382,6 +396,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_Vector256);
if (resultflags.HasInstructionSet(InstructionSet_Sha1) && !resultflags.HasInstructionSet(InstructionSet_SHA))
resultflags.RemoveInstructionSet(InstructionSet_Sha1);
if (resultflags.HasInstructionSet(InstructionSet_Sha256) && !resultflags.HasInstructionSet(InstructionSet_SHA))
resultflags.RemoveInstructionSet(InstructionSet_Sha256);
#endif // TARGET_AMD64
#ifdef TARGET_X86
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
Expand Down Expand Up @@ -414,6 +432,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_Vector256);
if (resultflags.HasInstructionSet(InstructionSet_Sha1) && !resultflags.HasInstructionSet(InstructionSet_SHA))
resultflags.RemoveInstructionSet(InstructionSet_Sha1);
if (resultflags.HasInstructionSet(InstructionSet_Sha256) && !resultflags.HasInstructionSet(InstructionSet_SHA))
resultflags.RemoveInstructionSet(InstructionSet_Sha256);
#endif // TARGET_X86

} while (!oldflags.Equals(resultflags));
Expand Down Expand Up @@ -544,6 +566,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVXVNNI";
case InstructionSet_AVXVNNI_X64 :
return "AVXVNNI_X64";
case InstructionSet_Sha1 :
return "Sha1";
case InstructionSet_Sha256 :
return "Sha256";
case InstructionSet_SHA :
return "SHA";
case InstructionSet_SHA_X64 :
return "SHA_X64";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -584,6 +614,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector256";
case InstructionSet_AVXVNNI :
return "AVXVNNI";
case InstructionSet_Sha1 :
return "Sha1";
case InstructionSet_Sha256 :
return "Sha256";
case InstructionSet_SHA :
return "SHA";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -632,6 +668,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
case READYTORUN_INSTRUCTION_Sha: return InstructionSet_SHA;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand All @@ -651,6 +688,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
case READYTORUN_INSTRUCTION_Sha: return InstructionSet_SHA;
#endif // TARGET_X86

default:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Dp=23,
READYTORUN_INSTRUCTION_Rdm=24,
READYTORUN_INSTRUCTION_AvxVnni=25,
READYTORUN_INSTRUCTION_Sha=26,

};

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ bool emitter::IsBMIInstruction(instruction ins)
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
}

//------------------------------------------------------------------------
// IsSHAInstruction: Checks whether an instruction lies in the SHA range
//
// Arguments:
//    ins - The instruction to check
//
// Return Value:
//    true if 'ins' compares within [INS_sha1msg1, INS_sha256rnds2] (both
//    ends inclusive), false otherwise.
//
// Notes:
//    Presumably relies on the SHA instructions being declared contiguously,
//    with INS_sha1msg1 first and INS_sha256rnds2 last - confirm against the
//    instruction table.
//
bool emitter::IsSHAInstruction(instruction ins)
{
    // Anything ordered before the first SHA instruction is out of range.
    if (ins < INS_sha1msg1)
    {
        return false;
    }

    return ins <= INS_sha256rnds2;
}

regNumber emitter::getBmiRegNumber(instruction ins)
{
switch (ins)
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_BMI2_X64:
genBMI1OrBMI2Intrinsic(node);
break;
case InstructionSet_SHA:
genSHAIntrinsic(node);
break;
case InstructionSet_FMA:
genFMAIntrinsic(node);
break;
Expand Down Expand Up @@ -2023,6 +2026,22 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
genProduceReg(node);
}

//------------------------------------------------------------------------
// genSHAIntrinsic: Code generation entry point for a SHA hardware intrinsic node
//
// Arguments:
//    node - The hardware intrinsic node
//
// Notes:
//    Instruction emission is not implemented yet. The node's operands are
//    consumed and its register is produced, but no instruction is emitted in
//    between. The looked-up instruction and the emitter are fetched here but
//    not used yet.
//
void CodeGen::genSHAIntrinsic(GenTreeHWIntrinsic* node)
{
    emitter*       emitr    = GetEmitter();
    regNumber      dstReg   = node->GetRegNum();
    var_types      nodeType = node->TypeGet();
    NamedIntrinsic id       = node->GetHWIntrinsicId();
    instruction    shaIns   = HWIntrinsicInfo::lookupIns(id, nodeType);

    // The node must already have a register assigned.
    assert(dstReg != REG_NA);

    // TODO: Generate SHA Intrinsic

    genConsumeMultiOpOperands(node);
    genProduceReg(node);
}

//------------------------------------------------------------------------
// genFMAIntrinsic: Generates the code for an FMA hardware intrinsic node
//
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,25 @@ HARDWARE_INTRINSIC(SSE2, UCOMISD,
HARDWARE_INTRINSIC(SSE41, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Sha1 Intrinsics
// Every row below supplies an instruction only in the second slot of the per-type
// list (the TYP_UBYTE column in the header above); all other slots are INS_invalid.
// NOTE(review): per the Intel SDM, SHA1RNDS4 takes an imm8 as its last operand -
// confirm that HW_Category_SimpleSIMD / HW_Flag_NoFlag is the right classification
// for FourRounds rather than an immediate-operand category.
HARDWARE_INTRINSIC(Sha1, MessageSchedule1, 16, 2, {INS_invalid, INS_sha1msg1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(Sha1, MessageSchedule2, 16, 2, {INS_invalid, INS_sha1msg2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(Sha1, NextE, 16, 2, {INS_invalid, INS_sha1nexte, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(Sha1, FourRounds, 16, 3, {INS_invalid, INS_sha1rnds4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Sha256 Intrinsics
// Every row below supplies an instruction only in the second slot of the per-type
// list (the TYP_UBYTE column in the header above); all other slots are INS_invalid.
// NOTE(review): per the Intel SDM, SHA256RNDS2 reads its third operand implicitly
// from XMM0 - confirm that TwoRounds (NumArg 3) gets that operand placed in XMM0
// by register allocation / codegen.
HARDWARE_INTRINSIC(Sha256, MessageSchedule1, 16, 2, {INS_invalid, INS_sha256msg1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(Sha256, MessageSchedule2, 16, 2, {INS_invalid, INS_sha256msg2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(Sha256, TwoRounds, 16, 3, {INS_invalid, INS_sha256rnds2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)

#endif // FEATURE_HW_INTRINSIC

#undef HARDWARE_INTRINSIC
Expand Down
Loading

0 comments on commit 89c89dd

Please sign in to comment.