diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index 4fa386c8fed04..034e309634ebd 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24) p[0] = Opcode0; p[1] = Opcode1; } + +// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.: +// d29ff600 mov x0, #65456 +// f2ab8640 movk x0, #23602, lsl #16 +// f2c04bc0 movk x0, #606, lsl #32 +// +// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address. +// +// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise (in which case the output parameters are not written). +// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`. The register is read from bits 0-4 and the constant from the 16-bit field at bits 5-20; the movk shift is bits 21-22 scaled by 16 (0, 16, 32 or 48). The mov pattern only matches when those shift bits are zero. + +bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon) +{ + UINT32 instr = *p; + if ((instr & 0xffe00000) == 0xd2800000) + { + *pReg = instr & 0x1f; + *pCon = (instr >> 5) & 0xffff; + return true; + } + + return false; +} + +bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift) +{ + UINT32 instr = *p; + if ((instr & 0xff800000) == 0xf2800000) + { + *pReg = instr & 0x1f; + *pCon = (instr >> 5) & 0xffff; + *pShift = ((instr >> 21) & 0x3) * 16; + return true; + } + + return false; +} + +// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction (the 16-bit field at bits 5-20). Only the low 16 bits of `con` are used and all other instruction bits are preserved; the instruction at `p` is not checked to be a `movk`. +void PutArm64MovkConstant(UINT32* p, unsigned con) +{ + *p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5); +} diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h index aa5605576625c..f414e9be3623c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h @@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12); void 
PutThumb2Mov32(UINT16* p, UINT32 imm32); void PutThumb2BlRel24(UINT16* p, INT32 imm24); +bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon); +bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift); + +void PutArm64MovkConstant(UINT32* p, unsigned con); + template <typename T, int size> inline constexpr unsigned ArrLen(T (&)[size]) { diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index 2d480ee776dc5..1ca4d1bbd334e 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -298,22 +298,24 @@ struct DiffData CompileResult* cr2; // Details of the first block - size_t blocksize1; - size_t datablock1; - size_t datablockSize1; - size_t originalBlock1; - size_t originalDataBlock1; - size_t otherCodeBlock1; - size_t otherCodeBlockSize1; + unsigned char* block1; + size_t blocksize1; + unsigned char* datablock1; + size_t datablockSize1; + size_t originalBlock1; + size_t originalDataBlock1; + size_t otherCodeBlock1; + size_t otherCodeBlockSize1; // Details of the second block - size_t blocksize2; - size_t datablock2; - size_t datablockSize2; - size_t originalBlock2; - size_t originalDataBlock2; - size_t otherCodeBlock2; - size_t otherCodeBlockSize2; + unsigned char* block2; + size_t blocksize2; + unsigned char* datablock2; + size_t datablockSize2; + size_t originalBlock2; + size_t originalDataBlock2; + size_t otherCodeBlock2; + size_t otherCodeBlockSize2; }; // @@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets( return true; } + const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture(); const DiffData* data = (const DiffData*)payload; size_t ip1 = data->originalBlock1 + blockOffset; size_t ip2 = data->originalBlock2 + blockOffset; @@ -435,6 +438,140 @@ bool NearDiffer::compareOffsets( if ((mapped1 == mapped2) && (mapped1 != (size_t)-1)) return true; + // There are some cases on arm64 where we generate 
multiple instruction register construction of addresses + // but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is + // allocPgoInstrumentationBySchema(), which returns an address the JIT writes into the code stream + // (used to store dynamic PGO probe data). + // + // The instruction sequence is something like this: + // mov x0, #63408 + // movk x0, #23602, lsl #16 + // movk x0, #606, lsl #32 + // + // Here, we try to match this sequence and look it up in the address map. + // + // Since the mov/movk sequence is specific to the replay address constant, we don't assume the baseline + // and diff have the same number of instructions (e.g., it's possible to skip a `movk` if it is zero). + // + // Some version of this logic might apply to ARM as well. + // + if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64) + { + bool movk2_1 = false, movk3_1 = false; + bool movk2_2 = false, movk3_2 = false; + + unsigned reg1_1 = 0, reg2_1, reg3_1, reg4_1; + unsigned reg1_2 = 0, reg2_2, reg3_2, reg4_2; + unsigned con1_1, con2_1, con3_1, con4_1; + unsigned con1_2, con2_2, con3_2, con4_2; + unsigned shift2_1, shift3_1, shift4_1; + unsigned shift2_2, shift3_2, shift4_2; + + UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset); + UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset); + UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1); + UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2); + + DWORDLONG addr1 = 0; + DWORDLONG addr2 = 0; + + // Look for a mov/movk address pattern in code stream 1. + + if ((iaddr1 < iaddr1end) && + GetArm64MovConstant(iaddr1, &reg1_1, &con1_1)) + { + // We assume the address requires at least 1 'movk' instruction. 
+ if ((iaddr1 + 1 < iaddr1end) && + GetArm64MovkConstant(iaddr1 + 1, &reg2_1, &con2_1, &shift2_1) && + (reg1_1 == reg2_1)) + { + addr1 = (DWORDLONG)con1_1 + ((DWORDLONG)con2_1 << shift2_1); + + if ((iaddr1 + 2 < iaddr1end) && + GetArm64MovkConstant(iaddr1 + 2, &reg3_1, &con3_1, &shift3_1) && + (reg1_1 == reg3_1)) + { + movk2_1 = true; + addr1 += (DWORDLONG)con3_1 << shift3_1; + + if ((iaddr1 + 3 < iaddr1end) && + GetArm64MovkConstant(iaddr1 + 3, &reg4_1, &con4_1, &shift4_1) && + (reg1_1 == reg4_1)) + { + movk3_1 = true; + addr1 += (DWORDLONG)con4_1 << shift4_1; + } + } + } + } + + // Look for a mov/movk address pattern in code stream 2. + + if ((iaddr2 < iaddr2end) && + GetArm64MovConstant(iaddr2, &reg1_2, &con1_2)) + { + // We assume the address requires at least 1 'movk' instruction. + if ((iaddr2 + 1 < iaddr2end) && + GetArm64MovkConstant(iaddr2 + 1, &reg2_2, &con2_2, &shift2_2) && + (reg1_2 == reg2_2)) + { + addr2 = (DWORDLONG)con1_2 + ((DWORDLONG)con2_2 << shift2_2); + + if ((iaddr2 + 2 < iaddr2end) && + GetArm64MovkConstant(iaddr2 + 2, &reg3_2, &con3_2, &shift3_2) && + (reg1_2 == reg3_2)) + { + movk2_2 = true; + addr2 += (DWORDLONG)con3_2 << shift3_2; + + if ((iaddr2 + 3 < iaddr2end) && + GetArm64MovkConstant(iaddr2 + 3, &reg4_2, &con4_2, &shift4_2) && + (reg1_2 == reg4_2)) + { + movk3_2 = true; + addr2 += (DWORDLONG)con4_2 << shift4_2; + } + } + } + } + + // Check the constants. We don't need to check 'addr1 == addr2' because if that were + // true we wouldn't have gotten here. 
+ // + // Note: when replaying on a 32-bit platform, we must have + // movk2_1 == movk2_2 == movk3_1 == movk3_2 == false + + if ((addr1 != 0) && (addr2 != 0) && (reg1_1 == reg1_2)) + { + DWORDLONG mapped1 = (DWORDLONG)data->cr1->searchAddressMap((void*)addr1); + DWORDLONG mapped2 = (DWORDLONG)data->cr2->searchAddressMap((void*)addr2); + if ((mapped1 == mapped2) && (mapped1 != (DWORDLONG)-1)) + { + // Now, zero out the constants in the `movk` instructions so when the disassembler + // gets to them, they compare equal. + PutArm64MovkConstant(iaddr1 + 1, 0); + PutArm64MovkConstant(iaddr2 + 1, 0); + if (movk2_1) + { + PutArm64MovkConstant(iaddr1 + 2, 0); + } + if (movk2_2) + { + PutArm64MovkConstant(iaddr2 + 2, 0); + } + if (movk3_1) + { + PutArm64MovkConstant(iaddr1 + 3, 0); + } + if (movk3_2) + { + PutArm64MovkConstant(iaddr2 + 3, 0); + } + return true; + } + } + } + return false; } @@ -513,11 +650,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc, cr2, // Details of the first block - (size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1, + block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1, (size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1, // Details of the second block - (size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2, + block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2, (size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2}; #ifdef USE_COREDISTOOLS