dotnet · BruceForstall · Oct 5, 2022 · Oct 4, 2022 · Oct 4, 2022 · Oct 4, 2022
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp
@@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24)
     p[0] = Opcode0;
     p[1] = Opcode1;
 }
+
+// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.:
+//    d29ff600 mov     x0, #65456
+//    f2ab8640 movk    x0, #23602, lsl #16
+//    f2c04bc0 movk    x0, #606, lsl #32
+//
+// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address.
+//
+// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise.
+// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`.
+
+bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon)
+{
+    UINT32 instr = *p;
+    if ((instr & 0xffe00000) == 0xd2800000)
+    {
+        *pReg = instr & 0x1f;
+        *pCon = (instr >> 5) & 0xffff;
+        return true;
+    }
+
+    return false;
+}
+
+bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift)
+{
+    UINT32 instr = *p;
+    if ((instr & 0xff800000) == 0xf2800000)
+    {
+        *pReg = instr & 0x1f;
+        *pCon = (instr >> 5) & 0xffff;
+        *pShift = ((instr >> 21) & 0x3) * 16;
+        return true;
+    }
+
+    return false;
+}
+
+// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction
+void PutArm64MovkConstant(UINT32* p, unsigned con)
+{
+    *p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5);
+}
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h
@@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12);
 void PutThumb2Mov32(UINT16* p, UINT32 imm32);
 void PutThumb2BlRel24(UINT16* p, INT32 imm24);
 
+bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon);
+bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift);
+
+void PutArm64MovkConstant(UINT32* p, unsigned con);
+
 template <typename T, int size>
 inline constexpr unsigned ArrLen(T (&)[size])
 {

diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp
@@ -298,22 +298,24 @@ struct DiffData
     CompileResult* cr2;
 
     // Details of the first block
-    size_t blocksize1;
-    size_t datablock1;
-    size_t datablockSize1;
-    size_t originalBlock1;
-    size_t originalDataBlock1;
-    size_t otherCodeBlock1;
-    size_t otherCodeBlockSize1;
+    unsigned char* block1;
+    size_t         blocksize1;
+    unsigned char* datablock1;
+    size_t         datablockSize1;
+    size_t         originalBlock1;
+    size_t         originalDataBlock1;
+    size_t         otherCodeBlock1;
+    size_t         otherCodeBlockSize1;
 
     // Details of the second block
-    size_t blocksize2;
-    size_t datablock2;
-    size_t datablockSize2;
-    size_t originalBlock2;
-    size_t originalDataBlock2;
-    size_t otherCodeBlock2;
-    size_t otherCodeBlockSize2;
+    unsigned char* block2;
+    size_t         blocksize2;
+    unsigned char* datablock2;
+    size_t         datablockSize2;
+    size_t         originalBlock2;
+    size_t         originalDataBlock2;
+    size_t         otherCodeBlock2;
+    size_t         otherCodeBlockSize2;
 };
 
 //
@@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets(
         return true;
     }
 
+    const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture();
     const DiffData* data         = (const DiffData*)payload;
     size_t          ip1          = data->originalBlock1 + blockOffset;
     size_t          ip2          = data->originalBlock2 + blockOffset;
@@ -435,6 +438,103 @@ bool NearDiffer::compareOffsets(
     if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
         return true;
 
+    // There are some cases on arm64 where we generate multiple instruction register construction of addresses
+    // but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is
+    // allocPgoInstrumentationBySchema(), which returns an address into which the JIT writes PGO probe data.
+    // The instruction sequence is something like this:
+    //     mov     x0, #63408
+    //     movk    x0, #23602, lsl #16
+    //     movk    x0, #606, lsl #32
+    //
+    // Here, we try to match this sequence and look it up in the address map.
+    // 
+    // Some version of this logic might apply to ARM as well.
+    //
+    if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64)
+    {
+        bool movk2 = false, movk3 = false;
+        unsigned reg1_1, reg1_2, reg2_1, reg2_2, reg3_1, reg3_2, reg4_1, reg4_2;
+        unsigned con1_1, con1_2, con2_1, con2_2, con3_1, con3_2, con4_1, con4_2;
+        unsigned shift2_1, shift2_2, shift3_1, shift3_2, shift4_1, shift4_2;
+        UINT32* iaddr1    = (UINT32*)(data->block1 + blockOffset);
+        UINT32* iaddr2    = (UINT32*)(data->block2 + blockOffset);
+        UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1);
+        UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2);
+
+        // We're assuming that a mov/movk isn't the last instruction in the instruction buffer.
+        if ((iaddr1 < iaddr1end) &&
+            (iaddr2 < iaddr2end) &&
+            GetArm64MovConstant(iaddr1, &reg1_1, &con1_1) &&
+            GetArm64MovConstant(iaddr2, &reg1_2, &con1_2) &&
+            (reg1_1 == reg1_2))
+        {
+            if ((iaddr1 + 1 < iaddr1end) &&
+                (iaddr2 + 1 < iaddr2end) &&
+                GetArm64MovkConstant(iaddr1 + 1, &reg2_1, &con2_1, &shift2_1) &&
+                GetArm64MovkConstant(iaddr2 + 1, &reg2_2, &con2_2, &shift2_2) &&
+                (reg2_1 == reg2_2) &&
+                (shift2_1 == shift2_2) &&
+                (shift2_1 == 16))
+            {
+                DWORDLONG addr1 = (DWORDLONG)con1_1 + ((DWORDLONG)con2_1 << 16);
+                DWORDLONG addr2 = (DWORDLONG)con1_2 + ((DWORDLONG)con2_2 << 16);
+
+                // We currently assume the address requires at least 1 'movk' instruction.
+                if ((iaddr1 + 2 < iaddr1end) &&
+                    (iaddr2 + 2 < iaddr2end) &&
+                    GetArm64MovkConstant(iaddr1 + 2, &reg3_1, &con3_1, &shift3_1) &&
+                    GetArm64MovkConstant(iaddr2 + 2, &reg3_2, &con3_2, &shift3_2) &&
+                    (reg3_1 == reg3_2) &&
+                    (shift3_1 == shift3_2) &&
+                    (shift3_1 == 32))
+                {
+                    movk2 = true;
+                    addr1 += (DWORDLONG)con3_1 << 32;
+                    addr2 += (DWORDLONG)con3_2 << 32;
+
+                    if ((iaddr1 + 3 < iaddr1end) &&
+                        (iaddr2 + 3 < iaddr2end) &&
+                        GetArm64MovkConstant(iaddr1 + 3, &reg4_1, &con4_1, &shift4_1) &&
+                        GetArm64MovkConstant(iaddr2 + 3, &reg4_2, &con4_2, &shift4_2) &&
+                        (reg4_1 == reg4_2) &&
+                        (shift4_1 == shift4_2) &&
+                        (shift4_1 == 48))
+                    {
+                        movk3 = true;
+                        addr1 += (DWORDLONG)con4_1 << 48;
+                        addr2 += (DWORDLONG)con4_2 << 48;
+                    }
+                }
+
+                // Check the constants! We don't need to check 'addr1 == addr2' because if that were
+                // true we wouldn't have gotten here.
+                //
+                // Note: when replaying on a 32-bit platform, we must have movk2==false and movk3==false.
+
+                DWORDLONG mapped1 = (DWORDLONG)data->cr1->searchAddressMap((void*)addr1);
+                DWORDLONG mapped2 = (DWORDLONG)data->cr2->searchAddressMap((void*)addr2);
+                if ((mapped1 == mapped2) && (mapped1 != (DWORDLONG)-1))
+                {
+                    // Now, zero out the constants in the `movk` instructions so when the disassembler
+                    // gets to them, they compare equal.
+                    PutArm64MovkConstant(iaddr1 + 1, 0);
+                    PutArm64MovkConstant(iaddr2 + 1, 0);
+                    if (movk2)
+                    {
+                        PutArm64MovkConstant(iaddr1 + 2, 0);
+                        PutArm64MovkConstant(iaddr2 + 2, 0);
+                    }
+                    if (movk3)
+                    {
+                        PutArm64MovkConstant(iaddr1 + 3, 0);
+                        PutArm64MovkConstant(iaddr2 + 3, 0);
+                    }
+                    return true;
+                }
+            }
+        }
+    }
+
     return false;
 }
 
@@ -513,11 +613,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc,
                      cr2,
 
                      // Details of the first block
-                     (size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
+                     block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
                      (size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1,
 
                      // Details of the second block
-                     (size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
+                     block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
                      (size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2};
 
 #ifdef USE_COREDISTOOLS