Skip to content

Commit

Permalink
[AArch64][GlobalISel] Reland Make G_DUP immediate 32-bits or larger (llvm#96780)
Browse files Browse the repository at this point in the history

The immediate operand of G_DUP is extended to at least 32 bits in
RegBankSelect to allow for better pattern matching in TableGen.

The previous version of this patch erased the constant without checking
whether it had more than one use.

Changes:
 - Does not erase the constant
 - Added @v_dup16_const test
  • Loading branch information
chuongg3 committed Jul 17, 2024
1 parent 3a0e015 commit 2b331a6
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 144 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5559,7 +5559,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}

if (CV->getSplatValue()) {
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
APInt DefBits = APInt::getSplat(
DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
Expand Down
35 changes: 32 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
static const unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
const TargetRegisterInfo &TRI) {
Expand Down Expand Up @@ -424,6 +425,26 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(2).setReg(Ext.getReg(0));
return applyDefaultMapping(OpdMapper);
}
case AArch64::G_DUP: {
// Widen a G_DUP scalar source that is narrower than 32 bits to s32, then
// rewrite the G_DUP to read the widened register instead.
// Precondition: operand 1 is strictly narrower than 32 bits.
assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
"Expected sources smaller than 32-bits");
// Any instruction built below is inserted immediately before the G_DUP.
Builder.setInsertPt(*MI.getParent(), MI.getIterator());

// Receives the s32 replacement for operand 1. Despite the name it is only a
// constant on the G_CONSTANT path below.
Register ConstReg;
auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
// The source is a G_CONSTANT: build a fresh s32 constant holding the
// sign-extended value. The original G_CONSTANT is deliberately left in
// place (it is not erased here), since it may have other users.
auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
ConstReg =
Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
} else {
// Any other source: any-extend it to s32.
// NOTE(review): an any-extend leaves the high bits unspecified; presumably
// only the low element-width bits are consumed by the dup -- confirm.
ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
.getReg(0);
}
// Assign the GPR bank to the newly created register and make the G_DUP use
// it as its source operand.
MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
MI.getOperand(1).setReg(ConstReg);
// The remaining operands are handled by the default mapping.
return applyDefaultMapping(OpdMapper);
}
default:
llvm_unreachable("Don't know how to handle that operation");
}
Expand Down Expand Up @@ -792,8 +813,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
else {
if (ScalarTy.getSizeInBits() < 32 &&
getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
// Calls applyMappingImpl()
MappingID = CustomMappingID;
}
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
}
break;
}
case TargetOpcode::G_TRUNC: {
Expand Down Expand Up @@ -1014,8 +1041,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// If the type is i8/i16, and the regbank will be GPR, then we change the
// type to i32 in applyMappingImpl.
LLT Ty = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
MappingID = 1;
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
// Calls applyMappingImpl()
MappingID = CustomMappingID;
}
OpRegBankIdx[2] = PMI_FirstGPR;
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
;
; GISEL-LABEL: combine_vec_udiv_uniform:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI0_0
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; GISEL-NEXT: mov w8, #25645 // =0x642d
; GISEL-NEXT: dup v1.8h, w8
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
Expand Down
75 changes: 42 additions & 33 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ body: |
; CHECK-LABEL: name: v4s32_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
Expand All @@ -37,10 +38,11 @@ body: |
; CHECK-LABEL: name: v4s64_gpr
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
Expand All @@ -58,10 +60,11 @@ body: |
; CHECK-LABEL: name: v2s32_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $w0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
Expand All @@ -79,10 +82,11 @@ body: |
; CHECK-LABEL: name: v4s32_fpr
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $s0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
Expand All @@ -100,10 +104,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
Expand All @@ -121,10 +126,11 @@ body: |
; CHECK-LABEL: name: v2s32_fpr
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $s0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
Expand All @@ -142,10 +148,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr_copy
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%6:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %6(<2 x s64>)
Expand All @@ -163,11 +170,13 @@ body: |
; CHECK-LABEL: name: v416s8_gpr
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%trunc:_(s8) = G_TRUNC %0(s32)
%1:_(<16 x s8>) = G_DUP %trunc(s8)
Expand Down
34 changes: 9 additions & 25 deletions llvm/test/CodeGen/AArch64/aarch64-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
;
; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: mov w8, #-999 // =0xfffffc19
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp3 = sext <8 x i8> %arg to <8 x i16>
Expand Down Expand Up @@ -1088,29 +1088,13 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {

define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: mov w8, #999 // =0x3e7
; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEON-NEXT: dup v1.8h, w8
; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: mov w8, #999 // =0x3e7
; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SVE-NEXT: dup v1.8h, w8
; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #999 // =0x3e7
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
ret <8 x i16> %tmp4
Expand Down
53 changes: 33 additions & 20 deletions llvm/test/CodeGen/AArch64/arm64-dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
ret <4 x i32> %tmp4
}

; Splat of the constant i16 10 into a <4 x i16>, where the same constant value
; is also stored as a scalar through %p, so the constant has a use besides the
; splat. The expected output materializes the vector with movi and the scalar
; with a separate mov feeding the strh. The %y argument is unused.
define <4 x i16> @v_dup16_const(i16 %y, ptr %p) {
; CHECK-LABEL: v_dup16_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4h v0, #10
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: strh w8, [x1]
; CHECK-NEXT: ret
%i = insertelement <4 x i16> undef, i16 10, i32 0
%lo = shufflevector <4 x i16> %i, <4 x i16> undef, <4 x i32> zeroinitializer
store i16 10, ptr %p
ret <4 x i16> %lo
}

define <4 x float> @v_dupQfloat(float %A) nounwind {
; CHECK-LABEL: v_dupQfloat:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -420,9 +433,9 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI33_0
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -443,9 +456,9 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -462,9 +475,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
Expand All @@ -481,9 +494,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
Expand All @@ -503,12 +516,12 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: disguised_dup:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI37_1
; CHECK-GI-NEXT: adrp x8, .LCPI38_1
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_1]
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: str q2, [x1]
Expand All @@ -531,8 +544,8 @@ define <2 x i32> @dup_const2(<2 x i32> %A) nounwind {
;
; CHECK-GI-LABEL: dup_const2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: ret
%tmp2 = add <2 x i32> %A, <i32 8421378, i32 8421378>
Expand All @@ -550,8 +563,8 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
;
; CHECK-GI-LABEL: dup_const4_ext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
; CHECK-GI-NEXT: add.4s v0, v0, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -575,12 +588,12 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
;
; CHECK-GI-LABEL: dup_const24:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI40_1
; CHECK-GI-NEXT: adrp x8, .LCPI41_1
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1]
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI41_1]
; CHECK-GI-NEXT: adrp x8, .LCPI41_0
; CHECK-GI-NEXT: add.2s v0, v0, v3
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0]
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI41_0]
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: add.4s v1, v2, v3
; CHECK-GI-NEXT: eor.16b v0, v1, v0
Expand Down
Loading

0 comments on commit 2b331a6

Please sign in to comment.