diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 6a0bec58127d5a..9e0860934f777e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5551,8 +5551,7 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, } if (CV->getSplatValue()) { - APInt DefBits = APInt::getSplat( - DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits())); + APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger()); auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * { MachineInstr *NewOp; bool Inv = false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index ff7152192fe35f..5616d063f70bcc 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -42,7 +42,6 @@ #include "AArch64GenRegisterBankInfo.def" using namespace llvm; -static const unsigned CustomMappingID = 1; AArch64RegisterBankInfo::AArch64RegisterBankInfo( const TargetRegisterInfo &TRI) { @@ -425,27 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl( MI.getOperand(2).setReg(Ext.getReg(0)); return applyDefaultMapping(OpdMapper); } - case AArch64::G_DUP: { - // Extend smaller gpr to 32-bits - assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 && - "Expected sources smaller than 32-bits"); - Builder.setInsertPt(*MI.getParent(), MI.getIterator()); - - Register ConstReg; - auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg()); - if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) { - auto CstVal = ConstMI->getOperand(1).getCImm()->getValue(); - ConstReg = - Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0); - ConstMI->eraseFromParent(); - } else { - ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg()) - .getReg(0); - } - MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID)); - MI.getOperand(1).setReg(ConstReg); - return applyDefaultMapping(OpdMapper); - } default: llvm_unreachable("Don't know how to handle that operation"); } @@ -814,13 +792,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank || onlyDefinesFP(*ScalarDef, MRI, TRI))) OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - else { - if (ScalarTy.getSizeInBits() < 32 && - getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) - // Calls applyMappingImpl() - MappingID = CustomMappingID; + else OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; - } break; } case TargetOpcode::G_TRUNC: { @@ -1042,8 +1015,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // type to i32 in applyMappingImpl. LLT Ty = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) - // Calls applyMappingImpl() - MappingID = CustomMappingID; + MappingID = 1; OpRegBankIdx[2] = PMI_FirstGPR; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll index 2b9ef7acd4a4d7..c97a00ccdd4557 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) { ; ; GISEL-LABEL: combine_vec_udiv_uniform: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #25645 // =0x642d -; GISEL-NEXT: dup v1.8h, w8 +; GISEL-NEXT: adrp x8, .LCPI0_0 +; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir index 66c8c2efda9bc1..4cd6eef531ce08 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir @@ -16,11 +16,10 @@ body: | ; CHECK-LABEL: name: v4s32_gpr ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) - ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %4:_(<4 x s32>) = G_DUP %0(s32) $q0 = COPY %4(<4 x s32>) @@ -38,11 +37,10 @@ body: | ; CHECK-LABEL: name: v4s64_gpr ; CHECK: liveins: $x0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %4:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %4(<2 x s64>) @@ -60,11 +58,10 @@ body: | ; CHECK-LABEL: name: v2s32_gpr ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) - ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $w0 %4:_(<2 x s32>) = G_DUP %0(s32) $d0 = COPY %4(<2 x s32>) @@ -82,11 +79,10 @@ body: | ; CHECK-LABEL: name: v4s32_fpr ; CHECK: liveins: $s0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) - ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %4:_(<4 x s32>) = G_DUP %0(s32) $q0 = COPY %4(<4 x s32>) @@ -104,11 +100,10 @@ body: | ; CHECK-LABEL: name: v2s64_fpr ; CHECK: liveins: $d0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %4:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %4(<2 x s64>) @@ -126,11 +121,10 @@ body: | ; CHECK-LABEL: name: v2s32_fpr ; CHECK: liveins: $s0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) - ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $s0 %4:_(<2 x s32>) = G_DUP %0(s32) $d0 = COPY %4(<2 x s32>) @@ -148,11 +142,10 @@ body: | ; CHECK-LABEL: name: v2s64_fpr_copy ; CHECK: liveins: $d0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %6:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %6(<2 x s64>) @@ -170,13 +163,11 @@ body: | ; CHECK-LABEL: name: v416s8_gpr ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8) - ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32) - ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8) + ; CHECK: $q0 = COPY [[DUP]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %trunc:_(s8) = G_TRUNC %0(s32) %1:_(<16 x s8>) = G_DUP %trunc(s8) diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 5aff8e03514879..307aa397eabbbe 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind { ; ; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov w8, #-999 // =0xfffffc19 +; CHECK-GI-NEXT: adrp x8, .LCPI34_0 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: dup v1.8h, w8 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %tmp3 = sext <8 x i8> %arg to <8 x i16> @@ -1088,13 +1088,29 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind { define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind { ; Do not use SMULL if the BUILD_VECTOR element values are too big. -; CHECK-LABEL: umull_noextvec_v8i8_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #999 // =0x3e7 -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: dup v1.8h, w8 -; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: mov w8, #999 // =0x3e7 +; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEON-NEXT: dup v1.8h, w8 +; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: mov w8, #999 // =0x3e7 +; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SVE-NEXT: dup v1.8h, w8 +; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI38_0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0] +; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp3 = zext <8 x i8> %arg to <8 x i16> %tmp4 = mul <8 x i16> %tmp3, ret <8 x i16> %tmp4 diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index cb85bbda80a803..170ba7292ae608 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -109,11 +109,29 @@ define <4 x i32> @movi4s_lsl16() { } define <4 x i32> @movi4s_fneg() { -; CHECK-LABEL: movi4s_fneg: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-NEXT: fneg v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP16-SD-LABEL: movi4s_fneg: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: movi4s_fneg: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: movi4s_fneg: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: movi4s_fneg: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s +; CHECK-FP16-GI-NEXT: ret ret <4 x i32> } @@ -290,17 +308,23 @@ define <8 x i16> @mvni8h_neg() { ; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8 ; CHECK-NOFP16-SD-NEXT: ret ; -; CHECK-FP16-LABEL: mvni8h_neg: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v0.8h, #240 -; CHECK-FP16-NEXT: fneg v0.8h, v0.8h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: mvni8h_neg: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi v0.8h, #240 +; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h +; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: mvni8h_neg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: mov w8, #-32528 // =0xffff80f0 -; CHECK-NOFP16-GI-NEXT: dup v0.8h, w8 +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 +; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] ; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: mvni8h_neg: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: movi v0.8h, #240 +; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h +; CHECK-FP16-GI-NEXT: ret ret <8 x i16> } @@ -470,11 +494,29 @@ define <2 x double> @fmov2d() { } define <2 x double> @fmov2d_neg0() { -; CHECK-LABEL: fmov2d_neg0: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: fneg v0.2d, v0.2d -; CHECK-NEXT: ret +; CHECK-NOFP16-SD-LABEL: fmov2d_neg0: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: fmov2d_neg0: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: fmov2d_neg0: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: fmov2d_neg0: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000 +; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: ret ret <2 x double> } @@ -539,4 +581,5 @@ define <2 x i32> @movi1d() { ret <2 x i32> %1 } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-FP16: {{.*}} ; CHECK-NOFP16: {{.*}}