Skip to content

Commit

Permalink
[AArch64] Consider histcnt smaller than i32 in the cost model (llvm#108521)
Browse files Browse the repository at this point in the history

This PR updates the AArch64 cost model to account for the lower cost of
histograms whose element type is smaller than i32, reflecting the
improvements from llvm#101017 and llvm#103037.

Work by Max Beck-Jones (@DevM-uk)

---------

Co-authored-by: DevM-uk <max.beck-jones@arm.com>
  • Loading branch information
2 people authored and tmsri committed Sep 19, 2024
1 parent 36a7d56 commit def587d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 21 deletions.
30 changes: 18 additions & 12 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,25 +517,31 @@ static bool isUnpackedVectorVT(EVT VecVT) {
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
unsigned TotalHistCnts = 1;

// Only allow (32b and 64b) integers or pointers for now...
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
(EltTy->getScalarSizeInBits() != 32 &&
EltTy->getScalarSizeInBits() != 64))
unsigned EltSize = EltTy->getScalarSizeInBits();
// Only allow (up to 64b) integers or pointers
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) || EltSize > 64)
return InstructionCost::getInvalid();

// FIXME: Hacky check for legal vector types. We can promote smaller types
// but we cannot legalize vectors via splitting for histcnt.
// FIXME: We should be able to generate histcnt for fixed-length vectors
// using ptrue with a specific VL.
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
if ((VTy->getElementCount().getKnownMinValue() != 2 &&
VTy->getElementCount().getKnownMinValue() != 4) ||
VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
!VTy->isScalableTy())
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
unsigned EC = VTy->getElementCount().getKnownMinValue();
if (!isPowerOf2_64(EC) || !VTy->isScalableTy())
return InstructionCost::getInvalid();

return InstructionCost(BaseHistCntCost);
// HistCnt only supports 32b and 64b element types
unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

if (EC == 2 || (LegalEltSize == 32 && EC == 4))
return InstructionCost(BaseHistCntCost);

unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize;
TotalHistCnts = EC / NaturalVectorWidth;
}

return InstructionCost(BaseHistCntCost * TotalHistCnts);
}

InstructionCost
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -971,26 +971,26 @@ define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m
ret void
}

define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv8i16'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
ret void
}

define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv16i8'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
Expand Down Expand Up @@ -1049,13 +1049,13 @@ define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
ret void
}

define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv4i64'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
Expand Down

0 comments on commit def587d

Please sign in to comment.