Skip to content

Commit

Permalink
[AArch64] Add FEAT_SME_B16B16 and remove FEAT_B16B16 (llvm#102501)
Browse files Browse the repository at this point in the history
Implement FEAT_SME_B16B16 to enable ZA-targeting non-widening SME
BFloat16 instructions. Remove the now redundant FEAT_B16B16 which has
been replaced by FEAT_SVE_B16B16 and FEAT_SME_B16B16 (this commit), see
llvm#101480 for the details and
reasoning of this change to LLVM.

FEAT_SME_B16B16 is documented under the latest Armv9.4 feature
documentation:

https://developer.arm.com/documentation/109697/0100/Feature-descriptions/The-Armv9-4-architecture-extensio

- Changes to Clang AArch64 frontend
- Change target guard of SME2 ZA-targeting non-widening BFloat16
intrinsics to 'sme-b16b16'

- Changes to LLVM AArch64 backend
  - llvm/lib/Target/AArch64/AArch64Features.td
- Create FeatureSMEB16B16, which implies FeatureSME2 and
FeatureSVEB16B16
	- Remove FeatureB16B16
	- Fix description of FeatureSVEB16B16
  - llvm/lib/Target/AArch64/AArch64InstrInfo.td
	- Create HasSMEB16B16 predicate
  - llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
- Change predictication of SME2 ZA-targeting non-widening BFloat16
instructions to new HasSMEB16B16
  - llvm/lib/Target/AArch64/AArch64.td
- Add HasSMEB16B16 to SME2Unsupported (FEAT_SME_B16B16 implies
FEAT_SME2)
  - llvm/lib/AArch64/AsmParser/AArch64AsmParser.cpp
	- Remove flag 'b16b16' mapping to removed FeatureB16B16
	- Add flag 'sme-b16b16' mapping to new FeatureSMEB16B16

- Changes to LLVM unit tests
  - llvm/unittests/TargetParser/TargetParserTest.cpp
	- Add new sme-b16b16 flag to existing target parser tests
	- Add tests for the sme-b16b16 dependencies:
- 'sme-b16b16' should enable 'sme2', 'sve-b16b16'. - Remove 'b16b16'
from bf16 dependency test

- Added MC tests
    - llvm/test/MC/AArch64/SME2p1
- To ensure that ZA-targeting multi-vector non-widening BFloat16
instructions are enabled by +sme-b16b16, and that this feature is
removed by +nosme-b61b6.

- Modidified tests
- All CodeGen, Semantic, and MC tests that are effected by the removal
of 'b16b16', have been modified to supply and/or expect 'sme-b16b16'
where appropriate.
  • Loading branch information
SpencerAbson authored and bwendling committed Aug 15, 2024
1 parent fc27229 commit d5bab90
Show file tree
Hide file tree
Showing 32 changed files with 536 additions and 509 deletions.
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ multiclass ZAAddSub<string n_suffix> {
def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
}

let SMETargetGuard = "sme2,b16b16" in {
let SMETargetGuard = "sme-b16b16" in {
def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>;
def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
}
Expand Down Expand Up @@ -506,7 +506,7 @@ let SMETargetGuard = "sme-f16f16" in {
def SVMLS_LANE_VG1x4_F16 : Inst<"svmls_lane_za16[_f16]_vg1x4", "vm4di", "h", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>;
}

let SMETargetGuard = "sme2,b16b16" in {
let SMETargetGuard = "sme-b16b16" in {
def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>;
Expand Down Expand Up @@ -742,7 +742,7 @@ let SMETargetGuard = "sme2" in {

////////////////////////////////////////////////////////////////////////////////
// SME2p1 - FMOPA, FMOPS (non-widening)
let SMETargetGuard = "sme2,b16b16" in {
let SMETargetGuard = "sme-b16b16" in {
def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
MergeNone, "aarch64_sme_mopa",
[IsStreaming, IsInOutZA],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX

// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -S -Werror -Wall -o /dev/null

// REQUIRES: aarch64-registered-target

Expand Down
10 changes: 5 additions & 5 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX

// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall %s -o /dev/null
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -S -Werror -Wall %s -o /dev/null

// REQUIRES: aarch64-registered-target
#include <arm_sme.h>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX

// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s

// REQUIRES: aarch64-registered-target

Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/print-supported-extensions-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
// CHECK-EMPTY:
// CHECK-NEXT: Name Architecture Feature(s) Description
// CHECK-NEXT: aes FEAT_AES, FEAT_PMULL Enable AES support
// CHECK-NEXT: b16b16 FEAT_B16B16 Enable SME2.1 ZA-targeting non-widening BFloat16 to BFloat16 instructions
// CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension
// CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension
// CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification
Expand Down Expand Up @@ -55,6 +54,7 @@
// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
// CHECK-NEXT: sm4 FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
// CHECK-NEXT: sme FEAT_SME Enable Scalable Matrix Extension (SME)
// CHECK-NEXT: sme-b16b16 FEAT_SME_B16B16 Enable SME2.1 ZA-targeting non-widening BFloat16 instructions
// CHECK-NEXT: sme-f16f16 FEAT_SME_F16F16 Enable SME non-widening Float16 instructions
// CHECK-NEXT: sme-f64f64 FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions
// CHECK-NEXT: sme-f8f16 FEAT_SME_F8F16 Enable Scalable Matrix Extension (SME) F8F16 instructions
Expand All @@ -71,7 +71,7 @@
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2.1 non-widening and SME2.1 Z-targeting non-widening BFloat16 to BFloat16 instructions
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
// CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions
// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ void test_features(uint32_t slice, svfloat16x2_t zn2, svfloat16x4_t zn4,
// expected-error@+1 {{'svsub_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}}
svsub_za16_f16_vg1x4(slice, zn4);

// expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svadd_za16_bf16_vg1x2(slice, bzn2);
// expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svadd_za16_bf16_vg1x4(slice, bzn4);
// expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svsub_za16_bf16_vg1x2(slice, bzn2);
// expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svsub_za16_bf16_vg1x4(slice, bzn4);
}

Expand Down
24 changes: 12 additions & 12 deletions clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,29 @@ void test_features_f16f16(uint32_t slice,
// expected-error@+1 {{'svmls_lane_za16_f16_vg1x4' needs target feature sme-f16f16}}
svmls_lane_za16_f16_vg1x4(slice, zn4, zm, 7);

// expected-error@+1 {{'svmla_single_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svmla_single_za16_bf16_vg1x2(slice, bzn2, bzm);
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svmla_single_za16_bf16_vg1x4(slice, bzn4, bzm);
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svmls_single_za16_bf16_vg1x2(slice, bzn2, bzm);
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svmls_single_za16_bf16_vg1x4(slice, bzn4, bzm);
// expected-error@+1 {{'svmla_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmla_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svmla_za16_bf16_vg1x2(slice, bzn2, bzm2);
// expected-error@+1 {{'svmla_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmla_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svmla_za16_bf16_vg1x4(slice, bzn4, bzm4);
// expected-error@+1 {{'svmls_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmls_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svmls_za16_bf16_vg1x2(slice, bzn2, bzm2);
// expected-error@+1 {{'svmls_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmls_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svmls_za16_bf16_vg1x4(slice, bzn4, bzm4);
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svmla_lane_za16_bf16_vg1x2(slice, bzn2, bzm, 7);
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svmla_lane_za16_bf16_vg1x4(slice, bzn4, bzm, 7);
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x2' needs target feature sme-b16b16}}
svmls_lane_za16_bf16_vg1x2(slice, bzn2, bzm, 7);
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x4' needs target feature sme-b16b16}}
svmls_lane_za16_bf16_vg1x4(slice, bzn4, bzm, 7);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ void test_features(svbool_t pn, svbool_t pm,
svfloat16_t zn, svfloat16_t zm,
svbfloat16_t znb, svbfloat16_t zmb)
__arm_streaming __arm_inout("za") {
// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme-b16b16}}
svmopa_za16_bf16_m(0, pn, pm, znb, zmb);
// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme2,b16b16}}
// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme-b16b16}}
svmops_za16_bf16_m(0, pn, pm, znb, zmb);
// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme-f16f16}}
svmopa_za16_f16_m(0, pn, pm, zn, zm);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def SME2p1Unsupported : AArch64Unsupported;

def SME2Unsupported : AArch64Unsupported {
let F = !listconcat([HasSME2, HasSVE2orSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
HasSMEF8F16, HasSMEF8F32, HasSMEF16F16orSMEF8F16],
HasSMEF8F16, HasSMEF8F32, HasSMEF16F16orSMEF8F16, HasSMEB16B16],
SME2p1Unsupported.F);
}

Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -435,11 +435,12 @@ def FeatureMEC : Extension<"mec", "MEC", "FEAT_MEC",
def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1",
"Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;

def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_B16B16",
"Enable SME2.1 ZA-targeting non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>;

def FeatureSVEB16B16: ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16",
"Enable SVE2.1 non-widening and SME2.1 Z-targeting non-widening BFloat16 to BFloat16 instructions">;
"Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions">;

def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16",
"Enable SME2.1 ZA-targeting non-widening BFloat16 instructions",
[FeatureSME2, FeatureSVEB16B16]>;

def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16",
"Enable SME non-widening Float16 instructions", [FeatureSME2]>;
Expand Down
Loading

0 comments on commit d5bab90

Please sign in to comment.