Skip to content

Commit

Permalink
Add I128 atomic support to the x64 backend
Browse files Browse the repository at this point in the history
  • Loading branch information
beetrees committed Oct 13, 2024
1 parent 460a4c0 commit 5a4083b
Show file tree
Hide file tree
Showing 16 changed files with 1,540 additions and 36 deletions.
61 changes: 47 additions & 14 deletions cranelift/codegen/meta/src/isa/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ pub(crate) fn define() -> TargetIsa {
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
false,
);
let has_cmpxchg16b = settings.add_bool(
"has_cmpxchg16b",
"Has support for CMPXCHG16b.",
"CMPXCHG16b: CPUID.01H:ECX.CMPXCHG16B[bit 13]",
false,
);
let has_sse41 = settings.add_bool(
"has_sse41",
"Has support for SSE4.1.",
Expand Down Expand Up @@ -106,6 +112,7 @@ pub(crate) fn define() -> TargetIsa {
false,
);

settings.add_predicate("use_cmpxchg16b", predicate!(has_cmpxchg16b));
settings.add_predicate("use_ssse3", predicate!(has_ssse3));
settings.add_predicate("use_sse41", predicate!(has_sse41));
settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
Expand Down Expand Up @@ -141,14 +148,30 @@ pub(crate) fn define() -> TargetIsa {
// Intel CPUs

// Netburst
settings.add_preset("nocona", "Nocona microarchitecture.", preset!(sse3));
settings.add_preset(
"nocona",
"Nocona microarchitecture.",
preset!(sse3 && has_cmpxchg16b),
);

// Intel Core 2 Solo/Duo
settings.add_preset("core2", "Core 2 microarchitecture.", preset!(sse3));
settings.add_preset("penryn", "Penryn microarchitecture.", preset!(sse41));
settings.add_preset(
"core2",
"Core 2 microarchitecture.",
preset!(sse3 && has_cmpxchg16b),
);
settings.add_preset(
"penryn",
"Penryn microarchitecture.",
preset!(sse41 && has_cmpxchg16b),
);

// Intel Atom CPUs
let atom = settings.add_preset("atom", "Atom microarchitecture.", preset!(ssse3));
let atom = settings.add_preset(
"atom",
"Atom microarchitecture.",
preset!(ssse3 && has_cmpxchg16b),
);
settings.add_preset("bonnell", "Bonnell microarchitecture.", preset!(atom));
let silvermont = settings.add_preset(
"silvermont",
Expand Down Expand Up @@ -186,7 +209,7 @@ pub(crate) fn define() -> TargetIsa {
let nehalem = settings.add_preset(
"nehalem",
"Nehalem microarchitecture.",
preset!(sse42 && has_popcnt),
preset!(sse42 && has_popcnt && has_cmpxchg16b),
);
settings.add_preset("corei7", "Core i7 microarchitecture.", preset!(nehalem));
let westmere = settings.add_preset("westmere", "Westmere microarchitecture.", preset!(nehalem));
Expand Down Expand Up @@ -229,7 +252,15 @@ pub(crate) fn define() -> TargetIsa {
let knights_landing = settings.add_preset(
"knl",
"Knights Landing microarchitecture.",
preset!(has_popcnt && has_avx512f && has_fma && has_bmi1 && has_bmi2 && has_lzcnt),
preset!(
has_popcnt
&& has_avx512f
&& has_fma
&& has_bmi1
&& has_bmi2
&& has_lzcnt
&& has_cmpxchg16b
),
);
settings.add_preset(
"knm",
Expand Down Expand Up @@ -312,22 +343,22 @@ pub(crate) fn define() -> TargetIsa {
settings.add_preset(
"opteron-sse3",
"Opteron microarchitecture with support for SSE3 instructions.",
preset!(sse3),
preset!(sse3 && has_cmpxchg16b),
);
settings.add_preset(
"k8-sse3",
"K8 Hammer microarchitecture with support for SSE3 instructions.",
preset!(sse3),
preset!(sse3 && has_cmpxchg16b),
);
settings.add_preset(
"athlon64-sse3",
"Athlon 64 microarchitecture with support for SSE3 instructions.",
preset!(sse3),
preset!(sse3 && has_cmpxchg16b),
);
let barcelona = settings.add_preset(
"barcelona",
"Barcelona microarchitecture.",
preset!(has_popcnt && has_lzcnt),
preset!(has_popcnt && has_lzcnt && has_cmpxchg16b),
);
settings.add_preset(
"amdfam10",
Expand All @@ -338,7 +369,7 @@ pub(crate) fn define() -> TargetIsa {
let btver1 = settings.add_preset(
"btver1",
"Bobcat microarchitecture.",
preset!(ssse3 && has_lzcnt && has_popcnt),
preset!(ssse3 && has_lzcnt && has_popcnt && has_cmpxchg16b),
);
settings.add_preset(
"btver2",
Expand All @@ -349,7 +380,7 @@ pub(crate) fn define() -> TargetIsa {
let bdver1 = settings.add_preset(
"bdver1",
"Bulldozer microarchitecture",
preset!(has_lzcnt && has_popcnt && ssse3),
preset!(has_lzcnt && has_popcnt && ssse3 && has_cmpxchg16b),
);
let bdver2 = settings.add_preset(
"bdver2",
Expand All @@ -366,7 +397,9 @@ pub(crate) fn define() -> TargetIsa {
let znver1 = settings.add_preset(
"znver1",
"Zen (first generation) microarchitecture.",
preset!(sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma),
preset!(
sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma && has_cmpxchg16b
),
);
let znver2 = settings.add_preset(
"znver2",
Expand Down Expand Up @@ -397,7 +430,7 @@ pub(crate) fn define() -> TargetIsa {
let x86_64_v2 = settings.add_preset(
"x86-64-v2",
"Generic x86-64 (V2) microarchitecture.",
preset!(sse42 && has_popcnt),
preset!(sse42 && has_popcnt && has_cmpxchg16b),
);
let x86_64_v3 = settings.add_preset(
"x84_64_v3",
Expand Down
35 changes: 19 additions & 16 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3637,18 +3637,19 @@ pub(crate) fn define(
let AtomicMem = &TypeVar::new(
"AtomicMem",
"Any type that can be stored in memory, which can be used in an atomic operation",
TypeSetBuilder::new().ints(8..64).build(),
TypeSetBuilder::new().ints(8..128).build(),
);

ig.push(
Inst::new(
"atomic_rmw",
r#"
Atomically read-modify-write memory at `p`, with second operand `x`. The old value is
returned. `p` has the type of the target word size, and `x` may be an integer type of
8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the
same as the type of `x`. This operation is sequentially consistent and creates
happens-before edges that order normal (non-atomic) loads and stores.
returned. `p` has the type of the target word size, and `x` may be any integer type; note
that some targets require specific target features to be enabled in order to support 128-bit
integer atomics. The type of the returned value is the same as the type of `x`. This
operation is sequentially consistent and creates happens-before edges that order normal
(non-atomic) loads and stores.
"#,
&formats.atomic_rmw,
)
Expand All @@ -3673,11 +3674,11 @@ pub(crate) fn define(
Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`,
storing `x` if the value at `p` equals `e`. The old value at `p` is returned,
regardless of whether the operation succeeds or fails. `p` has the type of the target
word size, and `x` and `e` must have the same type and the same size, which may be an
integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned
value is the same as the type of `x` and `e`. This operation is sequentially
consistent and creates happens-before edges that order normal (non-atomic) loads and
stores.
word size, and `x` and `e` must have the same type and the same size, which may be any
integer type; note that some targets require specific target features to be enabled in order
to support 128-bit integer atomics. The type of the returned value is the same as the type
of `x` and `e`. This operation is sequentially consistent and creates happens-before edges
that order normal (non-atomic) loads and stores.
"#,
&formats.atomic_cas,
)
Expand All @@ -3702,9 +3703,10 @@ pub(crate) fn define(
Atomically load from memory at `p`.
This is a polymorphic instruction that can load any value type which has a memory
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
This operation is sequentially consistent and creates happens-before edges that order
normal (non-atomic) loads and stores.
representation. It can only be used for integer types; note that some targets require
specific target features to be enabled in order to support 128-bit integer atomics. This
operation is sequentially consistent and creates happens-before edges that order normal
(non-atomic) loads and stores.
"#,
&formats.load_no_offset,
)
Expand All @@ -3726,9 +3728,10 @@ pub(crate) fn define(
Atomically store `x` to memory at `p`.
This is a polymorphic instruction that can store any value type with a memory
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
This operation is sequentially consistent and creates happens-before edges that order
normal (non-atomic) loads and stores.
representation. It can only be used for integer types; note that some targets require
specific target features to be enabled in order to support 128-bit integer atomics. This
operation is sequentially consistent and creates happens-before edges that order normal
(non-atomic) loads and stores.
"#,
&formats.store_no_offset,
)
Expand Down
107 changes: 107 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,24 @@
(mem SyntheticAmode)
(dst_old WritableReg))

;; A standard (native) `lock cmpxchg16b (amode)`, with register
;; conventions:
;;
;; `mem` (read) address
;; %rbx (low), %rcx (high) (read) replacement value
;; %rax (low), %rdx (high) (modified) in: expected value, out: value that was actually at `mem`
;; %rflags is written. Do not assume anything about it after the instruction.
;;
;; The instruction "succeeded" iff the bits of %rax and %rdx
;; afterwards are the same as they were before.
(LockCmpxchg16b (replacement_low Reg)
(replacement_high Reg)
(expected_low Reg)
(expected_high Reg)
(mem BoxSyntheticAmode)
(dst_old_low WritableReg)
(dst_old_high WritableReg))

;; A synthetic instruction, based on a loop around a native `lock
;; cmpxchg` instruction.
;;
Expand Down Expand Up @@ -696,6 +714,46 @@
(temp WritableReg)
(dst_old WritableReg))

;; A synthetic instruction, based on a loop around a native `lock
;; cmpxchg16b` instruction.
;;
;; This is the same as `AtomicRmwSeq`, but for 128-bit integers.
;;
;; For `MachAtomicRmwOp::Xchg`, use `Atomic128XchgSeq` instead.
;;
;; This instruction sequence has fixed register uses as follows:
;; - %rax (low), %rdx (high) (written) the old value at `mem`
;; - %rbx (low), %rcx (high) (written) used as temp registers to hold
;; the replacement value
;; - %rflags is written. Do not assume anything about it after the
;; instruction.
(Atomic128RmwSeq (op MachAtomicRmwOp)
(mem BoxSyntheticAmode)
(operand_low Reg)
(operand_high Reg)
(temp_low WritableReg)
(temp_high WritableReg)
(dst_old_low WritableReg)
(dst_old_high WritableReg))

;; A synthetic instruction, based on a loop around a native `lock
;; cmpxchg16b` instruction.
;;
;; This is `Atomic128RmwSeq` but only for `MachAtomicRmwOp::Xchg`. As
;; the replacement value is the same every time, this instruction doesn't
;; require any temporary registers.
;;
;; This instruction sequence has fixed register uses as follows:
;; - %rax (low), %rdx (high) (written) the old value at `mem`
;; - %rbx (low), %rcx (high) (read) the replacement value
;; - %rflags is written. Do not assume anything about it after the
;; instruction.
(Atomic128XchgSeq (mem SyntheticAmode)
(operand_low Reg)
(operand_high Reg)
(dst_old_low WritableReg)
(dst_old_high WritableReg))

;; A memory fence (mfence, lfence or sfence).
(Fence (kind FenceKind))

Expand Down Expand Up @@ -762,6 +820,11 @@
(type BoxCallIndInfo extern (enum))
(type BoxReturnCallInfo extern (enum))
(type BoxReturnCallIndInfo extern (enum))
(type BoxSyntheticAmode extern (enum))

;; Turn a `SyntheticAmode` into a `BoxSyntheticAmode` using the externally
;; implemented `box_synthetic_amode` constructor. The `convert` declaration
;; lets ISLE insert this call implicitly wherever a `SyntheticAmode` is supplied
;; but a `BoxSyntheticAmode` is expected.
(decl pure box_synthetic_amode (SyntheticAmode) BoxSyntheticAmode)
(extern constructor box_synthetic_amode box_synthetic_amode)
(convert SyntheticAmode BoxSyntheticAmode box_synthetic_amode)

;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
(decl operand_size_of_type_32_64 (Type) OperandSize)
Expand Down Expand Up @@ -1862,6 +1925,9 @@
(decl pure use_avx2 () bool)
(extern constructor use_avx2 use_avx2)

;; Externally implemented pure predicate returning a `bool`. Presumably reports
;; whether the `use_cmpxchg16b` ISA setting is enabled -- confirm against the
;; Rust-side implementation.
(decl pure use_cmpxchg16b () bool)
(extern constructor use_cmpxchg16b use_cmpxchg16b)

;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;

;; Extract a constant `Imm8Reg.Imm8` from a value operand.
Expand Down Expand Up @@ -5214,13 +5280,54 @@
(_ Unit (emit (MInst.LockCmpxchg ty replacement expected addr dst))))
dst))

;; Helper for creating `MInst.LockCmpxchg16b` instructions.
;;
;; Arguments are `expected`, `replacement` and the address, in that order; note
;; that the `MInst` itself takes the replacement halves *before* the expected
;; halves. Each `ValueRegs` argument is treated as a register pair: index 0 is
;; the low half and index 1 is the high half. The result is a pair of fresh
;; temporary GPRs (low, high) passed as the instruction's destination operands.
(decl x64_cmpxchg16b (ValueRegs ValueRegs SyntheticAmode) ValueRegs)
(rule (x64_cmpxchg16b expected replacement addr)
(let ((expected_low Gpr (value_regs_get_gpr expected 0))
(expected_high Gpr (value_regs_get_gpr expected 1))
(replacement_low Gpr (value_regs_get_gpr replacement 0))
(replacement_high Gpr (value_regs_get_gpr replacement 1))
(dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.LockCmpxchg16b replacement_low replacement_high expected_low expected_high addr dst_low dst_high))))
(value_regs dst_low dst_high)))

;; Helper for creating `MInst.AtomicRmwSeq` instructions.
;;
;; Allocates two fresh temporary GPRs: `tmp`, handed to the instruction as its
;; temporary operand, and `dst`, handed to it as the final (destination)
;; operand and returned as the result of this helper.
(decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr)
(rule (x64_atomic_rmw_seq ty op mem input)
(let ((dst WritableGpr (temp_writable_gpr))
(tmp WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst))))
dst))

;; Helper for creating the 128-bit atomic read-modify-write sequences.
;;
;; `input` is treated as a register pair (index 0 = low half, index 1 = high
;; half). The result is a pair of fresh temporary GPRs (low, high) that are
;; passed to the emitted instruction as its destination operands.
(decl x64_atomic_128_rmw_seq (MachAtomicRmwOp SyntheticAmode ValueRegs) ValueRegs)
;; General case: emit `MInst.Atomic128RmwSeq`, which additionally needs two
;; temporary registers (`tmp_low`, `tmp_high`).
(rule (x64_atomic_128_rmw_seq op mem input)
(let ((dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(tmp_low WritableGpr (temp_writable_gpr))
(tmp_high WritableGpr (temp_writable_gpr))
(input_low Gpr (value_regs_get_gpr input 0))
(input_high Gpr (value_regs_get_gpr input 1))
(_ Unit (emit (MInst.Atomic128RmwSeq op mem input_low input_high tmp_low tmp_high dst_low dst_high))))
(value_regs dst_low dst_high)))

;; Higher-priority special case for operations matched by the
;; `mach_atomic_rmw_op_xchg` extractor: emit `MInst.Atomic128XchgSeq`, which
;; takes no temporary registers.
(rule 1 (x64_atomic_128_rmw_seq (mach_atomic_rmw_op_xchg) mem input)
(let ((dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(input_low Gpr (value_regs_get_gpr input 0))
(input_high Gpr (value_regs_get_gpr input 1))
(_ Unit (emit (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high))))
(value_regs dst_low dst_high)))

;; Helper for building a 128-bit atomic store out of `MInst.Atomic128XchgSeq`.
;;
;; The instruction is not emitted here; it is wrapped in
;; `SideEffectNoResult.Inst` for the caller to emit. `input` is treated as a
;; register pair (index 0 = low half, index 1 = high half). `dst_low` and
;; `dst_high` are fresh temporaries supplied only because the instruction
;; requires destination operands; they are not returned.
(decl x64_atomic_128_store_seq (SyntheticAmode ValueRegs) SideEffectNoResult)
(rule (x64_atomic_128_store_seq mem input)
(let ((dst_low WritableGpr (temp_writable_gpr))
(dst_high WritableGpr (temp_writable_gpr))
(input_low Gpr (value_regs_get_gpr input 0))
(input_high Gpr (value_regs_get_gpr input 1)))
(SideEffectNoResult.Inst (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high))))

;; Extractor for matching a `MachAtomicRmwOp` in rule patterns, implemented
;; externally as `mach_atomic_rmw_op_is_xchg`. Presumably it succeeds only for
;; the `Xchg` operation -- confirm against the Rust-side implementation.
(decl mach_atomic_rmw_op_xchg () MachAtomicRmwOp)
(extern extractor mach_atomic_rmw_op_xchg mach_atomic_rmw_op_is_xchg)

;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the
;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other.
(type MachAtomicRmwOp extern (enum))
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ pub enum CmpOpcode {
pub(crate) enum InstructionSet {
SSE,
SSE2,
CMPXCHG16b,
SSSE3,
SSE41,
SSE42,
Expand Down
Loading

0 comments on commit 5a4083b

Please sign in to comment.