From 40c4cd4f682f1cb153f18d4d6a88795bafaf5667 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Wed, 19 Apr 2023 03:12:55 +0900 Subject: [PATCH] Various cleanup - Optimize x86_64 128-bit outline-atomics. FYI, this improves performance by up to 15% in concurrent RMW/store microbenchmarks. - Optimize x86_64 128-bit load that uses cmpxchg16b. - Optimize aarch64 128-bit load that uses FEAT_LSE. - Move 128-bit atomic implementation for Miri and ThreadSanitizer to intrinsics.rs on all architectures. - Remove duplicate tests and add tests for cases where feature detection returns false. - Several minor cleanups. --- .cirrus.yml | 91 ++-- .github/workflows/ci.yml | 43 +- Cargo.toml | 1 - bench/Cargo.toml | 1 - src/imp/arm_linux.rs | 18 +- src/imp/atomic128/README.md | 13 + src/imp/atomic128/aarch64.rs | 294 ++++++----- src/imp/atomic128/detect/common.rs | 12 +- src/imp/atomic128/detect/x86_64.rs | 11 +- src/imp/atomic128/intrinsics.rs | 575 ++++++++------------- src/imp/atomic128/macros.rs | 82 +-- src/imp/atomic128/powerpc64.rs | 9 +- src/imp/atomic128/s390x.rs | 99 +--- src/imp/atomic128/x86_64.rs | 761 ++++++++++++++-------------- src/imp/core_atomic.rs | 6 +- src/imp/fallback/mod.rs | 26 +- src/imp/fallback/outline_atomics.rs | 116 +++-- src/imp/float.rs | 6 +- src/imp/interrupt/mod.rs | 2 +- src/imp/mod.rs | 56 +- src/imp/x86.rs | 2 +- src/lib.rs | 4 +- src/tests/helper.rs | 11 +- src/tests/mod.rs | 54 +- tests/helper/Cargo.toml | 4 +- tests/helper/src/lib.rs | 23 +- tools/build.sh | 12 +- tools/test.sh | 15 +- 28 files changed, 1049 insertions(+), 1298 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index b934e82b..dfbd6505 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -36,11 +36,11 @@ aarch64_linux_gnu_test_task: - | [ ! -f $HOME/.cargo/env ] || . $HOME/.cargo/env - set -ex - - ./tools/test.sh -vv + - ./tools/test.sh -vv -- -Z unstable-options --report-time # +lse # Graviton2 (Neoverse N1) is ARMv8.2-a and doesn't support FEAT_LSE2. # FEAT_LSE2 is tested on aarch64 macOS VM. - - RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv + - RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv -- -Z unstable-options --report-time aarch64_linux_musl_test_task: name: test ($TARGET) @@ -55,47 +55,36 @@ aarch64_linux_musl_test_task: - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly test_script: - set -ex - - ./tools/test.sh -vv + - ./tools/test.sh -vv -- -Z unstable-options --report-time # -crt-static - - RUSTFLAGS="$RUSTFLAGS -C target-feature=-crt-static" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=-crt-static" ./tools/test.sh -vv + - RUSTFLAGS="$RUSTFLAGS -C target-feature=-crt-static" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=-crt-static" ./tools/test.sh -vv -- -Z unstable-options --report-time # +lse # Graviton2 (Neoverse N1) is ARMv8.2-a and doesn't support FEAT_LSE2. # FEAT_LSE2 is tested on aarch64 macOS VM. 
- - RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv + - RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv -- -Z unstable-options --report-time -armel_linux_test_task: +arm_linux_test_task: name: test ($TARGET) - env: - TARGET: armv5te-unknown-linux-gnueabi + matrix: + - env: + TARGET: armv5te-unknown-linux-gnueabi + DPKG_ARCH: armel + - env: + TARGET: armv7-unknown-linux-gnueabihf + DPKG_ARCH: armhf arm_container: image: rust:latest setup_script: - set -ex - lscpu - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly - - dpkg --add-architecture armel - - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabi libc6-dev-armel-cross libc6:armel + - dpkg --add-architecture "$DPKG_ARCH" + - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-"${TARGET/*-unknown/arm}" libc6-dev-"$DPKG_ARCH"-cross libc6:"$DPKG_ARCH" test_script: - set -ex - export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc - - ./tools/test.sh --target "$TARGET" -Z doctest-xcompile -vv - -armhf_linux_test_task: - name: test ($TARGET) - env: - TARGET: armv7-unknown-linux-gnueabihf - arm_container: - image: rust:latest - setup_script: - - set -ex - - lscpu - - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly - - dpkg --add-architecture armhf - - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf - test_script: - - set -ex - export CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc - - ./tools/test.sh --target "$TARGET" -Z doctest-xcompile -vv + - ./tools/test.sh --target "$TARGET" -Z doctest-xcompile -vv -- -Z unstable-options --report-time aarch64_macos_test_task: name: test ($TARGET) @@ -111,14 +100,12 @@ aarch64_macos_test_task: - . $HOME/.cargo/env - set -ex # macOS is +lse,+lse2 by default - - ./tools/test.sh -vv + - ./tools/test.sh -vv -- -Z unstable-options --report-time aarch64_linux_valgrind_task: name: valgrind ($TARGET) env: - CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER: valgrind -v --error-exitcode=1 --error-limit=no --leak-check=full --show-leak-kinds=all --track-origins=yes - RUSTDOCFLAGS: -D warnings --cfg valgrind - RUSTFLAGS: -D warnings --cfg valgrind + VALGRIND: valgrind TARGET: aarch64-unknown-linux-gnu arm_container: # Valgrind support ldxp/stxp on 3.19+: https://valgrind.org/docs/manual/dist.news.html @@ -126,17 +113,15 @@ aarch64_linux_valgrind_task: image: debian:bookworm setup_script: - set -ex - - lscpu - - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends ca-certificates curl gcc git libc6-dev valgrind + - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends ca-certificates curl gcc git libc6-dev valgrind moreutils - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain nightly --component rust-src test_script: - . 
$HOME/.cargo/env - set -ex - # doctests on Valgrind are very slow - - ./tools/test.sh -vv --tests + - ./tools/test.sh -vv 2>&1 | ts -i '%.s ' # +lse # As of Valgrind 3.19, Valgrind supports atomic instructions of ARMv8.0 and ARMv8.1 (FEAT_LSE). - - RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv --tests + - RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv 2>&1 | ts -i '%.s ' # # aarch64_linux_bench_task: # name: bench ($TARGET) @@ -152,33 +137,36 @@ aarch64_linux_valgrind_task: # - rustup toolchain add nightly --no-self-update && rustup default nightly # bench_script: # - set -ex +# - cargo bench -vv --manifest-path bench/Cargo.toml # - RUSTFLAGS="${RUSTFLAGS} -C target-feature=-outline-atomics --cfg portable_atomic_no_outline_atomics" cargo bench -vv --manifest-path bench/Cargo.toml # - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+lse" cargo bench -vv --manifest-path bench/Cargo.toml -# armel_linux_bench_task: +# x86_64_linux_bench_task: # name: bench ($TARGET) # env: -# TARGET: armv5te-unknown-linux-gnueabi -# arm_container: +# TARGET: x86_64-unknown-linux-gnu +# container: # image: rust:latest # cpu: 4 # memory: 12G # setup_script: # - set -ex # - lscpu -# - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly -# - rustup target add "$TARGET" -# - dpkg --add-architecture armel -# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabi libc6-dev-armel-cross libc6:armel -# test_script: +# - rustup toolchain add nightly --no-self-update && rustup default nightly +# bench_script: # - set -ex -# - export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc -# - RUSTFLAGS="${RUSTFLAGS}" cargo bench --target "$TARGET" -vv --manifest-path bench/Cargo.toml +# - cargo bench -vv --manifest-path bench/Cargo.toml +# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+cmpxchg16b" cargo bench -vv --manifest-path bench/Cargo.toml -# armhf_linux_bench_task: +# arm_linux_bench_task: # name: bench ($TARGET) -# env: -# TARGET: armv7-unknown-linux-gnueabihf +# matrix: +# - env: +# TARGET: armv5te-unknown-linux-gnueabi +# DPKG_ARCH: armel +# - env: +# TARGET: armv7-unknown-linux-gnueabihf +# DPKG_ARCH: armhf # arm_container: # image: rust:latest # cpu: 4 @@ -188,10 +176,11 @@ aarch64_linux_valgrind_task: # - lscpu # - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly # - rustup target add "$TARGET" -# - dpkg --add-architecture armhf -# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf +# - dpkg --add-architecture "$DPKG_ARCH" +# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-"${TARGET/*-unknown/arm}" libc6-dev-"$DPKG_ARCH"-cross libc6:"$DPKG_ARCH" # test_script: # - set -ex +# - export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc # - export CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc # - RUSTFLAGS="${RUSTFLAGS}" cargo bench --target "$TARGET" -vv --manifest-path bench/Cargo.toml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10c54f9b..95ef5cb2 100644 --- a/.github/workflows/ci.yml 
+++ b/.github/workflows/ci.yml @@ -95,8 +95,6 @@ jobs: target: armv7-unknown-linux-gnueabi - rust: nightly target: armv7-unknown-linux-gnueabihf - - rust: nightly - target: armv5te-unknown-linux-musleabi - rust: nightly target: arm-linux-androideabi - rust: nightly @@ -163,20 +161,31 @@ jobs: if: matrix.target != '' && !startsWith(matrix.target, 'i686') && !startsWith(matrix.target, 'x86_64') - run: echo "TARGET=--target=${{ matrix.target }}" >>"${GITHUB_ENV}" if: matrix.target != '' + - run: echo "REPORT_TIME=-- -Z unstable-options --report-time" >>"${GITHUB_ENV}" + if: startsWith(matrix.rust, 'nightly') # Since nightly-2022-12-23, -Z build-std + -Z randomize-layout + release mode on windows causes segfault. - run: echo "RANDOMIZE_LAYOUT=-Z randomize-layout" >>"${GITHUB_ENV}" if: startsWith(matrix.rust, 'nightly') && !startsWith(matrix.os, 'windows') - - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME + # macOS is skipped because it is +cmpxchg16b by default + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME + env: + # Note: This cfg is intended to make it easy for portable-atomic developers + # to test has_cmpxchg16b == false, has_lse == false, or __kuser_helper_version < 5 cases, + # and is not a public API. + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg portable_atomic_test_outline_atomics_detect_false + RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg portable_atomic_test_outline_atomics_detect_false + if: (matrix.target == '' || startsWith(matrix.target, 'x86_64')) && !startsWith(matrix.os, 'macos') || startsWith(matrix.target, 'aarch64') || startsWith(matrix.target, 'armv5te') || matrix.target == 'arm-linux-androideabi' # -crt-static - - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=-crt-static RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=-crt-static if: contains(matrix.target, '-musl') # +cmpxchg16b # macOS is skipped because it is +cmpxchg16b by default - - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b @@ -184,21 +193,21 @@ jobs: # +lse # As of QEMU 7.2, QEMU has not yet implemented FEAT_LSE2: https://linaro.atlassian.net/browse/QEMU-300 # FEAT_LSE2 is tested on Cirrus CI's aarch64 macOS VM. 
- - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+lse RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+lse if: startsWith(matrix.target, 'aarch64') # pwr7 # powerpc64- (big-endian) is skipped because it is pre-pwr8 by default - - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr7 RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr7 if: startsWith(matrix.target, 'powerpc64le-') # pwr8 # powerpc64le- (little-endian) is skipped because it is pwr8 by default - - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr8 RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr8 @@ -303,8 +312,7 @@ jobs: - run: sudo apt-get -o Acquire::Retries=10 -qq update && sudo apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends moreutils - run: echo "TARGET=--target=${{ matrix.target }}" >>"${GITHUB_ENV}" if: matrix.target != '' - - run: | - cargo miri test --workspace --all-features $EXCLUDE $TARGET 2>&1 | ts -i '%.s ' + - run: cargo miri test --workspace --all-features $EXCLUDE $TARGET 2>&1 | ts -i '%.s ' env: MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-retag-fields -Zmiri-disable-isolation RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -Z randomize-layout @@ -351,7 +359,7 @@ jobs: valgrind: env: - CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER: valgrind -v --error-exitcode=1 --error-limit=no --leak-check=full --show-leak-kinds=all --track-origins=yes + VALGRIND: valgrind runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -360,17 +368,14 @@ jobs: persist-credentials: false - name: Install Rust run: rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly + - run: sudo apt-get -o Acquire::Retries=10 -qq update && sudo apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends moreutils - uses: taiki-e/install-action@valgrind - # doctests on Valgrind are very slow - - run: tools/test.sh -vv --tests - env: - RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg valgrind - RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg valgrind + - run: tools/test.sh -vv 2>&1 | ts -i '%.s ' # +cmpxchg16b - - run: tools/test.sh -vv --tests + - run: tools/test.sh -vv 2>&1 | ts -i '%.s ' env: - RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg valgrind - RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg valgrind + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b + RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b codegen: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 7d99d156..72e542d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,7 +67,6 @@ crossbeam-utils = "0.8" fastrand = "1" paste = "1" quickcheck = { default-features = false, git = "https://github.com/taiki-e/quickcheck.git", branch = "dev" } # https://github.com/BurntSushi/quickcheck/pull/304 + https://github.com/BurntSushi/quickcheck/pull/282 + lower MSRV -rustversion = "1" serde_test = "1" sptr = "0.3" static_assertions = "1" diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 83f51163..9b6ea215 100644 --- a/bench/Cargo.toml +++ 
b/bench/Cargo.toml @@ -19,7 +19,6 @@ crossbeam-utils = "0.8" fastrand = "1" paste = "1" quickcheck = { default-features = false, git = "https://github.com/taiki-e/quickcheck.git", branch = "dev" } # https://github.com/BurntSushi/quickcheck/pull/304 + https://github.com/BurntSushi/quickcheck/pull/282 + lower MSRV -rustversion = "1" static_assertions = "1" [[bench]] diff --git a/src/imp/arm_linux.rs b/src/imp/arm_linux.rs index 892efa24..8633ce35 100644 --- a/src/imp/arm_linux.rs +++ b/src/imp/arm_linux.rs @@ -16,7 +16,7 @@ mod fallback; #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; -use core::{mem, sync::atomic::Ordering}; +use core::{cell::UnsafeCell, mem, sync::atomic::Ordering}; /// A 64-bit value represented as a pair of 32-bit values. /// @@ -56,10 +56,17 @@ fn __kuser_helper_version() -> i32 { } #[inline] fn has_kuser_cmpxchg64() -> bool { + // Note: This cfg is intended to make it easy for portable-atomic developers + // to test __kuser_helper_version < 5 cases, and is not a public API. + if cfg!(portable_atomic_test_outline_atomics_detect_false) { + return false; + } __kuser_helper_version() >= 5 } #[inline] unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool { + debug_assert!(ptr as usize % 8 == 0); + debug_assert!(has_kuser_cmpxchg64()); // SAFETY: the caller must uphold the safety contract. unsafe { let f: extern "C" fn(*const u64, *const u64, *mut u64) -> u32 = @@ -91,9 +98,6 @@ unsafe fn atomic_update_kuser_cmpxchg64(dst: *mut u64, mut f: F) -> u64 where F: FnMut(u64) -> u64, { - debug_assert!(dst as usize % 8 == 0); - debug_assert!(has_kuser_cmpxchg64()); - // SAFETY: the caller must uphold the safety contract. unsafe { loop { @@ -130,7 +134,7 @@ macro_rules! atomic_with_ifunc { if has_kuser_cmpxchg64() { kuser_cmpxchg64_fn } else { - // Use SeqCst because __kuser_cmpxchg64 is SeqCst. + // Use SeqCst because __kuser_cmpxchg64 is always SeqCst. // https://github.com/torvalds/linux/blob/v6.1/arch/arm/kernel/entry-armv.S#L918-L925 fallback::$seqcst_fallback_fn } @@ -264,7 +268,7 @@ macro_rules! atomic64 { ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { #[repr(C, align(8))] pub(crate) struct $atomic_type { - v: core::cell::UnsafeCell<$int_type>, + v: UnsafeCell<$int_type>, } // Send is implicitly implemented. @@ -276,7 +280,7 @@ macro_rules! atomic64 { impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { - Self { v: core::cell::UnsafeCell::new(v) } + Self { v: UnsafeCell::new(v) } } #[inline] diff --git a/src/imp/atomic128/README.md b/src/imp/atomic128/README.md index 486b2f30..2ffcd300 100644 --- a/src/imp/atomic128/README.md +++ b/src/imp/atomic128/README.md @@ -15,6 +15,19 @@ On compiler versions or platforms where these are not supported, the fallback im See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on aarch64. +## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128) + +This directory has target-specific implementations with inline assembly ([aarch64.rs](aarch64.rs), [x86_64.rs](x86_64.rs), [powerpc64.rs](powerpc64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly. 
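The CAS-loop approach referred to in the comparison below (and used by `atomic_update` in intrinsics.rs later in this patch) is the usual compare-exchange retry loop: load the current value, compute the new value, and retry a weak compare-exchange until it succeeds. A minimal sketch of that shape, shown with the stable `AtomicU64` API purely for illustration rather than the crate's unsafe 128-bit internals:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// The same retry-loop shape as `atomic_update` in intrinsics.rs, shown with
// AtomicU64 so the sketch compiles on stable Rust.
fn fetch_max_via_cas(a: &AtomicU64, val: u64, order: Ordering) -> u64 {
    // The first load only needs the current value; the compare-exchange below
    // provides the requested ordering.
    let mut old = a.load(Ordering::Relaxed);
    loop {
        let new = old.max(val);
        // A weak CAS may fail spuriously; on failure it returns the value it
        // actually found, and we retry with that.
        match a.compare_exchange_weak(old, new, order, Ordering::Relaxed) {
            Ok(prev) => return prev,
            Err(prev) => old = prev,
        }
    }
}

fn main() {
    let a = AtomicU64::new(3);
    assert_eq!(fetch_max_via_cas(&a, 7, Ordering::AcqRel), 3);
    assert_eq!(a.load(Ordering::Relaxed), 7);
}
```

The 128-bit versions in this patch follow the same structure, only over raw `*mut u128` with the platform's 128-bit compare-exchange underneath.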
+ +Implementations with inline assembly generate assembly almost equivalent to the `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but for some operations one or the other generates more efficient code. For example: + +- On x86_64, the implementation with inline assembly contains additional optimizations (e.g., [#16](https://github.com/taiki-e/portable-atomic/pull/16)) and is much faster for some operations. +- On aarch64, the implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance. +- On powerpc64 and s390x, LLVM does not support generating some 128-bit atomic operations (see [intrinsics.rs](intrinsics.rs) module-level comments), and we use a CAS loop to implement them, so the implementation with inline assembly may be faster for those operations. +- In implementations without inline assembly, the compiler may reuse condition flags that have changed as a result of the operation, or use immediate values instead of registers, depending on the situation. + +As 128-bit atomics-related APIs stabilize in the standard library, implementations with inline assembly are planned to be updated to get the benefits of both. + ## Run-time feature detection [detect](detect) module has run-time feature detection implementations. diff --git a/src/imp/atomic128/aarch64.rs b/src/imp/atomic128/aarch64.rs index d4b4c735..ccced4d5 100644 --- a/src/imp/atomic128/aarch64.rs +++ b/src/imp/atomic128/aarch64.rs @@ -48,9 +48,9 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - aarch64 https://godbolt.org/z/qKPb1asj4 -// - aarch64 (+lse) https://godbolt.org/z/dqxj9z9Ps -// - aarch64 (+lse,+lse2) https://godbolt.org/z/x4a135Psb +// - aarch64 https://godbolt.org/z/jz7rGK8hc +// - aarch64 (+lse) https://godbolt.org/z/sK3sEa8jP +// - aarch64 (+lse,+lse2) https://godbolt.org/z/P564r3EG9 include!("macros.rs"); @@ -101,6 +101,40 @@ mod detect_macos; use core::arch::asm; use core::sync::atomic::Ordering; +#[cfg(any( + target_feature = "lse", + portable_atomic_target_feature = "lse", + all(not(portable_atomic_no_aarch64_target_feature), not(portable_atomic_no_outline_atomics)), +))] +macro_rules! debug_assert_lse { + () => { + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "openbsd", + target_os = "fuchsia", + target_os = "windows", + ), + ))] + #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + { + debug_assert!(detect::detect().has_lse()); + } + }; +} + #[cfg(target_pointer_width = "32")] macro_rules! ptr_modifier { () => { @@ -170,7 +204,7 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. unsafe { - _atomic_compare_exchange_casp(src, 0, 0, order) + _atomic_load_casp(src, order) } #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] // SAFETY: the caller must uphold the safety contract.
@@ -215,6 +249,46 @@ unsafe fn atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole } } +// Do not use _atomic_compare_exchange_casp because it needs extra MOV to implement load. +#[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))] +#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] +#[inline] +unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_lse!(); + + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + let (prev_lo, prev_hi); + macro_rules! atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + concat!("casp", $acquire, $release, " x4, x5, x4, x5, [{src", ptr_modifier!(), "}]"), + src = in(reg) src, + // must be allocated to even/odd register pair + inout("x4") 0_u64 => prev_lo, + inout("x5") 0_u64 => prev_hi, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("", ""), + Ordering::Acquire => atomic_load!("a", ""), + Ordering::SeqCst => atomic_load!("a", "l"), + _ => unreachable!("{:?}", order), + } + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} +#[cfg(any( + test, + all( + not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")), + not(any(target_feature = "lse", portable_atomic_target_feature = "lse")), + ), +))] #[inline] unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 { debug_assert!(src as usize % 16 == 0); @@ -366,24 +440,26 @@ unsafe fn atomic_compare_exchange( target_feature(enable = "lse") )] unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128; - _atomic_compare_exchange_casp_relaxed + atomic_compare_exchange_casp_relaxed = _atomic_compare_exchange_casp(Ordering::Relaxed); - _atomic_compare_exchange_casp_acquire + atomic_compare_exchange_casp_acquire = _atomic_compare_exchange_casp(Ordering::Acquire); - _atomic_compare_exchange_casp_release + atomic_compare_exchange_casp_release = _atomic_compare_exchange_casp(Ordering::Release); - _atomic_compare_exchange_casp_acqrel + // AcqRel and SeqCst RMWs are equivalent. + atomic_compare_exchange_casp_acqrel = _atomic_compare_exchange_casp(Ordering::AcqRel); } fn_alias! { unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128; - _atomic_compare_exchange_ldxp_stxp_relaxed + atomic_compare_exchange_ldxp_stxp_relaxed = _atomic_compare_exchange_ldxp_stxp(Ordering::Relaxed); - _atomic_compare_exchange_ldxp_stxp_acquire + atomic_compare_exchange_ldxp_stxp_acquire = _atomic_compare_exchange_ldxp_stxp(Ordering::Acquire); - _atomic_compare_exchange_ldxp_stxp_release + atomic_compare_exchange_ldxp_stxp_release = _atomic_compare_exchange_ldxp_stxp(Ordering::Release); - _atomic_compare_exchange_ldxp_stxp_acqrel + // AcqRel and SeqCst RMWs are equivalent. 
+ atomic_compare_exchange_ldxp_stxp_acqrel = _atomic_compare_exchange_ldxp_stxp(Ordering::AcqRel); } // SAFETY: the caller must guarantee that `dst` is valid for both writes and @@ -394,27 +470,27 @@ unsafe fn atomic_compare_exchange( Ordering::Relaxed => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { - _atomic_compare_exchange_casp_relaxed + atomic_compare_exchange_casp_relaxed } else { - _atomic_compare_exchange_ldxp_stxp_relaxed + atomic_compare_exchange_ldxp_stxp_relaxed } }) } Ordering::Acquire => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { - _atomic_compare_exchange_casp_acquire + atomic_compare_exchange_casp_acquire } else { - _atomic_compare_exchange_ldxp_stxp_acquire + atomic_compare_exchange_ldxp_stxp_acquire } }) } Ordering::Release => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { - _atomic_compare_exchange_casp_release + atomic_compare_exchange_casp_release } else { - _atomic_compare_exchange_ldxp_stxp_release + atomic_compare_exchange_ldxp_stxp_release } }) } @@ -422,9 +498,9 @@ unsafe fn atomic_compare_exchange( Ordering::AcqRel | Ordering::SeqCst => { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { if detect::detect().has_lse() { - _atomic_compare_exchange_casp_acqrel + atomic_compare_exchange_casp_acqrel } else { - _atomic_compare_exchange_ldxp_stxp_acqrel + atomic_compare_exchange_ldxp_stxp_acqrel } }) } @@ -455,6 +531,7 @@ unsafe fn _atomic_compare_exchange_casp( order: Ordering, ) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, that there are no concurrent non-atomic operations, @@ -486,6 +563,7 @@ unsafe fn _atomic_compare_exchange_casp( U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole } } +#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] #[inline] unsafe fn _atomic_compare_exchange_ldxp_stxp( dst: *mut u128, @@ -554,31 +632,32 @@ unsafe fn _atomic_compare_exchange_ldxp_stxp( // so we always use strong CAS for now. use self::atomic_compare_exchange as atomic_compare_exchange_weak; -#[inline] -unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { - #[cfg(all( - any(target_feature = "lse", portable_atomic_target_feature = "lse"), - not(portable_atomic_ll_sc_rmw), - ))] - // SAFETY: the caller must uphold the safety contract. - // cfg guarantee that the CPU supports FEAT_LSE. - unsafe { - _atomic_swap_casp(dst, val, order) - } - #[cfg(not(all( +// If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, +// we use CAS-based atomic RMW. +#[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), +))] +use _atomic_swap_casp as atomic_swap; +#[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), +)))] +use _atomic_swap_ldxp_stxp as atomic_swap; +// Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. +#[cfg(any( + test, + all( any(target_feature = "lse", portable_atomic_target_feature = "lse"), not(portable_atomic_ll_sc_rmw), - )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - _atomic_swap_ldxp_stxp(dst, val, order) - } -} -// Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. 
+ ) +))] #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); + // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. unsafe { @@ -619,6 +698,13 @@ unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 } } // Do not use atomic_rmw_ll_sc_3 because it needs extra MOV to implement swap. +#[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) +))] #[inline] unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -658,7 +744,7 @@ unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> /// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) /// - new_lo/new_hi pair: new value that will to stored by sc macro_rules! atomic_rmw_ll_sc_3 { - ($name:ident as $reexport_name:ident, options($($options:tt)*), $($op:tt)*) => { + ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, // we use CAS-based atomic RMW generated by atomic_rmw_cas_3! macro instead. #[cfg(not(all( @@ -666,6 +752,13 @@ macro_rules! atomic_rmw_ll_sc_3 { not(portable_atomic_ll_sc_rmw), )))] use $name as $reexport_name; + #[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) + ))] #[inline] unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -690,7 +783,7 @@ macro_rules! atomic_rmw_ll_sc_3 { new_lo = out(reg) _, new_hi = out(reg) _, r = out(reg) _, - options($($options)*), + options(nostack $(, $preserves_flags)?), ) }; } @@ -698,16 +791,6 @@ macro_rules! atomic_rmw_ll_sc_3 { U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole } } - #[cfg(test)] - paste::paste! { - // Helper to test $op separately. - unsafe fn [<$reexport_name _op>](dst: *mut u128, val: u128) -> u128 { - // SAFETY: the caller must uphold the safety contract. - unsafe { - $name(dst, val, Ordering::Relaxed) - } - } - } }; } /// Atomic RMW by CAS loop (3 arguments) @@ -726,10 +809,18 @@ macro_rules! atomic_rmw_cas_3 { not(portable_atomic_ll_sc_rmw), ))] use $name as $reexport_name; + #[cfg(any( + test, + all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ) + ))] #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. unsafe { @@ -782,7 +873,7 @@ macro_rules! atomic_rmw_cas_3 { /// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) /// - new_lo/new_hi pair: new value that will to stored by sc macro_rules! 
atomic_rmw_ll_sc_2 { - ($name:ident as $reexport_name:ident, options($($options:tt)*), $($op:tt)*) => { + ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, // we use CAS-based atomic RMW generated by atomic_rmw_cas_2! macro instead. #[cfg(not(all( @@ -790,6 +881,13 @@ macro_rules! atomic_rmw_ll_sc_2 { not(portable_atomic_ll_sc_rmw), )))] use $name as $reexport_name; + #[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) + ))] #[inline] unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -811,7 +909,7 @@ macro_rules! atomic_rmw_ll_sc_2 { new_lo = out(reg) _, new_hi = out(reg) _, r = out(reg) _, - options($($options)*), + options(nostack $(, $preserves_flags)?), ) }; } @@ -819,16 +917,6 @@ macro_rules! atomic_rmw_ll_sc_2 { U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole } } - #[cfg(test)] - paste::paste! { - // Helper to test $op separately. - unsafe fn [<$reexport_name _op>](dst: *mut u128) -> u128 { - // SAFETY: the caller must uphold the safety contract. - unsafe { - $name(dst, Ordering::Relaxed) - } - } - } }; } /// Atomic RMW by CAS loop (2 arguments) @@ -846,10 +934,18 @@ macro_rules! atomic_rmw_cas_2 { not(portable_atomic_ll_sc_rmw), ))] use $name as $reexport_name; + #[cfg(any( + test, + all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ) + ))] #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. unsafe { @@ -892,10 +988,9 @@ macro_rules! atomic_rmw_cas_2 { }; } +// Do not use `preserves_flags` because ADDS and ADCS modify the condition flags. atomic_rmw_ll_sc_3! { _atomic_add_ldxp_stxp as atomic_add, - // Do not use `preserves_flags` because ADDS and ADCS modify the condition flags. - options(nostack), concat!( "adds ", select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}") @@ -917,10 +1012,9 @@ atomic_rmw_cas_3! { ), } +// Do not use `preserves_flags` because SUBS and SBCS modify the condition flags. atomic_rmw_ll_sc_3! { _atomic_sub_ldxp_stxp as atomic_sub, - // Do not use `preserves_flags` because SUBS and SBCS modify the condition flags. - options(nostack), concat!( "subs ", select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}") @@ -943,8 +1037,7 @@ atomic_rmw_cas_3! { } atomic_rmw_ll_sc_3! { - _atomic_and_ldxp_stxp as atomic_and, - options(nostack, preserves_flags), + _atomic_and_ldxp_stxp as atomic_and (preserves_flags), "and {new_lo}, {prev_lo}, {val_lo}", "and {new_hi}, {prev_hi}, {val_hi}", } @@ -955,8 +1048,7 @@ atomic_rmw_cas_3! { } atomic_rmw_ll_sc_3! { - _atomic_nand_ldxp_stxp as atomic_nand, - options(nostack, preserves_flags), + _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags), "and {new_lo}, {prev_lo}, {val_lo}", "mvn {new_lo}, {new_lo}", "and {new_hi}, {prev_hi}, {val_hi}", @@ -971,8 +1063,7 @@ atomic_rmw_cas_3! { } atomic_rmw_ll_sc_3! 
{ - _atomic_or_ldxp_stxp as atomic_or, - options(nostack, preserves_flags), + _atomic_or_ldxp_stxp as atomic_or (preserves_flags), "orr {new_lo}, {prev_lo}, {val_lo}", "orr {new_hi}, {prev_hi}, {val_hi}", } @@ -983,8 +1074,7 @@ atomic_rmw_cas_3! { } atomic_rmw_ll_sc_3! { - _atomic_xor_ldxp_stxp as atomic_xor, - options(nostack, preserves_flags), + _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags), "eor {new_lo}, {prev_lo}, {val_lo}", "eor {new_hi}, {prev_hi}, {val_hi}", } @@ -995,8 +1085,7 @@ atomic_rmw_cas_3! { } atomic_rmw_ll_sc_2! { - _atomic_not_ldxp_stxp as atomic_not, - options(nostack, preserves_flags), + _atomic_not_ldxp_stxp as atomic_not (preserves_flags), "mvn {new_lo}, {prev_lo}", "mvn {new_hi}, {prev_hi}", } @@ -1006,10 +1095,9 @@ atomic_rmw_cas_2! { "mvn x5, x7", } +// Do not use `preserves_flags` because NEGS modifies the condition flags. atomic_rmw_ll_sc_2! { _atomic_neg_ldxp_stxp as atomic_neg, - // Do not use `preserves_flags` because NEGS modifies the condition flags. - options(nostack), concat!("negs ", select_le_or_be!("{new_lo}, {prev_lo}", "{new_hi}, {prev_hi}")), concat!("ngc ", select_le_or_be!("{new_hi}, {prev_hi}", "{new_lo}, {prev_lo}")), } @@ -1019,10 +1107,9 @@ atomic_rmw_cas_2! { concat!("ngc ", select_le_or_be!("x5, x7", "x4, x6")), } +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. atomic_rmw_ll_sc_3! { _atomic_max_ldxp_stxp as atomic_max, - // Do not use `preserves_flags` because CMP and SBCS modify the condition flags. - options(nostack), select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), "csel {new_hi}, {prev_hi}, {val_hi}, lt", // select hi 64-bit @@ -1036,10 +1123,9 @@ atomic_rmw_cas_3! { "csel x4, x6, {val_lo}, lt", // select lo 64-bit } +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. atomic_rmw_ll_sc_3! { _atomic_umax_ldxp_stxp as atomic_umax, - // Do not use `preserves_flags` because CMP and SBCS modify the condition flags. - options(nostack), select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), "csel {new_hi}, {prev_hi}, {val_hi}, lo", // select hi 64-bit @@ -1053,10 +1139,9 @@ atomic_rmw_cas_3! { "csel x4, x6, {val_lo}, lo", // select lo 64-bit } +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. atomic_rmw_ll_sc_3! { _atomic_min_ldxp_stxp as atomic_min, - // Do not use `preserves_flags` because CMP and SBCS modify the condition flags. - options(nostack), select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), "csel {new_hi}, {prev_hi}, {val_hi}, ge", // select hi 64-bit @@ -1070,10 +1155,9 @@ atomic_rmw_cas_3! { "csel x4, x6, {val_lo}, ge", // select lo 64-bit } +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. atomic_rmw_ll_sc_3! { _atomic_umin_ldxp_stxp as atomic_umin, - // Do not use `preserves_flags` because CMP and SBCS modify the condition flags. 
- options(nostack), select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), "csel {new_hi}, {prev_hi}, {val_hi}, hs", // select hi 64-bit @@ -1102,40 +1186,4 @@ mod tests { test_atomic_int!(i128); test_atomic_int!(u128); - - test_atomic128_op!(); -} - -#[cfg(test)] -#[allow(dead_code, clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)] -mod tests_no_outline_atomics { - use super::*; - - #[inline] - unsafe fn atomic_compare_exchange( - dst: *mut u128, - old: u128, - new: u128, - success: Ordering, - _failure: Ordering, - ) -> Result { - // SAFETY: the caller must uphold the safety contract. - let res = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success) }; - if res == old { - Ok(res) - } else { - Err(res) - } - } - - // LLVM appears to generate strong CAS for aarch64 128-bit weak CAS, - // so we always use strong CAS. - use self::atomic_compare_exchange as atomic_compare_exchange_weak; - - atomic128!(AtomicI128, i128, atomic_max, atomic_min); - atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); - - // Do not put this in the nested tests module due to glob imports refer to super::super::Atomic*. - test_atomic_int!(i128); - test_atomic_int!(u128); } diff --git a/src/imp/atomic128/detect/common.rs b/src/imp/atomic128/detect/common.rs index a23eb7c6..086d3054 100644 --- a/src/imp/atomic128/detect/common.rs +++ b/src/imp/atomic128/detect/common.rs @@ -33,7 +33,12 @@ pub(crate) fn detect() -> CpuInfo { return info; } info.set(CpuInfo::INIT); - _detect(&mut info); + // Note: This cfg is intended to make it easy for portable-atomic developers + // to test has_cmpxchg16b == false or has_lse == false cases, + // and is not a public API. + if !cfg!(portable_atomic_test_outline_atomics_detect_false) { + _detect(&mut info); + } CACHE.store(info.0, Ordering::Relaxed); info } @@ -60,7 +65,7 @@ impl CpuInfo { pub(crate) fn has_lse(self) -> bool { #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] { - // FEAT_LSE is statically available. + // FEAT_LSE is available at compile-time. true } #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] @@ -82,7 +87,7 @@ impl CpuInfo { pub(crate) fn has_cmpxchg16b(self) -> bool { #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] { - // CMPXCHG16B is statically available. + // CMPXCHG16B is available at compile-time. true } #[cfg(not(any( @@ -246,6 +251,7 @@ mod tests_common { let _ = stdout.write_all(features.as_bytes()); } + #[cfg(not(portable_atomic_test_outline_atomics_detect_false))] #[cfg(target_arch = "aarch64")] #[test] fn test_detect() { diff --git a/src/imp/atomic128/detect/x86_64.rs b/src/imp/atomic128/detect/x86_64.rs index fca3d354..d3dc6746 100644 --- a/src/imp/atomic128/detect/x86_64.rs +++ b/src/imp/atomic128/detect/x86_64.rs @@ -1,7 +1,12 @@ // Adapted from https://github.com/rust-lang/stdarch. 
#![cfg_attr( - any(not(target_feature = "sse"), miri, portable_atomic_sanitize_thread), + any( + not(target_feature = "sse"), + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + miri, + portable_atomic_sanitize_thread, + ), allow(dead_code) )] @@ -29,7 +34,7 @@ unsafe fn __cpuid(leaf: u32) -> CpuidResult { // rbx is reserved by LLVM "mov {ebx_tmp:r}, rbx", "cpuid", - "xchg {ebx_tmp:r}, rbx", + "xchg {ebx_tmp:r}, rbx", // restore rbx ebx_tmp = out(reg) ebx, inout("eax") leaf => eax, inout("ecx") 0 => ecx, @@ -105,8 +110,10 @@ fn _detect(info: &mut CpuInfo) { )] #[cfg(test)] mod tests { + #[cfg(not(portable_atomic_test_outline_atomics_detect_false))] use super::*; + #[cfg(not(portable_atomic_test_outline_atomics_detect_false))] #[test] // SGX doesn't support CPUID. // Miri doesn't support inline assembly. diff --git a/src/imp/atomic128/intrinsics.rs b/src/imp/atomic128/intrinsics.rs index 7dc5e985..15445dde 100644 --- a/src/imp/atomic128/intrinsics.rs +++ b/src/imp/atomic128/intrinsics.rs @@ -1,40 +1,60 @@ -// Atomic{I,U}128 implementation using core::intrinsics. +// Atomic{I,U}128 implementation without inline assembly. // -// Refs: https://github.com/rust-lang/rust/blob/1.68.0/library/core/src/sync/atomic.rs +// Note: This module is currently only enabled on Miri and ThreadSanitizer which +// do not support inline assembly. +// +// This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and +// `core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x. // -// On aarch64 and powerpc64, this module is currently only enabled on Miri and ThreadSanitizer -// which do not support inline assembly. (Note: on powerpc64, it requires LLVM 15+) -// On x86_64, this module is currently only enabled on benchmark. +// See README.md of this directory for performance comparison with the +// implementation with inline assembly. // -// Note that we cannot use this module on s390x because LLVM currently generates -// libcalls for operations other than load/store/cmpxchg: https://godbolt.org/z/5c9b3eYf7 +// Note: +// - This currently always needs nightly compilers. On x86_64, the stabilization +// of `core::arch::x86_64::cmpxchg16b` has been recently merged to stdarch: +// https://github.com/rust-lang/stdarch/pull/1358 +// - On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature): +// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 +// - On aarch64 big-endian, LLVM (as of 15) generates broken code. +// (on cfg(miri)/cfg(sanitize) it is fine though) +// - On s390x, LLVM (as of 16) generates libcalls for operations other than load/store/cmpxchg: +// https://godbolt.org/z/5a5T4hxMh +// - On powerpc64, LLVM (as of 16) doesn't support 128-bit atomic min/max: +// https://godbolt.org/z/3rebKcbdf // -// Note that we cannot use this module on aarch64_be (big-endian) because LLVM -// currently generates broken code. 
(on cfg(miri)/cfg(sanitize) it is fine though) +// Refs: https://github.com/rust-lang/rust/blob/1.68.0/library/core/src/sync/atomic.rs + +include!("macros.rs"); + +#[allow(dead_code)] // we only use compare_exchange +#[cfg(target_arch = "x86_64")] +#[cfg(not(target_feature = "cmpxchg16b"))] +#[path = "../fallback/outline_atomics.rs"] +mod fallback; + +#[cfg(target_arch = "x86_64")] +#[cfg(not(target_feature = "cmpxchg16b"))] +#[path = "detect/x86_64.rs"] +mod detect; +use core::sync::atomic::Ordering; +#[cfg(not(target_arch = "x86_64"))] use core::{ - cell::UnsafeCell, intrinsics, - sync::atomic::Ordering::{self, AcqRel, Acquire, Relaxed, Release, SeqCst}, + sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}, }; -// On x86_64, this module is only enabled on benchmark. -macro_rules! assert_cmpxchg16b { - () => { - #[cfg(all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")))] - { - assert!(std::is_x86_feature_detected!("cmpxchg16b")); - } - }; -} - #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { - crate::utils::assert_load_ordering(order); + #[cfg(target_arch = "x86_64")] + // SAFETY: the caller must uphold the safety contract. + unsafe { + let fail_order = crate::utils::strongest_failure_ordering(order); + match atomic_compare_exchange(src, 0, 0, order, fail_order) { + Ok(v) | Err(v) => v, + } + } + #[cfg(not(target_arch = "x86_64"))] // SAFETY: the caller must uphold the safety contract. unsafe { match order { @@ -47,47 +67,25 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { } #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { - crate::utils::assert_store_ordering(order); + #[cfg(target_arch = "x86_64")] // SAFETY: the caller must uphold the safety contract. unsafe { - match order { - Release => intrinsics::atomic_store_release(dst, val), - Relaxed => intrinsics::atomic_store_relaxed(dst, val), - SeqCst => intrinsics::atomic_store_seqcst(dst, val), - _ => unreachable!("{:?}", order), - } + atomic_swap(dst, val, order); } -} - -#[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] -unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { + #[cfg(not(target_arch = "x86_64"))] // SAFETY: the caller must uphold the safety contract. 
unsafe { match order { - Acquire => intrinsics::atomic_xchg_acquire(dst, val), - Release => intrinsics::atomic_xchg_release(dst, val), - AcqRel => intrinsics::atomic_xchg_acqrel(dst, val), - Relaxed => intrinsics::atomic_xchg_relaxed(dst, val), - SeqCst => intrinsics::atomic_xchg_seqcst(dst, val), + Release => intrinsics::atomic_store_release(dst, val), + Relaxed => intrinsics::atomic_store_relaxed(dst, val), + SeqCst => intrinsics::atomic_store_seqcst(dst, val), _ => unreachable!("{:?}", order), } } } #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_compare_exchange( dst: *mut u128, old: u128, @@ -95,7 +93,53 @@ unsafe fn atomic_compare_exchange( success: Ordering, failure: Ordering, ) -> Result { - crate::utils::assert_compare_exchange_ordering(success, failure); + #[cfg(target_arch = "x86_64")] + let (val, ok) = { + #[cfg_attr(not(target_feature = "cmpxchg16b"), target_feature(enable = "cmpxchg16b"))] + #[cfg_attr(target_feature = "cmpxchg16b", inline)] + #[cfg_attr(not(target_feature = "cmpxchg16b"), inline(never))] + unsafe fn cmpxchg16b( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, + ) -> (u128, bool) { + debug_assert!(dst as usize % 16 == 0); + #[cfg(not(target_feature = "cmpxchg16b"))] + { + debug_assert!(detect::detect().has_cmpxchg16b()); + } + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned (required by CMPXCHG16B), that there are no + // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. + let res = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) }; + (res, res == old) + } + let success = crate::utils::upgrade_success_ordering(success, failure); + #[cfg(target_feature = "cmpxchg16b")] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and cfg guarantees that CMPXCHG16B is available at compile-time. + unsafe { + cmpxchg16b(dst, old, new, success, failure) + } + #[cfg(not(target_feature = "cmpxchg16b"))] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. + unsafe { + ifunc!(unsafe fn( + dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering + ) -> (u128, bool) { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b + } else { + fallback::atomic_compare_exchange + } + }) + } + }; + #[cfg(not(target_arch = "x86_64"))] // SAFETY: the caller must uphold the safety contract. let (val, ok) = unsafe { match (success, failure) { @@ -124,11 +168,10 @@ unsafe fn atomic_compare_exchange( } } +#[cfg(target_arch = "x86_64")] +use atomic_compare_exchange as atomic_compare_exchange_weak; +#[cfg(not(target_arch = "x86_64"))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_compare_exchange_weak( dst: *mut u128, old: u128, @@ -136,7 +179,6 @@ unsafe fn atomic_compare_exchange_weak( success: Ordering, failure: Ordering, ) -> Result { - crate::utils::assert_compare_exchange_ordering(success, failure); // SAFETY: the caller must uphold the safety contract. 
let (val, ok) = unsafe { match (success, failure) { @@ -165,11 +207,52 @@ unsafe fn atomic_compare_exchange_weak( } } +#[inline(always)] +unsafe fn atomic_update(dst: *mut u128, order: Ordering, mut f: F) -> u128 +where + F: FnMut(u128) -> u128, +{ + // SAFETY: the caller must uphold the safety contract. + unsafe { + // This is a private function and all instances of `f` only operate on the value + // loaded, so there is no need to synchronize the first load/failed CAS. + let mut old = atomic_load(dst, Ordering::Relaxed); + loop { + let next = f(old); + match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(x) => old = x, + } + } + } +} + +// On x86_64, we use core::arch::x86_64::cmpxchg16b instead of core::intrinsics. +// On s390x, LLVM (as of 16) generates libcalls for operations other than load/store/cmpxchg: https://godbolt.org/z/5a5T4hxMh +#[cfg(any(target_arch = "x86_64", target_arch = "s390x"))] +atomic_rmw_by_atomic_update!(); +// On powerpc64, LLVM (as of 16) doesn't support 128-bit atomic min/max: https://godbolt.org/z/3rebKcbdf +#[cfg(target_arch = "powerpc64")] +atomic_rmw_by_atomic_update!(cmp); + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_xchg_acquire(dst, val), + Release => intrinsics::atomic_xchg_release(dst, val), + AcqRel => intrinsics::atomic_xchg_acqrel(dst, val), + Relaxed => intrinsics::atomic_xchg_relaxed(dst, val), + SeqCst => intrinsics::atomic_xchg_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { @@ -184,11 +267,8 @@ unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { @@ -203,11 +283,8 @@ unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { @@ -222,11 +299,8 @@ unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. 
unsafe { @@ -241,11 +315,8 @@ unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { @@ -260,11 +331,8 @@ unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { @@ -279,96 +347,41 @@ unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } -#[inline(always)] -unsafe fn atomic_update(dst: *mut u128, order: Ordering, mut f: F) -> u128 -where - F: FnMut(u128) -> u128, -{ - // SAFETY: the caller must uphold the safety contract. - unsafe { - // This is a private function and all instances of `f` only operate on the value - // loaded, so there is no need to synchronize the first load/failed CAS. - let mut old = atomic_load(dst, Ordering::Relaxed); - loop { - let next = f(old); - match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { - Ok(x) => return x, - Err(x) => old = x, - } - } - } -} - -/// returns the max value (signed comparison) +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] -unsafe fn atomic_max(dst: *mut i128, val: i128, order: Ordering) -> i128 { - // LLVM 15 doesn't support 128-bit atomic min/max for powerpc64. - #[cfg(target_arch = "powerpc64")] - #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_update(dst.cast::(), order, |x| core::cmp::max(x as i128, val) as u128) as i128 - } - #[cfg(not(target_arch = "powerpc64"))] +unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 { // SAFETY: the caller must uphold the safety contract. unsafe { match order { - Acquire => intrinsics::atomic_max_acquire(dst, val), - Release => intrinsics::atomic_max_release(dst, val), - AcqRel => intrinsics::atomic_max_acqrel(dst, val), - Relaxed => intrinsics::atomic_max_relaxed(dst, val), - SeqCst => intrinsics::atomic_max_seqcst(dst, val), + Acquire => intrinsics::atomic_max_acquire(dst.cast::(), val as i128), + Release => intrinsics::atomic_max_release(dst.cast::(), val as i128), + AcqRel => intrinsics::atomic_max_acqrel(dst.cast::(), val as i128), + Relaxed => intrinsics::atomic_max_relaxed(dst.cast::(), val as i128), + SeqCst => intrinsics::atomic_max_seqcst(dst.cast::(), val as i128), _ => unreachable!("{:?}", order), } } } -/// returns the min value (signed comparison) +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] -unsafe fn atomic_min(dst: *mut i128, val: i128, order: Ordering) -> i128 { - // LLVM 15 doesn't support 128-bit atomic min/max for powerpc64. 
- #[cfg(target_arch = "powerpc64")] - #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_update(dst.cast::(), order, |x| core::cmp::min(x as i128, val) as u128) as i128 - } - #[cfg(not(target_arch = "powerpc64"))] +unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> i128 { // SAFETY: the caller must uphold the safety contract. unsafe { match order { - Acquire => intrinsics::atomic_min_acquire(dst, val), - Release => intrinsics::atomic_min_release(dst, val), - AcqRel => intrinsics::atomic_min_acqrel(dst, val), - Relaxed => intrinsics::atomic_min_relaxed(dst, val), - SeqCst => intrinsics::atomic_min_seqcst(dst, val), + Acquire => intrinsics::atomic_min_acquire(dst.cast::(), val as i128), + Release => intrinsics::atomic_min_release(dst.cast::(), val as i128), + AcqRel => intrinsics::atomic_min_acqrel(dst.cast::(), val as i128), + Relaxed => intrinsics::atomic_min_relaxed(dst.cast::(), val as i128), + SeqCst => intrinsics::atomic_min_seqcst(dst.cast::(), val as i128), _ => unreachable!("{:?}", order), } } } -/// returns the max value (unsigned comparison) +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 { - // LLVM 15 doesn't support 128-bit atomic min/max for powerpc64. - #[cfg(target_arch = "powerpc64")] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_update(dst, order, |x| core::cmp::max(x, val)) - } - #[cfg(not(target_arch = "powerpc64"))] // SAFETY: the caller must uphold the safety contract. unsafe { match order { @@ -382,20 +395,9 @@ unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } -/// returns the min value (unsigned comparison) +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] #[inline] -#[cfg_attr( - all(target_arch = "x86_64", not(target_feature = "cmpxchg16b")), - target_feature(enable = "cmpxchg16b") -)] unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 { - // LLVM 15 doesn't support 128-bit atomic min/max for powerpc64. - #[cfg(target_arch = "powerpc64")] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_update(dst, order, |x| core::cmp::min(x, val)) - } - #[cfg(not(target_arch = "powerpc64"))] // SAFETY: the caller must uphold the safety contract. unsafe { match order { @@ -409,218 +411,43 @@ unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 { } } -macro_rules! atomic128 { - ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { - #[repr(C, align(16))] - pub(crate) struct $atomic_type { - v: UnsafeCell<$int_type>, - } - - // Send is implicitly implemented. - // SAFETY: any data races are prevented by atomic intrinsics. 
- unsafe impl Sync for $atomic_type {} - - impl_default_no_fetch_ops!($atomic_type, $int_type); - impl_default_bit_opts!($atomic_type, $int_type); - impl $atomic_type { - #[inline] - pub(crate) const fn new(v: $int_type) -> Self { - Self { v: UnsafeCell::new(v) } - } - - #[inline] - pub(crate) fn is_lock_free() -> bool { - Self::is_always_lock_free() - } - #[inline] - pub(crate) const fn is_always_lock_free() -> bool { - true - } - - #[inline] - pub(crate) fn get_mut(&mut self) -> &mut $int_type { - self.v.get_mut() - } - - #[inline] - pub(crate) fn into_inner(self) -> $int_type { - self.v.into_inner() - } - - #[inline] - #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] - pub(crate) fn load(&self, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_load(self.v.get().cast::(), order) as $int_type } - } - - #[inline] - #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] - pub(crate) fn store(&self, val: $int_type, order: Ordering) { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_store(self.v.get().cast::(), val as u128, order) } - } - - #[inline] - pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_swap(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] - pub(crate) fn compare_exchange( - &self, - current: $int_type, - new: $int_type, - success: Ordering, - failure: Ordering, - ) -> Result<$int_type, $int_type> { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { - match atomic_compare_exchange( - self.v.get().cast::(), - current as u128, - new as u128, - success, - failure, - ) { - Ok(v) => Ok(v as $int_type), - Err(v) => Err(v as $int_type), - } - } - } - - #[inline] - #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] - pub(crate) fn compare_exchange_weak( - &self, - current: $int_type, - new: $int_type, - success: Ordering, - failure: Ordering, - ) -> Result<$int_type, $int_type> { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { - match atomic_compare_exchange_weak( - self.v.get().cast::(), - current as u128, - new as u128, - success, - failure, - ) { - Ok(v) => Ok(v as $int_type), - Err(v) => Err(v as $int_type), - } - } - } - - #[inline] - pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. 
- unsafe { atomic_add(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_sub(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_and(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_nand(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_or(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { atomic_xor(self.v.get().cast::(), val as u128, order) as $int_type } - } - - #[inline] - pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { $atomic_max(self.v.get(), val, order) } - } - - #[inline] - pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. - unsafe { $atomic_min(self.v.get(), val, order) } - } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_xor(dst, core::u128::MAX, order) } +} - #[inline] - pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { - const NOT_MASK: $int_type = (0 as $int_type).wrapping_sub(1); - self.fetch_xor(NOT_MASK, order) - } - #[inline] - pub(crate) fn not(&self, order: Ordering) { - self.fetch_not(order); - } +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, u128::wrapping_neg) } +} - #[inline] - pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type { - assert_cmpxchg16b!(); - // SAFETY: any data races are prevented by atomic intrinsics and the raw - // pointer passed in is valid because we got it from a reference. 
- unsafe { - atomic_update(self.v.get().cast::(), order, u128::wrapping_neg) - as $int_type - } - } - #[inline] - pub(crate) fn neg(&self, order: Ordering) { - self.fetch_neg(order); - } +#[cfg(not(target_arch = "x86_64"))] +#[inline] +const fn is_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE +} +#[cfg(not(target_arch = "x86_64"))] +const IS_ALWAYS_LOCK_FREE: bool = true; - #[inline] - pub(crate) const fn as_ptr(&self) -> *mut $int_type { - self.v.get() - } - } - }; +#[cfg(target_arch = "x86_64")] +#[inline] +fn is_lock_free() -> bool { + #[cfg(target_feature = "cmpxchg16b")] + { + // CMPXCHG16B is available at compile-time. + true + } + #[cfg(not(target_feature = "cmpxchg16b"))] + { + detect::detect().has_cmpxchg16b() + } } +#[cfg(target_arch = "x86_64")] +const IS_ALWAYS_LOCK_FREE: bool = cfg!(target_feature = "cmpxchg16b"); atomic128!(AtomicI128, i128, atomic_max, atomic_min); atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); diff --git a/src/imp/atomic128/macros.rs b/src/imp/atomic128/macros.rs index 7e1fe30b..77281c36 100644 --- a/src/imp/atomic128/macros.rs +++ b/src/imp/atomic128/macros.rs @@ -201,130 +201,58 @@ macro_rules! atomic128 { }; } -#[cfg(any( - target_arch = "s390x", - all( - target_arch = "x86_64", - any( - test, - not(any( - target_feature = "cmpxchg16b", - portable_atomic_target_feature = "cmpxchg16b", - )), - any(miri, portable_atomic_sanitize_thread), - ), - ), -))] +#[cfg(any(target_arch = "powerpc64", target_arch = "s390x", target_arch = "x86_64"))] +#[cfg_attr(any(target_arch = "powerpc64", target_arch = "x86_64"), allow(unused_macros))] // only used by intrinsics.rs macro_rules! atomic_rmw_by_atomic_update { () => { - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |_| val) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |x| x.wrapping_add(val)) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |x| x.wrapping_sub(val)) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |x| x & val) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. 
unsafe { atomic_update(dst, order, |x| !(x & val)) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |x| x | val) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |x| x ^ val) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, |x| !x) } } - #[cfg_attr( - target_arch = "s390x", - cfg(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )) - )] #[inline] unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { atomic_update(dst, order, u128::wrapping_neg) } } + atomic_rmw_by_atomic_update!(cmp); + }; + (cmp) => { #[inline] unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> u128 { #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] diff --git a/src/imp/atomic128/powerpc64.rs b/src/imp/atomic128/powerpc64.rs index e23eca31..af1bad8c 100644 --- a/src/imp/atomic128/powerpc64.rs +++ b/src/imp/atomic128/powerpc64.rs @@ -22,8 +22,8 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - powerpc64 (pwr8) https://godbolt.org/z/xo6EWKojK -// - powerpc64le https://godbolt.org/z/z8ToMza5e +// - powerpc64 (pwr8) https://godbolt.org/z/4aGs41dEn +// - powerpc64le https://godbolt.org/z/oE3rPoqz4 include!("macros.rs"); @@ -229,6 +229,7 @@ use atomic_compare_exchange as atomic_compare_exchange_weak; #[inline] unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. unsafe { let val = U128 { whole: val }; @@ -524,5 +525,9 @@ mod tests { #[cfg(qemu)] test_atomic_int_load_store!(u128); + // Test operation parts of LL/SC-based atomic RMW implementations separately. + // + // This allows testing more code on QEMU while avoiding the problem of some + // atomic instructions not working on QEMU. test_atomic128_op!(); } diff --git a/src/imp/atomic128/s390x.rs b/src/imp/atomic128/s390x.rs index 9ec5d073..fd02cc41 100644 --- a/src/imp/atomic128/s390x.rs +++ b/src/imp/atomic128/s390x.rs @@ -3,23 +3,19 @@ // s390x supports 128-bit atomic load/store/cmpxchg: // https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc // -// Note that LLVM currently generates libcalls for other operations: https://godbolt.org/z/5c9b3eYf7 +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. 
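// A rough sketch of what the `cmp` arm above boils down to: min/max are the only RMW ops that
// need a signedness-aware comparison, so they are expressed as closures over atomic_update, with
// the signed variants doing the comparison after an i128 cast round-trip. The helpers below are
// illustrative only, not this crate's API.
fn max_signed_step(current: u128, val: u128) -> u128 {
    // Reinterpret both sides as i128 (two's complement) for the comparison, store back as u128.
    core::cmp::max(current as i128, val as i128) as u128
}
fn min_unsigned_step(current: u128, val: u128) -> u128 {
    core::cmp::min(current, val)
}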
// // Refs: // - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - s390x https://godbolt.org/z/4Ms3M8x6c +// - s390x https://godbolt.org/z/oP5bhbqce include!("macros.rs"); -#[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, -)))] -use core::arch::asm; -use core::sync::atomic::Ordering; +use core::{arch::asm, sync::atomic::Ordering}; /// A 128-bit value represented as a pair of 64-bit values. /// @@ -42,21 +38,6 @@ struct Pair { unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { debug_assert!(src as usize % 16 == 0); - // Miri and Sanitizer do not support inline assembly. - #[cfg(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - match order { - Ordering::Acquire => core::intrinsics::atomic_load_acquire(src), - Ordering::Relaxed => core::intrinsics::atomic_load_relaxed(src), - Ordering::SeqCst => core::intrinsics::atomic_load_seqcst(src), - _ => unreachable!("{:?}", order), - } - } - #[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )))] // SAFETY: the caller must uphold the safety contract. unsafe { // atomic load is always SeqCst. @@ -78,21 +59,6 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { debug_assert!(dst as usize % 16 == 0); - // Miri and Sanitizer do not support inline assembly. - #[cfg(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - match order { - Ordering::Release => core::intrinsics::atomic_store_release(dst, val), - Ordering::Relaxed => core::intrinsics::atomic_store_relaxed(dst, val), - Ordering::SeqCst => core::intrinsics::atomic_store_seqcst(dst, val), - _ => unreachable!("{:?}", order), - } - } - #[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )))] // SAFETY: the caller must uphold the safety contract. unsafe { let val = U128 { whole: val }; @@ -134,35 +100,6 @@ unsafe fn atomic_compare_exchange( ) -> Result { debug_assert!(dst as usize % 16 == 0); - // Miri and Sanitizer do not support inline assembly. - #[cfg(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics))] - // SAFETY: the caller must uphold the safety contract. 
- let res = unsafe { - use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; - match (success, failure) { - (Relaxed, Relaxed) => core::intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new), - (Relaxed, Acquire) => core::intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new), - (Relaxed, SeqCst) => core::intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new), - (Acquire, Relaxed) => core::intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new), - (Acquire, Acquire) => core::intrinsics::atomic_cxchg_acquire_acquire(dst, old, new), - (Acquire, SeqCst) => core::intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new), - (Release, Relaxed) => core::intrinsics::atomic_cxchg_release_relaxed(dst, old, new), - (Release, Acquire) => core::intrinsics::atomic_cxchg_release_acquire(dst, old, new), - (Release, SeqCst) => core::intrinsics::atomic_cxchg_release_seqcst(dst, old, new), - (AcqRel, Relaxed) => core::intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new), - (AcqRel, Acquire) => core::intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new), - (AcqRel, SeqCst) => core::intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new), - (SeqCst, Relaxed) => core::intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new), - (SeqCst, Acquire) => core::intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new), - (SeqCst, SeqCst) => core::intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new), - _ => unreachable!("{:?}, {:?}", success, failure), - } - .0 - }; - #[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )))] // SAFETY: the caller must uphold the safety contract. let res = unsafe { // atomic CAS is always SeqCst. @@ -211,11 +148,6 @@ where } } -// Miri and Sanitizer do not support inline assembly. -#[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, -)))] #[inline] unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -257,20 +189,14 @@ unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { /// - r12/r13 pair: new value that will to stored // We could use atomic_update here, but using an inline assembly allows omitting // the comparison of results and the storing/comparing of condition flags. -#[rustfmt::skip] // buggy macro formatting macro_rules! atomic_rmw_cas_3 { ($name:ident, $($op:tt)*) => { - // Miri and Sanitizer do not support inline assembly. - #[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )))] #[inline] unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); // SAFETY: the caller must uphold the safety contract. unsafe { - // atomic swap is always SeqCst. + // atomic RMW is always SeqCst. let val = U128 { whole: val }; let (mut prev_hi, mut prev_lo); asm!( @@ -302,20 +228,14 @@ macro_rules! atomic_rmw_cas_3 { /// - r12/r13 pair: new value that will to stored // We could use atomic_update here, but using an inline assembly allows omitting // the comparison of results and the storing/comparing of condition flags. -#[rustfmt::skip] // buggy macro formatting macro_rules! atomic_rmw_cas_2 { ($name:ident, $($op:tt)*) => { - // Miri and Sanitizer do not support inline assembly. 
- #[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, - )))] #[inline] unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); // SAFETY: the caller must uphold the safety contract. unsafe { - // atomic swap is always SeqCst. + // atomic RMW is always SeqCst. let (mut prev_hi, mut prev_lo); asm!( "lpq %r0, 0({dst})", @@ -401,7 +321,12 @@ atomic_rmw_cas_2! { "slbgr %r12, %r0", } -atomic_rmw_by_atomic_update!(); +// We use atomic_update for atomic min/max in all cases because +// pre-z13 doesn't seem to have a good way to implement 128-bit min/max. +// https://godbolt.org/z/53fnrET7o +// (LLVM 16's minimal supported architecture level is z10: +// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/SystemZ/SystemZProcessors.td) +atomic_rmw_by_atomic_update!(cmp); #[inline] const fn is_lock_free() -> bool { diff --git a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index 122c649f..0b6d332d 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -1,18 +1,18 @@ // Atomic{I,U}128 implementation for x86_64 using CMPXCHG16B (DWCAS). // +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. +// // Refs: // - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - x86_64 (+cmpxchg16b) https://godbolt.org/z/44xdG776a +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/KahrWeW9G include!("macros.rs"); -#[cfg(any( - test, - not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), -))] +#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] #[path = "../fallback/outline_atomics.rs"] mod fallback; @@ -25,6 +25,27 @@ mod detect; use core::arch::asm; use core::sync::atomic::Ordering; +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_cmpxchg16b { + () => { + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + { + debug_assert!(detect::detect().has_cmpxchg16b()); + } + }; +} +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +macro_rules! debug_assert_vmovdqa_atomic { + () => {{ + debug_assert_cmpxchg16b!(); + debug_assert!(detect::detect().has_vmovdqa_atomic()); + }}; +} + #[allow(unused_macros)] #[cfg(target_pointer_width = "32")] macro_rules! ptr_modifier { @@ -61,33 +82,11 @@ struct Pair { not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), target_feature(enable = "cmpxchg16b") )] -#[cfg_attr( - any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - inline -)] -#[cfg_attr( - not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - inline(never) -)] -unsafe fn _cmpxchg16b( - dst: *mut u128, - old: u128, - new: u128, - success: Ordering, - failure: Ordering, -) -> (u128, bool) { +#[inline] +unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); - // Miri and Sanitizer do not support inline assembly. 
- #[cfg(any(miri, portable_atomic_sanitize_thread))] - // SAFETY: the caller must guarantee that `dst` is valid for both writes and - // reads, 16-byte aligned (required by CMPXCHG16B), that there are no - // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. - unsafe { - let res = core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure); - (res, res == old) - } - #[cfg(not(any(miri, portable_atomic_sanitize_thread)))] // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned (required by CMPXCHG16B), that there are no // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. @@ -102,7 +101,6 @@ unsafe fn _cmpxchg16b( // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b unsafe { // cmpxchg16b is always SeqCst. - let _ = (success, failure); let r: u8; let old = U128 { whole: old }; let new = U128 { whole: new }; @@ -114,8 +112,7 @@ unsafe fn _cmpxchg16b( "xchg {rbx_tmp}, rbx", concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "sete r8b", - // restore rbx - "mov rbx, {rbx_tmp}", + "mov rbx, {rbx_tmp}", // restore rbx rbx_tmp = inout(reg) new.pair.lo => _, in("rcx") new.pair.hi, inout("rax") old.pair.lo => prev_lo, @@ -135,40 +132,6 @@ unsafe fn _cmpxchg16b( } } -// 128-bit atomic load by two 64-bit atomic loads. -// -// See atomic_update for details. -#[cfg(any( - test, - not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - any(miri, portable_atomic_sanitize_thread), -))] -#[inline] -unsafe fn byte_wise_atomic_load(src: *mut u128) -> u128 { - debug_assert!(src as usize % 16 == 0); - - // Miri and Sanitizer do not support inline assembly. - #[cfg(any(miri, portable_atomic_sanitize_thread))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_load(src, Ordering::Relaxed) - } - #[cfg(not(any(miri, portable_atomic_sanitize_thread)))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - let (prev_lo, prev_hi); - asm!( - concat!("mov {prev_lo}, qword ptr [{src", ptr_modifier!(), "}]"), - concat!("mov {prev_hi}, qword ptr [{src", ptr_modifier!(), "} + 8]"), - src = in(reg) src, - prev_lo = out(reg) prev_lo, - prev_hi = out(reg) prev_hi, - options(nostack, preserves_flags, readonly), - ); - U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole - } -} - // VMOVDQA is atomic on Intel and AMD CPUs with AVX. // See https://gcc.gnu.org/bugzilla//show_bug.cgi?id=104688 for details. // @@ -176,13 +139,17 @@ unsafe fn byte_wise_atomic_load(src: *mut u128) -> u128 { // // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] #[cfg(target_feature = "sse")] #[target_feature(enable = "avx")] #[inline] -unsafe fn _atomic_load_vmovdqa(src: *mut u128, _order: Ordering) -> u128 { +unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 { debug_assert!(src as usize % 16 == 0); + debug_assert_vmovdqa_atomic!(); // SAFETY: the caller must uphold the safety contract. + // + // atomic load by vmovdqa is always SeqCst. 
unsafe { let out: core::arch::x86_64::__m128; asm!( @@ -194,11 +161,13 @@ unsafe fn _atomic_load_vmovdqa(src: *mut u128, _order: Ordering) -> u128 { core::mem::transmute(out) } } +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] #[cfg(target_feature = "sse")] #[target_feature(enable = "avx")] #[inline] -unsafe fn _atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { +unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { debug_assert!(dst as usize % 16 == 0); + debug_assert_vmovdqa_atomic!(); // SAFETY: the caller must uphold the safety contract. unsafe { @@ -227,51 +196,127 @@ unsafe fn _atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { } } +#[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), +)))] +macro_rules! load_store_detect { + ( + vmovdqa = $vmovdqa:ident + cmpxchg16b = $cmpxchg16b:ident + fallback = $fallback:ident + ) => {{ + let cpuid = detect::detect(); + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + { + // Check CMPXCHG16B first to prevent mixing atomic and non-atomic access. + if cpuid.has_cmpxchg16b() { + // We do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. + #[cfg(target_feature = "sse")] + { + if cpuid.has_vmovdqa_atomic() { + $vmovdqa + } else { + $cmpxchg16b + } + } + #[cfg(not(target_feature = "sse"))] + { + $cmpxchg16b + } + } else { + fallback::$fallback + } + } + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + { + if cpuid.has_vmovdqa_atomic() { + $vmovdqa + } else { + $cmpxchg16b + } + } + }}; +} + #[inline] -unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { +unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html // SGX doesn't support CPUID. - // Miri and Sanitizer do not support inline assembly. - #[cfg(any( - not(target_feature = "sse"), - portable_atomic_no_outline_atomics, - target_env = "sgx", - miri, - portable_atomic_sanitize_thread, + #[cfg(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), ))] // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that CMPXCHG16B is available at compile-time. unsafe { - _atomic_load_cmpxchg16b(src, order) + // cmpxchg16b is always SeqCst. + atomic_load_cmpxchg16b(src) } - #[cfg(not(any( - not(target_feature = "sse"), - portable_atomic_no_outline_atomics, - target_env = "sgx", - miri, - portable_atomic_sanitize_thread, + #[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), )))] // SAFETY: the caller must uphold the safety contract. unsafe { - ifunc!(unsafe fn(src: *mut u128, order: Ordering) -> u128 { - // Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access. 
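// A rough sketch of the decision made by load_store_detect! above, written as a plain function
// over booleans (stand-ins for detect::detect().has_cmpxchg16b() / has_vmovdqa_atomic(); the SSE
// cfg nuance is omitted). The key point is the order of the checks when CMPXCHG16B is not known
// at compile time: the CMPXCHG16B check comes first so atomic (asm) and non-atomic (fallback)
// accesses are never mixed on the same location.
fn select_load_store_impl(has_cmpxchg16b: bool, has_vmovdqa_atomic: bool) -> &'static str {
    if !has_cmpxchg16b {
        // Without DWCAS, every access must go through the fallback.
        "fallback"
    } else if has_vmovdqa_atomic {
        // AVX-capable Intel/AMD CPUs where VMOVDQA loads/stores are atomic.
        "vmovdqa"
    } else {
        "cmpxchg16b"
    }
}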
- let cpuid = detect::detect(); - if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() { - _atomic_load_vmovdqa - } else { - _atomic_load_cmpxchg16b + ifunc!(unsafe fn(src: *mut u128) -> u128 { + load_store_detect! { + vmovdqa = atomic_load_vmovdqa + cmpxchg16b = atomic_load_cmpxchg16b + // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst. + fallback = atomic_load_seqcst } }) } } +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] #[inline] -unsafe fn _atomic_load_cmpxchg16b(src: *mut u128, order: Ordering) -> u128 { - let fail_order = crate::utils::strongest_failure_ordering(order); - // SAFETY: the caller must uphold the safety contract. +unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `src` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + // + // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows + // omitting the storing of condition flags and avoid use of xchg to handle rbx. unsafe { - match atomic_compare_exchange(src, 0, 0, order, fail_order) { - Ok(v) | Err(v) => v, + // cmpxchg16b is always SeqCst. + let (prev_lo, prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + "xor rbx, rbx", // zeroed rbx + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "mov rbx, {rbx_tmp}", // restore rbx + // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg) + rbx_tmp = out(reg) _, + in("rcx") 0_u64, + inout("rax") 0_u64 => prev_lo, + inout("rdx") 0_u64 => prev_hi, + in($rdi) src, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole } } @@ -280,61 +325,50 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html // SGX doesn't support CPUID. - // Miri and Sanitizer do not support inline assembly. - #[cfg(any( - not(target_feature = "sse"), - portable_atomic_no_outline_atomics, - target_env = "sgx", - miri, - portable_atomic_sanitize_thread, + #[cfg(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), ))] // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that CMPXCHG16B is available at compile-time. unsafe { - _atomic_store_cmpxchg16b(dst, val, order); + // cmpxchg16b is always SeqCst. 
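// A rough sketch of the idea behind atomic_load_cmpxchg16b above, using the
// core::arch::x86_64::cmpxchg16b intrinsic instead of inline assembly (the intrinsic may require
// a nightly toolchain; the asm version exists to avoid the rbx shuffle and flag handling): a
// compare-exchange with expected == new == 0 either succeeds and stores 0 over an existing 0 (a
// no-op) or fails, and in both cases it returns the current value, so it acts as a 16-byte load.
// Like the asm version, this requires the location to be writable.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "cmpxchg16b")]
unsafe fn load_via_cmpxchg16b_sketch(src: *mut u128) -> u128 {
    use core::sync::atomic::Ordering::SeqCst;
    // SAFETY: the caller must guarantee that `src` is valid for reads and writes, 16-byte
    // aligned, and that the CPU supports CMPXCHG16B; cmpxchg16b is always SeqCst.
    unsafe { core::arch::x86_64::cmpxchg16b(src, 0, 0, SeqCst, SeqCst) }
}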
+ let _ = order; + atomic_store_cmpxchg16b(dst, val); } - #[cfg(not(any( - not(target_feature = "sse"), - portable_atomic_no_outline_atomics, - target_env = "sgx", - miri, - portable_atomic_sanitize_thread, + #[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), )))] // SAFETY: the caller must uphold the safety contract. unsafe { + #[cfg(target_feature = "sse")] fn_alias! { #[target_feature(enable = "avx")] unsafe fn(dst: *mut u128, val: u128); - _atomic_store_vmovdqa_relaxed = _atomic_store_vmovdqa(Ordering::Relaxed); - _atomic_store_vmovdqa_seqcst = _atomic_store_vmovdqa(Ordering::SeqCst); - } - fn_alias! { - unsafe fn(dst: *mut u128, val: u128); - _atomic_store_cmpxchg16b_relaxed = _atomic_store_cmpxchg16b(Ordering::Relaxed); - _atomic_store_cmpxchg16b_seqcst = _atomic_store_cmpxchg16b(Ordering::SeqCst); + // atomic store by vmovdqa has at least release semantics. + atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); + atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); } match order { // Relaxed and Release stores are equivalent in all implementations // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). - // Due to cfg, core::arch's cmpxchg16b will never called here. + // core::arch's cmpxchg16b will never called here. Ordering::Relaxed | Ordering::Release => { ifunc!(unsafe fn(dst: *mut u128, val: u128) { - // Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access. - let cpuid = detect::detect(); - if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() { - _atomic_store_vmovdqa_relaxed - } else { - _atomic_store_cmpxchg16b_relaxed + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_non_seqcst + cmpxchg16b = atomic_store_cmpxchg16b + fallback = atomic_store_non_seqcst } }); } Ordering::SeqCst => { ifunc!(unsafe fn(dst: *mut u128, val: u128) { - // Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access. - let cpuid = detect::detect(); - if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() { - _atomic_store_vmovdqa_seqcst - } else { - _atomic_store_cmpxchg16b_seqcst + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_seqcst + cmpxchg16b = atomic_store_cmpxchg16b + fallback = atomic_store_seqcst } }); } @@ -342,11 +376,15 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { } } } -#[inline] -unsafe fn _atomic_store_cmpxchg16b(dst: *mut u128, val: u128, order: Ordering) { +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { // SAFETY: the caller must uphold the safety contract. unsafe { - atomic_swap(dst, val, order); + // cmpxchg16b is always SeqCst. + atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst); } } @@ -355,30 +393,26 @@ unsafe fn atomic_compare_exchange( dst: *mut u128, old: u128, new: u128, - success: Ordering, - failure: Ordering, + _success: Ordering, + _failure: Ordering, ) -> Result { - let success = crate::utils::upgrade_success_ordering(success, failure); #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, that there are no concurrent non-atomic operations, - // and cfg guarantees that CMPXCHG16B is statically available. 
- let (res, ok) = unsafe { _cmpxchg16b(dst, old, new, success, failure) }; + // and cfg guarantees that CMPXCHG16B is available at compile-time. + let (res, ok) = unsafe { cmpxchg16b(dst, old, new) }; #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] - let (res, ok) = { - // SAFETY: the caller must guarantee that `dst` is valid for both writes and - // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. - unsafe { - ifunc!(unsafe fn( - dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering - ) -> (u128, bool) { - if detect::detect().has_cmpxchg16b() { - _cmpxchg16b - } else { - fallback::atomic_compare_exchange - } - }) - } + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. + let (res, ok) = unsafe { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b + } else { + // Use SeqCst because cmpxchg16b is always SeqCst. + fallback::atomic_compare_exchange_seqcst + } + }) }; if ok { Ok(res) @@ -389,66 +423,29 @@ unsafe fn atomic_compare_exchange( use atomic_compare_exchange as atomic_compare_exchange_weak; -#[cfg(any( - not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - any(miri, portable_atomic_sanitize_thread), -))] -#[inline(always)] -unsafe fn atomic_update(dst: *mut u128, order: Ordering, mut f: F) -> u128 -where - F: FnMut(u128) -> u128, -{ - // SAFETY: the caller must uphold the safety contract. - unsafe { - // This is based on the code generated for the first load in DW RMWs by LLVM, - // but it is interesting that they generate code that does mixed-sized atomic access. - // - // This is not single-copy atomic reads, but this is ok because subsequent - // CAS will check for consistency. - // - // byte_wise_atomic_load works the same way as seqlock's byte-wise atomic memcpy, - // so it works well even when atomic_compare_exchange_weak calls global lock-based fallback. - // - // Note that the C++20 memory model does not allow mixed-sized atomic access, - // so we must use inline assembly to implement byte_wise_atomic_load. - // (i.e., byte-wise atomic based on the standard library's atomic types - // cannot be used here). Since fallback's byte-wise atomic memcpy is per - // 64-bit on x86_64 (even on x32 ABI), it's okay to use it together with this. - let mut old = byte_wise_atomic_load(dst); - loop { - let next = f(old); - // This is a private function and all instances of `f` only operate on the value - // loaded, so there is no need to synchronize the first load/failed CAS. - match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { - Ok(x) => return x, - Err(x) => old = x, - } - } - } -} - -// We use atomic_rmw_by_atomic_update when cmpxchg16b is not available at compile-time, or -// on Miri and Sanitizer that do not support inline assembly. 
-#[cfg(not(any( +#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] +use atomic_swap_cmpxchg16b as atomic_swap; +#[cfg_attr( not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - any(miri, portable_atomic_sanitize_thread), -)))] + target_feature(enable = "cmpxchg16b") +)] #[inline] -unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { +unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. // cfg guarantees that the CPU supports CMPXCHG16B. // - // See _cmpxchg16b for more. + // See cmpxchg16b function for more. // - // We could use atomic_update here, but using an inline assembly allows omitting - // the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. + // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows + // omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. // // Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. unsafe { - // atomic swap is always SeqCst. + // cmpxchg16b is always SeqCst. let _ = order; let val = U128 { whole: val }; let (mut prev_lo, mut prev_hi); @@ -457,14 +454,21 @@ unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { asm!( // rbx is reserved by LLVM "xchg {rbx_tmp}, rbx", - // See atomic_update + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). concat!("mov rax, qword ptr [", $rdi, "]"), concat!("mov rdx, qword ptr [", $rdi, " + 8]"), "2:", concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "jne 2b", - // restore rbx - "mov rbx, {rbx_tmp}", + "mov rbx, {rbx_tmp}", // restore rbx rbx_tmp = inout(reg) val.pair.lo => _, in("rcx") val.pair.hi, out("rax") prev_lo, @@ -490,27 +494,27 @@ unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { /// - rsi/r8 pair: val argument (read-only for `$op`) /// - rax/rdx pair: previous value loaded (read-only for `$op`) /// - rbx/rcx pair: new value that will to stored -// We could use atomic_update here, but using an inline assembly allows omitting -// the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. -#[rustfmt::skip] // buggy macro formatting +// We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows +// omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. macro_rules! atomic_rmw_cas_3 { - ($name:ident, $($op:tt)*) => { - // We use atomic_rmw_by_atomic_update when cmpxchg16b is not available at compile-time, or - // on Miri and Sanitizer that do not support inline assembly. 
- #[cfg(not(any( + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $name as $reexport_name; + #[cfg_attr( not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - any(miri, portable_atomic_sanitize_thread), - )))] + target_feature(enable = "cmpxchg16b") + )] #[inline] unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. // cfg guarantees that the CPU supports CMPXCHG16B. // - // See _cmpxchg16b for more. + // See cmpxchg16b function for more. unsafe { - // atomic swap is always SeqCst. + // cmpxchg16b is always SeqCst. let val = U128 { whole: val }; let (mut prev_lo, mut prev_hi); macro_rules! cmpxchg16b { @@ -518,15 +522,22 @@ macro_rules! atomic_rmw_cas_3 { asm!( // rbx is reserved by LLVM "mov {rbx_tmp}, rbx", - // See atomic_update + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). concat!("mov rax, qword ptr [", $rdi, "]"), concat!("mov rdx, qword ptr [", $rdi, " + 8]"), "2:", $($op)* concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "jne 2b", - // restore rbx - "mov rbx, {rbx_tmp}", + "mov rbx, {rbx_tmp}", // restore rbx rbx_tmp = out(reg) _, out("rcx") _, out("rax") prev_lo, @@ -554,42 +565,49 @@ macro_rules! atomic_rmw_cas_3 { /// `$op` can use the following registers: /// - rax/rdx pair: previous value loaded (read-only for `$op`) /// - rbx/rcx pair: new value that will to stored -// We could use atomic_update here, but using an inline assembly allows omitting -// the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. -#[rustfmt::skip] // buggy macro formatting +// We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows +// omitting the storing of condition flags and avoid use of xchg to handle rbx. macro_rules! atomic_rmw_cas_2 { - ($name:ident, $($op:tt)*) => { - // We use atomic_rmw_by_atomic_update when cmpxchg16b is not available at compile-time, or - // on Miri and Sanitizer that do not support inline assembly. - #[cfg(not(any( + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $name as $reexport_name; + #[cfg_attr( not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - any(miri, portable_atomic_sanitize_thread), - )))] + target_feature(enable = "cmpxchg16b") + )] #[inline] unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. // cfg guarantees that the CPU supports CMPXCHG16B. // - // See _cmpxchg16b for more. + // See cmpxchg16b function for more. 
unsafe { - // atomic swap is always SeqCst. + // cmpxchg16b is always SeqCst. let (mut prev_lo, mut prev_hi); macro_rules! cmpxchg16b { ($rdi:tt) => { asm!( // rbx is reserved by LLVM "mov {rbx_tmp}, rbx", - // See atomic_update + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). concat!("mov rax, qword ptr [", $rdi, "]"), concat!("mov rdx, qword ptr [", $rdi, " + 8]"), "2:", $($op)* concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), "jne 2b", - // restore rbx - "mov rbx, {rbx_tmp}", + "mov rbx, {rbx_tmp}", // restore rbx rbx_tmp = out(reg) _, out("rcx") _, out("rax") prev_lo, @@ -611,28 +629,28 @@ macro_rules! atomic_rmw_cas_2 { } atomic_rmw_cas_3! { - atomic_add, + atomic_add_cmpxchg16b as atomic_add, "mov rbx, rax", "add rbx, rsi", "mov rcx, rdx", "adc rcx, r8", } atomic_rmw_cas_3! { - atomic_sub, + atomic_sub_cmpxchg16b as atomic_sub, "mov rbx, rax", "sub rbx, rsi", "mov rcx, rdx", "sbb rcx, r8", } atomic_rmw_cas_3! { - atomic_and, + atomic_and_cmpxchg16b as atomic_and, "mov rbx, rax", "and rbx, rsi", "mov rcx, rdx", "and rcx, r8", } atomic_rmw_cas_3! { - atomic_nand, + atomic_nand_cmpxchg16b as atomic_nand, "mov rbx, rax", "and rbx, rsi", "not rbx", @@ -641,14 +659,14 @@ atomic_rmw_cas_3! { "not rcx", } atomic_rmw_cas_3! { - atomic_or, + atomic_or_cmpxchg16b as atomic_or, "mov rbx, rax", "or rbx, rsi", "mov rcx, rdx", "or rcx, r8", } atomic_rmw_cas_3! { - atomic_xor, + atomic_xor_cmpxchg16b as atomic_xor, "mov rbx, rax", "xor rbx, rsi", "mov rcx, rdx", @@ -656,14 +674,14 @@ atomic_rmw_cas_3! { } atomic_rmw_cas_2! { - atomic_not, + atomic_not_cmpxchg16b as atomic_not, "mov rbx, rax", "not rbx", "mov rcx, rdx", "not rcx", } atomic_rmw_cas_2! { - atomic_neg, + atomic_neg_cmpxchg16b as atomic_neg, "mov rbx, rax", "neg rbx", "mov rcx, 0", @@ -671,7 +689,7 @@ atomic_rmw_cas_2! { } atomic_rmw_cas_3! { - atomic_max, + atomic_max_cmpxchg16b as atomic_max, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -681,7 +699,7 @@ atomic_rmw_cas_3! { "cmovl rbx, rax", } atomic_rmw_cas_3! { - atomic_umax, + atomic_umax_cmpxchg16b as atomic_umax, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -691,7 +709,7 @@ atomic_rmw_cas_3! { "cmovb rbx, rax", } atomic_rmw_cas_3! { - atomic_min, + atomic_min_cmpxchg16b as atomic_min, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -701,7 +719,7 @@ atomic_rmw_cas_3! { "cmovge rbx, rax", } atomic_rmw_cas_3! { - atomic_umin, + atomic_umin_cmpxchg16b as atomic_umin, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -711,19 +729,117 @@ atomic_rmw_cas_3! { "cmovae rbx, rax", } -// We use atomic_rmw_by_atomic_update when cmpxchg16b is not available at compile-time, or -// on Miri and Sanitizer that do not support inline assembly. -#[cfg(any( - not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), - any(miri, portable_atomic_sanitize_thread), -))] -atomic_rmw_by_atomic_update!(); +macro_rules! 
atomic_rmw_with_ifunc { + ( + unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?; + cmpxchg16b = $cmpxchg16b_fn:ident; + fallback = $seqcst_fallback_fn:ident; + ) => { + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + #[inline] + unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? { + fn_alias! { + #[cfg_attr( + not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )), + target_feature(enable = "cmpxchg16b") + )] + unsafe fn($($arg)*) $(-> $ret_ty)?; + // cmpxchg16b is always SeqCst. + cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst); + } + // SAFETY: the caller must uphold the safety contract. + // we only calls cmpxchg16b_fn if cmpxchg16b is available. + unsafe { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b_seqcst_fn + } else { + // Use SeqCst because cmpxchg16b is always SeqCst. + fallback::$seqcst_fallback_fn + } + }) + } + } + }; +} + +atomic_rmw_with_ifunc! { + unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_swap_cmpxchg16b; + fallback = atomic_swap_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_add_cmpxchg16b; + fallback = atomic_add_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_sub_cmpxchg16b; + fallback = atomic_sub_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_and_cmpxchg16b; + fallback = atomic_and_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_nand_cmpxchg16b; + fallback = atomic_nand_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_or_cmpxchg16b; + fallback = atomic_or_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_xor_cmpxchg16b; + fallback = atomic_xor_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_max_cmpxchg16b; + fallback = atomic_max_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_umax_cmpxchg16b; + fallback = atomic_umax_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_min_cmpxchg16b; + fallback = atomic_min_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_umin_cmpxchg16b; + fallback = atomic_umin_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_not(dst: *mut u128) -> u128; + cmpxchg16b = atomic_not_cmpxchg16b; + fallback = atomic_not_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_neg(dst: *mut u128) -> u128; + cmpxchg16b = atomic_neg_cmpxchg16b; + fallback = atomic_neg_seqcst; +} #[inline] fn is_lock_free() -> bool { #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] { - // CMPXCHG16B is statically available. + // CMPXCHG16B is available at compile-time. 
true } #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] @@ -744,147 +860,4 @@ mod tests { test_atomic_int!(i128); test_atomic_int!(u128); - - #[test] - fn test() { - // Miri doesn't support inline assembly used in is_x86_feature_detected - #[cfg(not(miri))] - { - assert!(std::is_x86_feature_detected!("cmpxchg16b")); - } - assert!(AtomicI128::is_lock_free()); - assert!(AtomicU128::is_lock_free()); - } - - #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] - mod quickcheck { - use core::cell::UnsafeCell; - - use test_helper::Align16; - - use super::super::*; - - ::quickcheck::quickcheck! { - fn test(x: u128, y: u128, z: u128) -> bool { - // Miri doesn't support inline assembly used in is_x86_feature_detected - #[cfg(not(miri))] - { - assert!(std::is_x86_feature_detected!("cmpxchg16b")); - } - unsafe { - let a = Align16(UnsafeCell::new(x)); - let (res, ok) = _cmpxchg16b(a.get(), y, z, Ordering::SeqCst, Ordering::SeqCst); - if x == y { - assert!(ok); - assert_eq!(res, x); - assert_eq!(*a.get(), z); - } else { - assert!(!ok); - assert_eq!(res, x); - assert_eq!(*a.get(), x); - } - } - true - } - } - } -} - -#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)] -#[cfg(test)] -mod tests_no_cmpxchg16b { - use super::*; - - #[inline(never)] - unsafe fn cmpxchg16b( - dst: *mut u128, - old: u128, - new: u128, - success: Ordering, - failure: Ordering, - ) -> (u128, bool) { - unsafe { fallback::atomic_compare_exchange(dst, old, new, success, failure) } - } - #[inline] - unsafe fn byte_wise_atomic_load(src: *mut u128) -> u128 { - debug_assert!(src as usize % 16 == 0); - - // Miri and Sanitizer do not support inline assembly. - #[cfg(any(miri, portable_atomic_sanitize_thread))] - unsafe { - atomic_load(src, Ordering::Relaxed) - } - #[cfg(not(any(miri, portable_atomic_sanitize_thread)))] - unsafe { - super::byte_wise_atomic_load(src) - } - } - - #[inline(never)] - unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { - let fail_order = crate::utils::strongest_failure_ordering(order); - unsafe { - match atomic_compare_exchange(src, 0, 0, order, fail_order) { - Ok(v) | Err(v) => v, - } - } - } - - #[inline(never)] - unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { - unsafe { - atomic_swap(dst, val, order); - } - } - - #[inline] - unsafe fn atomic_compare_exchange( - dst: *mut u128, - old: u128, - new: u128, - success: Ordering, - failure: Ordering, - ) -> Result { - let success = crate::utils::upgrade_success_ordering(success, failure); - let (res, ok) = unsafe { cmpxchg16b(dst, old, new, success, failure) }; - if ok { - Ok(res) - } else { - Err(res) - } - } - - use atomic_compare_exchange as atomic_compare_exchange_weak; - - #[inline(always)] - unsafe fn atomic_update(dst: *mut u128, order: Ordering, mut f: F) -> u128 - where - F: FnMut(u128) -> u128, - { - unsafe { - let mut old = byte_wise_atomic_load(dst); - loop { - let next = f(old); - match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { - Ok(x) => return x, - Err(x) => old = x, - } - } - } - } - - atomic_rmw_by_atomic_update!(); - - #[inline] - const fn is_lock_free() -> bool { - IS_ALWAYS_LOCK_FREE - } - const IS_ALWAYS_LOCK_FREE: bool = false; - - atomic128!(AtomicI128, i128, atomic_max, atomic_min); - atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); - - // Do not put this in the nested tests module due to glob imports refer to super::super::Atomic*. 
- test_atomic_int!(i128); - test_atomic_int!(u128); } diff --git a/src/imp/core_atomic.rs b/src/imp/core_atomic.rs index 3f121329..1e0bb497 100644 --- a/src/imp/core_atomic.rs +++ b/src/imp/core_atomic.rs @@ -65,7 +65,7 @@ impl AtomicBool { // See also https://github.com/rust-lang/rust/pull/66705 and // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116. unsafe { - (*(self as *const Self as *const core::cell::UnsafeCell<u8>)).get() as *mut bool + (*(self as *const Self as *const UnsafeCell<u8>)).get() as *mut bool } } } @@ -159,7 +159,7 @@ impl<T> AtomicPtr<T> { // SAFETY: Self is #[repr(C)] and internally UnsafeCell<*mut T>. // See also https://github.com/rust-lang/rust/pull/66705 and // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116. - unsafe { (*(self as *const Self as *const core::cell::UnsafeCell<*mut T>)).get() } + unsafe { (*(self as *const Self as *const UnsafeCell<*mut T>)).get() } } } } @@ -269,7 +269,7 @@ macro_rules! atomic_int { // See also https://github.com/rust-lang/rust/pull/66705 and // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116. unsafe { - (*(self as *const Self as *const core::cell::UnsafeCell<$int_type>)).get() + (*(self as *const Self as *const UnsafeCell<$int_type>)).get() } } } diff --git a/src/imp/fallback/mod.rs b/src/imp/fallback/mod.rs index a2b7442f..21de0943 100644 --- a/src/imp/fallback/mod.rs +++ b/src/imp/fallback/mod.rs @@ -375,26 +375,20 @@ macro_rules! atomic { }; } +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any(test, not(target_has_atomic = "64"))) +)] cfg_no_fast_atomic_64! { - #[cfg_attr( - portable_atomic_no_cfg_target_has_atomic, - cfg(any(test, portable_atomic_no_atomic_64)) - )] - #[cfg_attr( - not(portable_atomic_no_cfg_target_has_atomic), - cfg(any(test, not(target_has_atomic = "64"))) - )] atomic!(AtomicI64, i64, 8); } +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any(test, not(target_has_atomic = "64"))) +)] cfg_no_fast_atomic_64!
{ - #[cfg_attr( - portable_atomic_no_cfg_target_has_atomic, - cfg(any(test, portable_atomic_no_atomic_64)) - )] - #[cfg_attr( - not(portable_atomic_no_cfg_target_has_atomic), - cfg(any(test, not(target_has_atomic = "64"))) - )] atomic!(AtomicU64, u64, 8); } diff --git a/src/imp/fallback/outline_atomics.rs b/src/imp/fallback/outline_atomics.rs index b553c2fa..df018382 100644 --- a/src/imp/fallback/outline_atomics.rs +++ b/src/imp/fallback/outline_atomics.rs @@ -9,53 +9,63 @@ use core::sync::atomic::Ordering; #[cfg(target_arch = "x86_64")] -#[allow(clippy::upper_case_acronyms)] -pub(crate) type UDW = u128; +pub(crate) type Udw = u128; #[cfg(target_arch = "x86_64")] -pub(crate) type AtomicUDW = super::super::fallback::AtomicU128; -// #[cfg(target_arch = "x86_64")] -// pub(crate) type AtomicIDW = super::super::fallback::AtomicI128; +pub(crate) type AtomicUdw = super::super::fallback::AtomicU128; +#[cfg(target_arch = "x86_64")] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI128; #[cfg(target_arch = "arm")] -#[allow(clippy::upper_case_acronyms)] -pub(crate) type UDW = u64; +pub(crate) type Udw = u64; #[cfg(target_arch = "arm")] -pub(crate) type AtomicUDW = super::super::fallback::AtomicU64; +pub(crate) type AtomicUdw = super::super::fallback::AtomicU64; #[cfg(target_arch = "arm")] -pub(crate) type AtomicIDW = super::super::fallback::AtomicI64; +pub(crate) type AtomicIdw = super::super::fallback::AtomicI64; + +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_outline_atomics { + () => { + #[cfg(target_arch = "x86_64")] + { + debug_assert!(!super::detect::detect().has_cmpxchg16b()); + } + #[cfg(target_arch = "arm")] + { + debug_assert!(!super::has_kuser_cmpxchg64()); + } + }; +} -#[cfg(not(target_arch = "x86_64"))] #[cold] -pub(crate) unsafe fn atomic_load(src: *mut UDW, order: Ordering) -> UDW { +pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); #[allow(clippy::cast_ptr_alignment)] // SAFETY: the caller must uphold the safety contract. unsafe { - (*(src as *const AtomicUDW)).load(order) + (*(src as *const AtomicUdw)).load(order) } } -#[cfg(not(target_arch = "x86_64"))] fn_alias! { #[cold] - pub(crate) unsafe fn(src: *mut UDW) -> UDW; + pub(crate) unsafe fn(src: *mut Udw) -> Udw; // fallback's atomic load has at least acquire semantics. - #[cfg(not(target_arch = "arm"))] + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] atomic_load_non_seqcst = atomic_load(Ordering::Acquire); atomic_load_seqcst = atomic_load(Ordering::SeqCst); } -#[cfg(not(target_arch = "x86_64"))] #[cold] -pub(crate) unsafe fn atomic_store(dst: *mut UDW, val: UDW, order: Ordering) { +pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) { + debug_assert_outline_atomics!(); #[allow(clippy::cast_ptr_alignment)] // SAFETY: the caller must uphold the safety contract. unsafe { - (*(dst as *const AtomicUDW)).store(val, order); + (*(dst as *const AtomicUdw)).store(val, order); } } -#[cfg(not(target_arch = "x86_64"))] fn_alias! { #[cold] - pub(crate) unsafe fn(dst: *mut UDW, val: UDW); + pub(crate) unsafe fn(dst: *mut Udw, val: Udw); // fallback's atomic store has at least release semantics. #[cfg(not(target_arch = "arm"))] atomic_store_non_seqcst = atomic_store(Ordering::Release); @@ -64,27 +74,27 @@ fn_alias! 
{ #[cold] pub(crate) unsafe fn atomic_compare_exchange( - dst: *mut UDW, - old: UDW, - new: UDW, + dst: *mut Udw, + old: Udw, + new: Udw, success: Ordering, failure: Ordering, -) -> (UDW, bool) { +) -> (Udw, bool) { + debug_assert_outline_atomics!(); #[allow(clippy::cast_ptr_alignment)] // SAFETY: the caller must uphold the safety contract. unsafe { - match (*(dst as *const AtomicUDW)).compare_exchange(old, new, success, failure) { + match (*(dst as *const AtomicUdw)).compare_exchange(old, new, success, failure) { Ok(v) => (v, true), Err(v) => (v, false), } } } -#[cfg(not(target_arch = "x86_64"))] fn_alias! { #[cold] - pub(crate) unsafe fn(dst: *mut UDW, old: UDW, new: UDW) -> (UDW, bool); - // fallback's atomic RMW has at least AcqRel semantics. - #[cfg(not(target_arch = "arm"))] + pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool); + // fallback's atomic CAS has at least AcqRel semantics. + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] atomic_compare_exchange_non_seqcst = atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire); atomic_compare_exchange_seqcst @@ -96,21 +106,20 @@ macro_rules! atomic_rmw_3 { $name:ident($atomic_type:ident::$method_name:ident), $non_seqcst_alias:ident, $seqcst_alias:ident ) => { - #[cfg(not(target_arch = "x86_64"))] #[cold] - pub(crate) unsafe fn $name(dst: *mut UDW, val: UDW, order: Ordering) -> UDW { + pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); #[allow(clippy::cast_ptr_alignment)] // SAFETY: the caller must uphold the safety contract. unsafe { - (*(dst as *const $atomic_type)).$method_name(val as _, order) as UDW + (*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw } } - #[cfg(not(target_arch = "x86_64"))] fn_alias! { #[cold] - pub(crate) unsafe fn(dst: *mut UDW, val: UDW) -> UDW; + pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw; // fallback's atomic RMW has at least AcqRel semantics. - #[cfg(not(target_arch = "arm"))] + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] $non_seqcst_alias = $name(Ordering::AcqRel); $seqcst_alias = $name(Ordering::SeqCst); } @@ -121,38 +130,37 @@ macro_rules! atomic_rmw_2 { $name:ident($atomic_type:ident::$method_name:ident), $non_seqcst_alias:ident, $seqcst_alias:ident ) => { - #[cfg(not(target_arch = "x86_64"))] #[cold] - pub(crate) unsafe fn $name(dst: *mut UDW, order: Ordering) -> UDW { + pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); #[allow(clippy::cast_ptr_alignment)] // SAFETY: the caller must uphold the safety contract. unsafe { - (*(dst as *const $atomic_type)).$method_name(order) as UDW + (*(dst as *const $atomic_type)).$method_name(order) as Udw } } - #[cfg(not(target_arch = "x86_64"))] fn_alias! { #[cold] - pub(crate) unsafe fn(dst: *mut UDW) -> UDW; + pub(crate) unsafe fn(dst: *mut Udw) -> Udw; // fallback's atomic RMW has at least AcqRel semantics. 
- #[cfg(not(target_arch = "arm"))] + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] $non_seqcst_alias = $name(Ordering::AcqRel); $seqcst_alias = $name(Ordering::SeqCst); } }; } -atomic_rmw_3!(atomic_swap(AtomicUDW::swap), atomic_swap_non_seqcst, atomic_swap_seqcst); -atomic_rmw_3!(atomic_add(AtomicUDW::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst); -atomic_rmw_3!(atomic_sub(AtomicUDW::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst); -atomic_rmw_3!(atomic_and(AtomicUDW::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst); -atomic_rmw_3!(atomic_nand(AtomicUDW::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst); -atomic_rmw_3!(atomic_or(AtomicUDW::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst); -atomic_rmw_3!(atomic_xor(AtomicUDW::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst); -atomic_rmw_3!(atomic_max(AtomicIDW::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst); -atomic_rmw_3!(atomic_umax(AtomicUDW::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst); -atomic_rmw_3!(atomic_min(AtomicIDW::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst); -atomic_rmw_3!(atomic_umin(AtomicUDW::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst); +atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst); +atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst); +atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst); +atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst); +atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst); +atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst); +atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst); +atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst); +atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst); +atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst); +atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst); -atomic_rmw_2!(atomic_not(AtomicUDW::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst); -atomic_rmw_2!(atomic_neg(AtomicUDW::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst); +atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst); +atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst); diff --git a/src/imp/float.rs b/src/imp/float.rs index 6cfe99c1..fe790b7b 100644 --- a/src/imp/float.rs +++ b/src/imp/float.rs @@ -11,7 +11,7 @@ #![cfg(any(not(target_pointer_width = "16"), feature = "fallback"))] // See lib.rs's AtomicU32 definition -use core::sync::atomic::Ordering; +use core::{cell::UnsafeCell, sync::atomic::Ordering}; macro_rules! atomic_float { ( @@ -23,7 +23,7 @@ macro_rules! atomic_float { ) => { #[repr(C, align($align))] pub(crate) struct $atomic_type { - v: core::cell::UnsafeCell<$float_type>, + v: UnsafeCell<$float_type>, } // Send is implicitly implemented. @@ -33,7 +33,7 @@ macro_rules! 
atomic_float { impl $atomic_type { #[inline] pub(crate) const fn new(v: $float_type) -> Self { - Self { v: core::cell::UnsafeCell::new(v) } + Self { v: UnsafeCell::new(v) } } #[inline] diff --git a/src/imp/interrupt/mod.rs b/src/imp/interrupt/mod.rs index ff6d8680..1371b284 100644 --- a/src/imp/interrupt/mod.rs +++ b/src/imp/interrupt/mod.rs @@ -24,7 +24,7 @@ // interrupts [^avr2] in atomic ops by default, is considered the latter. // MSP430 as well. // -// See also README.md of this module. +// See also README.md of this directory. // // [^avr1]: https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#LL963 // [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 diff --git a/src/imp/mod.rs b/src/imp/mod.rs index 08e75c8e..2c44868d 100644 --- a/src/imp/mod.rs +++ b/src/imp/mod.rs @@ -20,21 +20,21 @@ )] mod core_atomic; -// Miri and Sanitizer do not support inline assembly. -#[cfg(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics))] -#[cfg(target_arch = "aarch64")] -#[path = "atomic128/intrinsics.rs"] -mod aarch64; -#[cfg(not(all( - any(miri, portable_atomic_sanitize_thread), - portable_atomic_new_atomic_intrinsics, -)))] #[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))] #[cfg(target_arch = "aarch64")] -#[path = "atomic128/aarch64.rs"] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. +#[cfg_attr( + all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics), + path = "atomic128/intrinsics.rs" +)] +#[cfg_attr( + not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics)), + path = "atomic128/aarch64.rs" +)] mod aarch64; #[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))] +#[cfg(target_arch = "x86_64")] #[cfg(any( target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b", @@ -45,33 +45,39 @@ mod aarch64; not(target_env = "sgx"), ), ))] -#[cfg(target_arch = "x86_64")] -#[path = "atomic128/x86_64.rs"] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. +#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "atomic128/intrinsics.rs")] +#[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/x86_64.rs")] mod x86_64; -// Miri and Sanitizer do not support inline assembly. -#[cfg(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15))] #[cfg(portable_atomic_unstable_asm_experimental_arch)] -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -))] #[cfg(target_arch = "powerpc64")] -#[path = "atomic128/intrinsics.rs"] -mod powerpc64; -#[cfg(not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15)))] -#[cfg(portable_atomic_unstable_asm_experimental_arch)] #[cfg(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", ))] -#[cfg(target_arch = "powerpc64")] -#[path = "atomic128/powerpc64.rs"] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. 
+#[cfg_attr( + all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15), + path = "atomic128/intrinsics.rs" +)] +#[cfg_attr( + not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15)), + path = "atomic128/powerpc64.rs" +)] mod powerpc64; #[cfg(portable_atomic_unstable_asm_experimental_arch)] #[cfg(target_arch = "s390x")] -#[path = "atomic128/s390x.rs"] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. +#[cfg_attr( + all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics), + path = "atomic128/intrinsics.rs" +)] +#[cfg_attr( + not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics)), + path = "atomic128/s390x.rs" +)] mod s390x; // Miri and Sanitizer do not support inline assembly. diff --git a/src/imp/x86.rs b/src/imp/x86.rs index 564b58fc..ba0ba3a0 100644 --- a/src/imp/x86.rs +++ b/src/imp/x86.rs @@ -117,7 +117,7 @@ macro_rules! atomic_bit_opts { // LLVM 15 only supports generating `lock bt{s,r,c}` for immediate bit offsets. // https://godbolt.org/z/dzzhr81z6 // LLVM 16 can generate `lock bt{s,r,c}` for both immediate and register bit offsets. - // https://github.com/taiki-e/portable-atomic/issues/48#issuecomment-1453473831 + // https://godbolt.org/z/7YTvsorn1 // So, use fetch_* based implementations on LLVM 16+, otherwise use asm based implementations. #[cfg(portable_atomic_llvm_16)] impl_default_bit_opts!($atomic_type, $int_type); diff --git a/src/lib.rs b/src/lib.rs index 8391585e..a484fd9a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4798,7 +4798,9 @@ This is `const fn` on Rust 1.58+."), /// Returning an `*mut` pointer from a shared reference to this atomic is /// safe because the atomic types work with interior mutability. Any use of /// the returned raw pointer requires an `unsafe` block and has to uphold - /// the safety requirements: + /// the safety requirements. If there is concurrent access, note the following + /// additional safety requirements: + /// /// - If this atomic type is [lock-free](Self::is_lock_free), any concurrent /// operations on it must be atomic. 
/// - Otherwise, any concurrent operations on it must be compatible with diff --git a/src/tests/helper.rs b/src/tests/helper.rs index a756d858..1dda9048 100644 --- a/src/tests/helper.rs +++ b/src/tests/helper.rs @@ -2209,17 +2209,8 @@ pub(crate) fn test_swap_ordering(f: impl Fn(Ordering) -> T) fn skip_should_panic_test() -> bool { // Miri's panic handling is slow // MSAN false positive: https://gist.github.com/taiki-e/dd6269a8ffec46284fdc764a4849f884 - is_panic_abort() + test_helper::is_panic_abort() || cfg!(miri) || option_env!("CARGO_PROFILE_RELEASE_LTO").map_or(false, |v| v == "fat") && option_env!("MSAN_OPTIONS").is_some() } -// For -C panic=abort -Z panic_abort_tests: https://github.com/rust-lang/rust/issues/67650 -#[rustversion::since(1.60)] // cfg!(panic) requires Rust 1.60 -fn is_panic_abort() -> bool { - cfg!(panic = "abort") -} -#[rustversion::before(1.60)] // cfg!(panic) requires Rust 1.60 -fn is_panic_abort() -> bool { - false -} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index a9b2d136..fa7d67c2 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -105,7 +105,6 @@ extern "C" { fn _atomic_f64_ffi_safety(_: AtomicF64); } -#[rustversion::since(1.60)] // cfg!(target_has_atomic) requires Rust 1.60 #[test] fn test_is_lock_free() { assert!(AtomicI8::is_always_lock_free()); @@ -120,30 +119,34 @@ fn test_is_lock_free() { assert!(AtomicI32::is_lock_free()); assert!(AtomicU32::is_always_lock_free()); assert!(AtomicU32::is_lock_free()); - if cfg!(all( - feature = "fallback", - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), - target_arch = "arm", - any(target_os = "linux", target_os = "android"), - not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), - not(portable_atomic_no_outline_atomics), - not(target_has_atomic = "64"), - )) { - assert!(!AtomicI64::is_always_lock_free()); - assert!(AtomicI64::is_lock_free()); - assert!(!AtomicU64::is_always_lock_free()); - assert!(AtomicU64::is_lock_free()); - } else if cfg!(target_has_atomic = "64") { - assert!(AtomicI64::is_always_lock_free()); - assert!(AtomicI64::is_lock_free()); - assert!(AtomicU64::is_always_lock_free()); - assert!(AtomicU64::is_lock_free()); - } else { - assert!(!AtomicI64::is_always_lock_free()); - assert!(!AtomicI64::is_lock_free()); - assert!(!AtomicU64::is_always_lock_free()); - assert!(!AtomicU64::is_lock_free()); + #[cfg(not(portable_atomic_no_cfg_target_has_atomic))] + { + if cfg!(all( + feature = "fallback", + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + target_arch = "arm", + any(target_os = "linux", target_os = "android"), + not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), + not(portable_atomic_no_outline_atomics), + not(target_has_atomic = "64"), + not(portable_atomic_test_outline_atomics_detect_false), + )) { + assert!(!AtomicI64::is_always_lock_free()); + assert!(AtomicI64::is_lock_free()); + assert!(!AtomicU64::is_always_lock_free()); + assert!(AtomicU64::is_lock_free()); + } else if cfg!(target_has_atomic = "64") { + assert!(AtomicI64::is_always_lock_free()); + assert!(AtomicI64::is_lock_free()); + assert!(AtomicU64::is_always_lock_free()); + assert!(AtomicU64::is_lock_free()); + } else { + assert!(!AtomicI64::is_always_lock_free()); + assert!(!AtomicI64::is_lock_free()); + assert!(!AtomicU64::is_always_lock_free()); + assert!(!AtomicU64::is_lock_free()); + } } if cfg!(any( target_arch = "aarch64", @@ -182,6 +185,7 @@ fn 
test_is_lock_free() { portable_atomic_cmpxchg16b_target_feature, not(portable_atomic_no_outline_atomics), not(target_env = "sgx"), + not(portable_atomic_test_outline_atomics_detect_false), )) && std::is_x86_feature_detected!("cmpxchg16b"); assert_eq!(AtomicI128::is_lock_free(), has_cmpxchg16b); assert_eq!(AtomicU128::is_lock_free(), has_cmpxchg16b); diff --git a/tests/helper/Cargo.toml b/tests/helper/Cargo.toml index df56cf94..61682d9a 100644 --- a/tests/helper/Cargo.toml +++ b/tests/helper/Cargo.toml @@ -8,13 +8,15 @@ publish = false [lib] doctest = false doc = false +test = false [features] -std = ["critical-section", "fs-err", "serde", "libc", "windows-sys"] +std = ["critical-section", "fs-err", "rustversion", "serde", "libc", "windows-sys"] [dependencies] critical-section = { version = "1", optional = true, features = ["restore-state-bool"] } fs-err = { version = "2", optional = true } +rustversion = { version = "1", optional = true } serde = { version = "1", optional = true } [target.'cfg(unix)'.dependencies] diff --git a/tests/helper/src/lib.rs b/tests/helper/src/lib.rs index 14c68c43..fb58d050 100644 --- a/tests/helper/src/lib.rs +++ b/tests/helper/src/lib.rs @@ -24,7 +24,7 @@ mod once_lock; #[cfg(feature = "std")] pub mod serde; -use core::{ops, sync::atomic::Ordering}; +use core::sync::atomic::Ordering; pub const LOAD_ORDERINGS: [Ordering; 3] = [Ordering::Relaxed, Ordering::Acquire, Ordering::SeqCst]; pub const STORE_ORDERINGS: [Ordering; 3] = [Ordering::Relaxed, Ordering::Release, Ordering::SeqCst]; @@ -50,15 +50,14 @@ pub const COMPARE_EXCHANGE_ORDERINGS: [(Ordering, Ordering); 15] = [ pub const FENCE_ORDERINGS: [Ordering; 4] = [Ordering::Release, Ordering::Acquire, Ordering::AcqRel, Ordering::SeqCst]; -#[derive(Debug, Clone, Copy, Default)] -#[repr(C, align(16))] -pub struct Align16(pub T); - -impl ops::Deref for Align16 { - type Target = T; - - #[inline] - fn deref(&self) -> &T { - &self.0 - } +// For -C panic=abort -Z panic_abort_tests: https://github.com/rust-lang/rust/issues/67650 +#[cfg(feature = "std")] +#[rustversion::since(1.60)] // cfg!(panic) requires Rust 1.60 +pub fn is_panic_abort() -> bool { + cfg!(panic = "abort") +} +#[cfg(feature = "std")] +#[rustversion::before(1.60)] // cfg!(panic) requires Rust 1.60 +pub fn is_panic_abort() -> bool { + false } diff --git a/tools/build.sh b/tools/build.sh index da0f299f..a63b1484 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -91,14 +91,18 @@ default_targets=( s390x-unknown-linux-gnu ) known_cfgs=( - docsrs - qemu - valgrind - rustfmt + # Public APIs portable_atomic_unsafe_assume_single_core portable_atomic_s_mode portable_atomic_disable_fiq portable_atomic_no_outline_atomics + + # Not public APIs + portable_atomic_test_outline_atomics_detect_false + docsrs + qemu + rustfmt + valgrind ) x() { diff --git a/tools/test.sh b/tools/test.sh index f529f306..63eeb83e 100755 --- a/tools/test.sh +++ b/tools/test.sh @@ -119,10 +119,23 @@ args+=( --workspace --exclude bench --exclude portable-atomic-internal-codegen ) target="${target:-"${host}"}" +target_lower="${target//-/_}" +target_lower="${target_lower//./_}" +target_upper="$(tr '[:lower:]' '[:upper:]' <<<"${target_lower}")" + +if [[ -n "${VALGRIND:-}" ]]; then + export "CARGO_TARGET_${target_upper}_RUNNER"="${VALGRIND} -v --error-exitcode=1 --error-limit=no --leak-check=full --show-leak-kinds=all --track-origins=yes" + export RUSTFLAGS="${RUSTFLAGS:-} --cfg valgrind" + export RUSTDOCFLAGS="${RUSTDOCFLAGS:-} --cfg valgrind" + # doctest on Valgrind is very slow + if [[ 
${#tests[@]} -eq 0 ]]; then + tests=(--tests) + fi +fi run() { if [[ "${RUSTFLAGS:-}" == *"-Z sanitizer="* ]] || [[ "${RUSTFLAGS:-}" == *"-Zsanitizer="* ]]; then - # debug build + doctests is slow + # doctest with debug build on Sanitizer is slow x_cargo ${pre_args[@]+"${pre_args[@]}"} test --tests "$@" else x_cargo ${pre_args[@]+"${pre_args[@]}"} test ${tests[@]+"${tests[@]}"} "$@"