Skip to content

Commit

Permalink
Various cleanup
Browse files Browse the repository at this point in the history
- Optimize x86_64 128-bit outline-atomics. FYI, this improves
  performance by up to 15% in concurrent RMW/store microbenchmarks.
- Optimize x86_64 128-bit load that uses cmpxchg16b.
- Optimize aarch64 128-bit load that uses FEAT_LSE.
- Move 128-bit atomic implementation for Miri and ThreadSanitizer to
  intrinsics.rs on all architectures.
- Remove duplicate tests and add tests for cases where feature detection
  returns false.
- Several minor cleanups.
  • Loading branch information
taiki-e committed Apr 18, 2023
1 parent 0a7d5d2 commit 40c4cd4
Show file tree
Hide file tree
Showing 28 changed files with 1,049 additions and 1,298 deletions.
91 changes: 40 additions & 51 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ aarch64_linux_gnu_test_task:
- |
[ ! -f $HOME/.cargo/env ] || . $HOME/.cargo/env
- set -ex
- ./tools/test.sh -vv
- ./tools/test.sh -vv -- -Z unstable-options --report-time
# +lse
# Graviton2 (Neoverse N1) is ARMv8.2-a and doesn't support FEAT_LSE2.
# FEAT_LSE2 is tested on aarch64 macOS VM.
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv -- -Z unstable-options --report-time

aarch64_linux_musl_test_task:
name: test ($TARGET)
Expand All @@ -55,47 +55,36 @@ aarch64_linux_musl_test_task:
- rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
test_script:
- set -ex
- ./tools/test.sh -vv
- ./tools/test.sh -vv -- -Z unstable-options --report-time
# -crt-static
- RUSTFLAGS="$RUSTFLAGS -C target-feature=-crt-static" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=-crt-static" ./tools/test.sh -vv
- RUSTFLAGS="$RUSTFLAGS -C target-feature=-crt-static" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=-crt-static" ./tools/test.sh -vv -- -Z unstable-options --report-time
# +lse
# Graviton2 (Neoverse N1) is ARMv8.2-a and doesn't support FEAT_LSE2.
# FEAT_LSE2 is tested on aarch64 macOS VM.
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv -- -Z unstable-options --report-time

armel_linux_test_task:
arm_linux_test_task:
name: test ($TARGET)
env:
TARGET: armv5te-unknown-linux-gnueabi
matrix:
- env:
TARGET: armv5te-unknown-linux-gnueabi
DPKG_ARCH: armel
- env:
TARGET: armv7-unknown-linux-gnueabihf
DPKG_ARCH: armhf
arm_container:
image: rust:latest
setup_script:
- set -ex
- lscpu
- rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
- dpkg --add-architecture armel
- apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabi libc6-dev-armel-cross libc6:armel
- dpkg --add-architecture "$DPKG_ARCH"
- apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-"${TARGET/*-unknown/arm}" libc6-dev-"$DPKG_ARCH"-cross libc6:"$DPKG_ARCH"
test_script:
- set -ex
- export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc
- ./tools/test.sh --target "$TARGET" -Z doctest-xcompile -vv

armhf_linux_test_task:
name: test ($TARGET)
env:
TARGET: armv7-unknown-linux-gnueabihf
arm_container:
image: rust:latest
setup_script:
- set -ex
- lscpu
- rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
- dpkg --add-architecture armhf
- apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf
test_script:
- set -ex
- export CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc
- ./tools/test.sh --target "$TARGET" -Z doctest-xcompile -vv
- ./tools/test.sh --target "$TARGET" -Z doctest-xcompile -vv -- -Z unstable-options --report-time

aarch64_macos_test_task:
name: test ($TARGET)
Expand All @@ -111,32 +100,28 @@ aarch64_macos_test_task:
- . $HOME/.cargo/env
- set -ex
# macOS is +lse,+lse2 by default
- ./tools/test.sh -vv
- ./tools/test.sh -vv -- -Z unstable-options --report-time

aarch64_linux_valgrind_task:
name: valgrind ($TARGET)
env:
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER: valgrind -v --error-exitcode=1 --error-limit=no --leak-check=full --show-leak-kinds=all --track-origins=yes
RUSTDOCFLAGS: -D warnings --cfg valgrind
RUSTFLAGS: -D warnings --cfg valgrind
VALGRIND: valgrind
TARGET: aarch64-unknown-linux-gnu
arm_container:
# Valgrind supports ldxp/stxp on 3.19+: https://valgrind.org/docs/manual/dist.news.html
# However, current Debian stable's Valgrind is 3.16: https://packages.debian.org/en/stable/valgrind
image: debian:bookworm
setup_script:
- set -ex
- lscpu
- apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends ca-certificates curl gcc git libc6-dev valgrind
- apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends ca-certificates curl gcc git libc6-dev valgrind moreutils
- curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain nightly --component rust-src
test_script:
- . $HOME/.cargo/env
- set -ex
# doctests on Valgrind are very slow
- ./tools/test.sh -vv --tests
- ./tools/test.sh -vv 2>&1 | ts -i '%.s '
# +lse
# As of Valgrind 3.19, Valgrind supports atomic instructions of ARMv8.0 and ARMv8.1 (FEAT_LSE).
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv --tests
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv 2>&1 | ts -i '%.s '
#
# aarch64_linux_bench_task:
# name: bench ($TARGET)
Expand All @@ -152,33 +137,36 @@ aarch64_linux_valgrind_task:
# - rustup toolchain add nightly --no-self-update && rustup default nightly
# bench_script:
# - set -ex
# - cargo bench -vv --manifest-path bench/Cargo.toml
# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=-outline-atomics --cfg portable_atomic_no_outline_atomics" cargo bench -vv --manifest-path bench/Cargo.toml
# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+lse" cargo bench -vv --manifest-path bench/Cargo.toml

# armel_linux_bench_task:
# x86_64_linux_bench_task:
# name: bench ($TARGET)
# env:
# TARGET: armv5te-unknown-linux-gnueabi
# arm_container:
# TARGET: x86_64-unknown-linux-gnu
# container:
# image: rust:latest
# cpu: 4
# memory: 12G
# setup_script:
# - set -ex
# - lscpu
# - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
# - rustup target add "$TARGET"
# - dpkg --add-architecture armel
# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabi libc6-dev-armel-cross libc6:armel
# test_script:
# - rustup toolchain add nightly --no-self-update && rustup default nightly
# bench_script:
# - set -ex
# - export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc
# - RUSTFLAGS="${RUSTFLAGS}" cargo bench --target "$TARGET" -vv --manifest-path bench/Cargo.toml
# - cargo bench -vv --manifest-path bench/Cargo.toml
# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+cmpxchg16b" cargo bench -vv --manifest-path bench/Cargo.toml

# armhf_linux_bench_task:
# arm_linux_bench_task:
# name: bench ($TARGET)
# env:
# TARGET: armv7-unknown-linux-gnueabihf
# matrix:
# - env:
# TARGET: armv5te-unknown-linux-gnueabi
# DPKG_ARCH: armel
# - env:
# TARGET: armv7-unknown-linux-gnueabihf
# DPKG_ARCH: armhf
# arm_container:
# image: rust:latest
# cpu: 4
Expand All @@ -188,10 +176,11 @@ aarch64_linux_valgrind_task:
# - lscpu
# - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
# - rustup target add "$TARGET"
# - dpkg --add-architecture armhf
# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf
# - dpkg --add-architecture "$DPKG_ARCH"
# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-"${TARGET/*-unknown/arm}" libc6-dev-"$DPKG_ARCH"-cross libc6:"$DPKG_ARCH"
# test_script:
# - set -ex
# - export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc
# - export CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc
# - RUSTFLAGS="${RUSTFLAGS}" cargo bench --target "$TARGET" -vv --manifest-path bench/Cargo.toml

Expand Down
43 changes: 24 additions & 19 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ jobs:
target: armv7-unknown-linux-gnueabi
- rust: nightly
target: armv7-unknown-linux-gnueabihf
- rust: nightly
target: armv5te-unknown-linux-musleabi
- rust: nightly
target: arm-linux-androideabi
- rust: nightly
Expand Down Expand Up @@ -163,42 +161,53 @@ jobs:
if: matrix.target != '' && !startsWith(matrix.target, 'i686') && !startsWith(matrix.target, 'x86_64')
- run: echo "TARGET=--target=${{ matrix.target }}" >>"${GITHUB_ENV}"
if: matrix.target != ''
- run: echo "REPORT_TIME=-- -Z unstable-options --report-time" >>"${GITHUB_ENV}"
if: startsWith(matrix.rust, 'nightly')
# Since nightly-2022-12-23, -Z build-std + -Z randomize-layout + release mode on windows causes segfault.
- run: echo "RANDOMIZE_LAYOUT=-Z randomize-layout" >>"${GITHUB_ENV}"
if: startsWith(matrix.rust, 'nightly') && !startsWith(matrix.os, 'windows')

- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
# macOS is skipped because it is +cmpxchg16b by default
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
# Note: This cfg is intended to make it easy for portable-atomic developers
# to test has_cmpxchg16b == false, has_lse == false, or __kuser_helper_version < 5 cases,
# and is not a public API.
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg portable_atomic_test_outline_atomics_detect_false
RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg portable_atomic_test_outline_atomics_detect_false
if: (matrix.target == '' || startsWith(matrix.target, 'x86_64')) && !startsWith(matrix.os, 'macos') || startsWith(matrix.target, 'aarch64') || startsWith(matrix.target, 'armv5te') || matrix.target == 'arm-linux-androideabi'
# -crt-static
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=-crt-static
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=-crt-static
if: contains(matrix.target, '-musl')
# +cmpxchg16b
# macOS is skipped because it is +cmpxchg16b by default
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b
if: (matrix.target == '' || startsWith(matrix.target, 'x86_64')) && !startsWith(matrix.os, 'macos')
# +lse
# As of QEMU 7.2, QEMU has not yet implemented FEAT_LSE2: https://linaro.atlassian.net/browse/QEMU-300
# FEAT_LSE2 is tested on Cirrus CI's aarch64 macOS VM.
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+lse
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+lse
if: startsWith(matrix.target, 'aarch64')
# pwr7
# powerpc64- (big-endian) is skipped because it is pre-pwr8 by default
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr7
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr7
if: startsWith(matrix.target, 'powerpc64le-')
# pwr8
# powerpc64le- (little-endian) is skipped because it is pwr8 by default
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr8
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr8
Expand Down Expand Up @@ -303,8 +312,7 @@ jobs:
- run: sudo apt-get -o Acquire::Retries=10 -qq update && sudo apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends moreutils
- run: echo "TARGET=--target=${{ matrix.target }}" >>"${GITHUB_ENV}"
if: matrix.target != ''
- run: |
cargo miri test --workspace --all-features $EXCLUDE $TARGET 2>&1 | ts -i '%.s '
- run: cargo miri test --workspace --all-features $EXCLUDE $TARGET 2>&1 | ts -i '%.s '
env:
MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-retag-fields -Zmiri-disable-isolation
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -Z randomize-layout
Expand Down Expand Up @@ -351,7 +359,7 @@ jobs:

valgrind:
env:
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER: valgrind -v --error-exitcode=1 --error-limit=no --leak-check=full --show-leak-kinds=all --track-origins=yes
VALGRIND: valgrind
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
Expand All @@ -360,17 +368,14 @@ jobs:
persist-credentials: false
- name: Install Rust
run: rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
- run: sudo apt-get -o Acquire::Retries=10 -qq update && sudo apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends moreutils
- uses: taiki-e/install-action@valgrind
# doctests on Valgrind are very slow
- run: tools/test.sh -vv --tests
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg valgrind
RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg valgrind
- run: tools/test.sh -vv 2>&1 | ts -i '%.s '
# +cmpxchg16b
- run: tools/test.sh -vv --tests
- run: tools/test.sh -vv 2>&1 | ts -i '%.s '
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg valgrind
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg valgrind
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b

codegen:
runs-on: ubuntu-latest
Expand Down
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ crossbeam-utils = "0.8"
fastrand = "1"
paste = "1"
quickcheck = { default-features = false, git = "https://github.com/taiki-e/quickcheck.git", branch = "dev" } # https://github.com/BurntSushi/quickcheck/pull/304 + https://github.com/BurntSushi/quickcheck/pull/282 + lower MSRV
rustversion = "1"
serde_test = "1"
sptr = "0.3"
static_assertions = "1"
1 change: 0 additions & 1 deletion bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ crossbeam-utils = "0.8"
fastrand = "1"
paste = "1"
quickcheck = { default-features = false, git = "https://github.com/taiki-e/quickcheck.git", branch = "dev" } # https://github.com/BurntSushi/quickcheck/pull/304 + https://github.com/BurntSushi/quickcheck/pull/282 + lower MSRV
rustversion = "1"
static_assertions = "1"

[[bench]]
Expand Down
18 changes: 11 additions & 7 deletions src/imp/arm_linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ mod fallback;

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::{mem, sync::atomic::Ordering};
use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

/// A 64-bit value represented as a pair of 32-bit values.
///
Expand Down Expand Up @@ -56,10 +56,17 @@ fn __kuser_helper_version() -> i32 {
}
/// Detection predicate checked before the `__kuser_cmpxchg64` helper is used.
///
/// Returns `true` only when `__kuser_helper_version()` reports at least 5,
/// and always returns `false` when the crate is built with the internal
/// `portable_atomic_test_outline_atomics_detect_false` cfg.
#[inline]
fn has_kuser_cmpxchg64() -> bool {
    // Note: This cfg is intended to make it easy for portable-atomic developers
    // to test __kuser_helper_version < 5 cases, and is not a public API.
    let detection_forced_off = cfg!(portable_atomic_test_outline_atomics_detect_false);
    // Short-circuits so the helper version is not queried when detection is forced off.
    !detection_forced_off && __kuser_helper_version() >= 5
}
#[inline]
unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
debug_assert!(ptr as usize % 8 == 0);
debug_assert!(has_kuser_cmpxchg64());
// SAFETY: the caller must uphold the safety contract.
unsafe {
let f: extern "C" fn(*const u64, *const u64, *mut u64) -> u32 =
Expand Down Expand Up @@ -91,9 +98,6 @@ unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
where
F: FnMut(u64) -> u64,
{
debug_assert!(dst as usize % 8 == 0);
debug_assert!(has_kuser_cmpxchg64());

// SAFETY: the caller must uphold the safety contract.
unsafe {
loop {
Expand Down Expand Up @@ -130,7 +134,7 @@ macro_rules! atomic_with_ifunc {
if has_kuser_cmpxchg64() {
kuser_cmpxchg64_fn
} else {
// Use SeqCst because __kuser_cmpxchg64 is SeqCst.
// Use SeqCst because __kuser_cmpxchg64 is always SeqCst.
// https://github.com/torvalds/linux/blob/v6.1/arch/arm/kernel/entry-armv.S#L918-L925
fallback::$seqcst_fallback_fn
}
Expand Down Expand Up @@ -264,7 +268,7 @@ macro_rules! atomic64 {
($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
#[repr(C, align(8))]
pub(crate) struct $atomic_type {
v: core::cell::UnsafeCell<$int_type>,
v: UnsafeCell<$int_type>,
}

// Send is implicitly implemented.
Expand All @@ -276,7 +280,7 @@ macro_rules! atomic64 {
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self { v: core::cell::UnsafeCell::new(v) }
Self { v: UnsafeCell::new(v) }
}

#[inline]
Expand Down
Loading

0 comments on commit 40c4cd4

Please sign in to comment.