From e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Wed, 4 Jul 2018 02:48:30 -0700
Subject: [PATCH 1/2] Don't use SIMD in mem::swap for types smaller than the block size

LLVM isn't able to remove the alloca for the unaligned block in the SIMD tail
in some cases, so doing this helps SRoA work in cases where it currently
doesn't. Found in the `replace_with` RFC discussion.
---
 src/libcore/mem.rs                   |  2 +-
 src/libcore/ptr.rs                   | 13 +++++++++++++
 src/test/codegen/swap-small-types.rs | 26 ++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 src/test/codegen/swap-small-types.rs

diff --git a/src/libcore/mem.rs b/src/libcore/mem.rs
index 8fb4e0d6a02e3..a0fe6e9880606 100644
--- a/src/libcore/mem.rs
+++ b/src/libcore/mem.rs
@@ -638,7 +638,7 @@ pub unsafe fn uninitialized<T>() -> T {
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn swap<T>(x: &mut T, y: &mut T) {
     unsafe {
-        ptr::swap_nonoverlapping(x, y, 1);
+        ptr::swap_nonoverlapping_one(x, y);
     }
 }
 
diff --git a/src/libcore/ptr.rs b/src/libcore/ptr.rs
index 0af642258c277..f1405b58e1b32 100644
--- a/src/libcore/ptr.rs
+++ b/src/libcore/ptr.rs
@@ -187,6 +187,19 @@ pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
     swap_nonoverlapping_bytes(x, y, len)
 }
 
+#[inline]
+pub(crate) unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) {
+    // For types smaller than the block optimization below,
+    // just swap directly to avoid pessimizing codegen.
+    if mem::size_of::<T>() < 32 {
+        let z = read(x);
+        copy_nonoverlapping(y, x, 1);
+        write(y, z);
+    } else {
+        swap_nonoverlapping(x, y, 1);
+    }
+}
+
 #[inline]
 unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
     // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
diff --git a/src/test/codegen/swap-small-types.rs b/src/test/codegen/swap-small-types.rs
new file mode 100644
index 0000000000000..f34a1d669bda9
--- /dev/null
+++ b/src/test/codegen/swap-small-types.rs
@@ -0,0 +1,26 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+use std::mem::swap;
+
+type RGB48 = [u16; 3];
+
+// CHECK-LABEL: @swap_rgb48
+#[no_mangle]
+pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
+// CHECK-NOT: alloca
+// CHECK: load i48
+// CHECK: store i48
+    swap(x, y)
+}

From c9482f724f2c6369a56faddd3ba4c1f00545a086 Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Sat, 21 Jul 2018 23:12:46 -0700
Subject: [PATCH 2/2] Only run the test on x86_64

Smaller platforms don't merge the loads the same way.
---
 src/test/codegen/swap-small-types.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/test/codegen/swap-small-types.rs b/src/test/codegen/swap-small-types.rs
index f34a1d669bda9..46406ee5182a7 100644
--- a/src/test/codegen/swap-small-types.rs
+++ b/src/test/codegen/swap-small-types.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 // compile-flags: -O
+// only-x86_64
 
 #![crate_type = "lib"]
 