Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tweak the threshold for chunked swapping #111803

Merged
merged 1 commit into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion library/core/src/mem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
// tends to copy the whole thing to stack rather than doing it one part
// at a time, so instead treat them as one-element slices and piggy-back
// the slice optimizations that will split up the swaps.
if size_of::<T>() / align_of::<T>() > 4 {
if const { size_of::<T>() / align_of::<T>() > 2 } {
// SAFETY: exclusive references always point to one non-overlapping
// element and are non-null and properly aligned.
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
Expand Down
82 changes: 64 additions & 18 deletions tests/codegen/swap-small-types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
// CHECK-LABEL: @swap_rgb48
#[no_mangle]
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
// FIXME: See #115212 for why this has an alloca again
// CHECK-NOT: alloca

// CHECK: alloca [3 x i16], align 2
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
// Whether `i8` is the best for this is unclear, but
// might as well record what's actually happening right now.

// CHECK: load i8
// CHECK: load i8
// CHECK: store i8
// CHECK: store i8
swap(x, y)
}

Expand All @@ -41,10 +44,39 @@ type RGBA64 = [u16; 4];
#[no_mangle]
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
// CHECK-NOT: alloca
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
// CHECK-DAG: %[[XVAL:.+]] = load i64, ptr %x, align 2
// CHECK-DAG: %[[YVAL:.+]] = load i64, ptr %y, align 2
// CHECK-DAG: store i64 %[[YVAL]], ptr %x, align 2
// CHECK-DAG: store i64 %[[XVAL]], ptr %y, align 2
swap(x, y)
}

// CHECK-LABEL: @swap_vecs
#[no_mangle]
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
// CHECK-NOT: alloca
// There are plenty more loads and stores than just these,
// but at least one sure better be 64-bit (for size or capacity).
// CHECK: load i64
// CHECK: load i64
// CHECK: store i64
// CHECK: store i64
// CHECK: ret void
swap(x, y)
}

// CHECK-LABEL: @swap_slices
#[no_mangle]
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
// CHECK-NOT: alloca
// CHECK: load ptr
// CHECK: load i64
// CHECK: load ptr
// CHECK: load i64
// CHECK: store ptr
// CHECK: store i64
// CHECK: store ptr
// CHECK: store i64
swap(x, y)
}

Expand All @@ -55,9 +87,9 @@ type RGB24 = [u8; 3];
// CHECK-LABEL: @swap_rgb24_slices
#[no_mangle]
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
if x.len() == y.len() {
x.swap_with_slice(y);
}
Expand All @@ -69,9 +101,9 @@ type RGBA32 = [u8; 4];
// CHECK-LABEL: @swap_rgba32_slices
#[no_mangle]
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i32>
// CHECK: store <{{[0-9]+}} x i32>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i32>
// CHECK: store <{{[0-9]+}} x i32>
if x.len() == y.len() {
x.swap_with_slice(y);
}
Expand All @@ -84,10 +116,24 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
// CHECK-LABEL: @swap_string_slices
#[no_mangle]
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
if x.len() == y.len() {
x.swap_with_slice(y);
}
}

#[repr(C, packed)]
pub struct Packed {
pub first: bool,
pub second: usize,
}

// CHECK-LABEL: @swap_packed_structs
#[no_mangle]
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
// CHECK-NOT: alloca
// CHECK: ret void
swap(x, y)
}
Loading