From 5a4561749a18fa96691f9cf79699e6ab2f1f7b2c Mon Sep 17 00:00:00 2001 From: Catherine Flores Date: Tue, 1 Aug 2023 15:35:12 +0000 Subject: [PATCH 1/3] Add new intrinsic `is_constant` and optimize `pow` Fix overflow check Make MIRI choose the path randomly and rename the intrinsic Add back test Add miri test and make it operate on `ptr` Define `llvm.is.constant` for primitives Update MIRI comment and fix test in stage2 Add const eval test Clarify that both branches must have the same side effects guaranteed non guarantee use immediate type instead Co-Authored-By: Ralf Jung --- compiler/rustc_codegen_llvm/src/context.rs | 14 ++++ compiler/rustc_codegen_llvm/src/intrinsic.rs | 4 ++ .../src/const_eval/machine.rs | 5 ++ .../rustc_hir_analysis/src/check/intrinsic.rs | 2 + compiler/rustc_span/src/symbol.rs | 1 + library/core/src/intrinsics.rs | 46 +++++++++++++ library/core/src/lib.rs | 1 + library/core/src/num/int_macros.rs | 57 +++++++++++----- library/core/src/num/uint_macros.rs | 68 ++++++++++++++----- src/tools/miri/src/shims/intrinsics/mod.rs | 12 ++++ src/tools/miri/tests/pass/intrinsics.rs | 15 ++++ tests/codegen/is_val_statically_known.rs | 50 ++++++++++++++ tests/codegen/pow_of_two.rs | 68 +++++++++++++++++++ tests/ui/consts/is_val_statically_known.rs | 15 ++++ 14 files changed, 324 insertions(+), 34 deletions(-) create mode 100644 tests/codegen/is_val_statically_known.rs create mode 100644 tests/codegen/pow_of_two.rs create mode 100644 tests/ui/consts/is_val_statically_known.rs diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 1d1b6e6148dd2..56fa14e3cee46 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -908,6 +908,20 @@ impl<'ll> CodegenCx<'ll, '_> { ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void); ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void); + // FIXME: This is an infinitesimally small portion of the types you can + // pass 
to this intrinsic, if we can ever lazily register intrinsics we + // should register these when they're used, that way any type can be + // passed. + ifn!("llvm.is.constant.i1", fn(i1) -> i1); + ifn!("llvm.is.constant.i8", fn(t_i8) -> i1); + ifn!("llvm.is.constant.i16", fn(t_i16) -> i1); + ifn!("llvm.is.constant.i32", fn(t_i32) -> i1); + ifn!("llvm.is.constant.i64", fn(t_i64) -> i1); + ifn!("llvm.is.constant.i128", fn(t_i128) -> i1); + ifn!("llvm.is.constant.isize", fn(t_isize) -> i1); + ifn!("llvm.is.constant.f32", fn(t_f32) -> i1); + ifn!("llvm.is.constant.f64", fn(t_f64) -> i1); + ifn!("llvm.expect.i1", fn(i1, i1) -> i1); ifn!("llvm.eh.typeid.for", fn(ptr) -> t_i32); ifn!("llvm.localescape", fn(...) -> void); diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index a0f9d5cf7cd36..f1a6f7bd8e690 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -119,6 +119,10 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> { sym::likely => { self.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(true)]) } + sym::is_val_statically_known => self.call_intrinsic( + &format!("llvm.is.constant.{:?}", args[0].layout.immediate_llvm_type(self.cx)), + &[args[0].immediate()], + ), sym::unlikely => self .call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(false)]), kw::Try => { diff --git a/compiler/rustc_const_eval/src/const_eval/machine.rs b/compiler/rustc_const_eval/src/const_eval/machine.rs index 7fb5f10c6ca75..9265e2b42528f 100644 --- a/compiler/rustc_const_eval/src/const_eval/machine.rs +++ b/compiler/rustc_const_eval/src/const_eval/machine.rs @@ -531,6 +531,11 @@ impl<'mir, 'tcx> interpret::Machine<'mir, 'tcx> for CompileTimeInterpreter<'mir, )?; } } + // The intrinsic represents whether the value is known to the optimizer (LLVM). 
+ // We're not doing any optimizations here, so there is no optimizer that could know the value. + // (We know the value here in the machine of course, but this is the runtime of that code, + // not the optimization stage.) + sym::is_val_statically_known => ecx.write_scalar(Scalar::from_bool(false), dest)?, _ => { throw_unsup_format!( "intrinsic `{intrinsic_name}` is not supported at compile-time" diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index a5aedeb33ae96..40958f7a6e3aa 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -453,6 +453,8 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) { sym::black_box => (1, vec![param(0)], param(0)), + sym::is_val_statically_known => (1, vec![param(0)], tcx.types.bool), + sym::const_eval_select => (4, vec![param(0), param(1), param(2)], param(3)), sym::vtable_size | sym::vtable_align => { diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 8ed1255c010f1..c09e4dfcf7dd5 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -907,6 +907,7 @@ symbols! { io_stderr, io_stdout, irrefutable_let_patterns, + is_val_statically_known, isa_attribute, isize, issue, diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index 3d5b544bc1bc1..b49cdcf2f3623 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -2511,6 +2511,52 @@ extern "rust-intrinsic" { where G: FnOnce, F: FnOnce; + + /// Returns whether the argument's value is statically known at + /// compile-time. + /// + /// This is useful when there is a way of writing the code that will + /// be *faster* when some variables have known values, but *slower* + /// in the general case: an `if is_val_statically_known(var)` can be used + /// to select between these two variants. 
The `if` will be optimized away + /// and only the desired branch remains. + /// + /// Formally speaking, this function non-deterministically returns `true` + /// or `false`, and the caller has to ensure sound behavior for both cases. + /// In other words, the following code has *Undefined Behavior*: + /// + /// ```rust + /// if !is_val_statically_known(0) { unreachable_unchecked(); } + /// ``` + /// + /// This also means that the following code's behavior is unspecified; it + /// may panic, or it may not: + /// + /// ```rust,no_run + /// assert_eq!(is_val_statically_known(0), black_box(is_val_statically_known(0))) + /// ``` + /// + /// Unsafe code may not rely on `is_val_statically_known` returning any + /// particular value, ever. However, the compiler will generally make it + /// return `true` only if the value of the argument is actually known. + /// + /// When calling this in a `const fn`, both paths must be semantically + /// equivalent, that is, the result of the `true` branch and the `false` + /// branch must return the same value and have the same side-effects *no + /// matter what*. + #[rustc_const_unstable(feature = "is_val_statically_known", issue = "none")] + #[rustc_nounwind] + #[cfg(not(bootstrap))] + pub fn is_val_statically_known(arg: T) -> bool; +} + +// FIXME: Seems using `unstable` here completely ignores `rustc_allow_const_fn_unstable` +// and thus compiling stage0 core doesn't work. 
+#[rustc_const_stable(feature = "is_val_statically_known", since = "never")] +#[cfg(bootstrap)] +pub const unsafe fn is_val_statically_known(t: T) -> bool { + mem::forget(t); + false } // Some functions are defined here because they accidentally got made diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 1a8f245c8be30..38dcfe5aaf7d9 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -197,6 +197,7 @@ // // Language features: // tidy-alphabetical-start +#![cfg_attr(not(bootstrap), feature(is_val_statically_known))] #![feature(abi_unadjusted)] #![feature(adt_const_params)] #![feature(allow_internal_unsafe)] diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index fd01f1b261012..a36747830c782 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -2088,26 +2088,49 @@ macro_rules! int_impl { without modifying the original"] #[inline] #[rustc_inherit_overflow_checks] + #[rustc_allow_const_fn_unstable(is_val_statically_known)] pub const fn pow(self, mut exp: u32) -> Self { - if exp == 0 { - return 1; - } - let mut base = self; - let mut acc = 1; - - while exp > 1 { - if (exp & 1) == 1 { - acc = acc * base; + // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self > 0 + && (self & (self - 1) == 0) + { + let power_used = match self.checked_ilog2() { + Some(v) => v, + // SAFETY: We just checked this is a power of two. and above zero. + None => unsafe { core::hint::unreachable_unchecked() }, + }; + // So it panics. Have to use `overflowing_mul` to efficiently set the + // result to 0 if not. 
+ #[cfg(debug_assertions)] + { + _ = power_used * exp; + } + let (num_shl, overflowed) = power_used.overflowing_mul(exp); + let fine = !overflowed + & (num_shl < (mem::size_of::() * 8) as u32); + (1 << num_shl) * fine as Self + } else { + if exp == 0 { + return 1; + } + let mut base = self; + let mut acc = 1; + + while exp > 1 { + if (exp & 1) == 1 { + acc = acc * base; + } + exp /= 2; + base = base * base; } - exp /= 2; - base = base * base; - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. - acc * base + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc * base + } } /// Returns the square root of the number, rounded down. diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index 11a53aaf122ec..c5b39d8c4e874 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -1973,26 +1973,60 @@ macro_rules! uint_impl { without modifying the original"] #[inline] #[rustc_inherit_overflow_checks] + #[rustc_allow_const_fn_unstable(is_val_statically_known)] pub const fn pow(self, mut exp: u32) -> Self { - if exp == 0 { - return 1; - } - let mut base = self; - let mut acc = 1; - - while exp > 1 { - if (exp & 1) == 1 { - acc = acc * base; + // LLVM now knows that `self` is a constant value, but not a + // constant in Rust. This allows us to compute the power used at + // compile-time. + // + // This will likely add a branch in debug builds, but this should + // be ok. + // + // This is a massive performance boost in release builds as you can + // get the power of a power of two and the exponent through a `shl` + // instruction, but we must add a couple more checks for parity with + // our own `pow`. 
+ // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self.is_power_of_two() + { + let power_used = match self.checked_ilog2() { + Some(v) => v, + // SAFETY: We just checked this is a power of two. `0` is not a + // power of two. + None => unsafe { core::hint::unreachable_unchecked() }, + }; + // So it panics. Have to use `overflowing_mul` to efficiently set the + // result to 0 if not. + #[cfg(debug_assertions)] + { + _ = power_used * exp; + } + let (num_shl, overflowed) = power_used.overflowing_mul(exp); + let fine = !overflowed + & (num_shl < (mem::size_of::() * 8) as u32); + (1 << num_shl) * fine as Self + } else { + if exp == 0 { + return 1; + } + let mut base = self; + let mut acc = 1; + + while exp > 1 { + if (exp & 1) == 1 { + acc = acc * base; + } + exp /= 2; + base = base * base; } - exp /= 2; - base = base * base; - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. - acc * base + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc * base + } } /// Returns the square root of the number, rounded down. 
diff --git a/src/tools/miri/src/shims/intrinsics/mod.rs b/src/tools/miri/src/shims/intrinsics/mod.rs index a1db7bf74f281..8edc0a4220d4a 100644 --- a/src/tools/miri/src/shims/intrinsics/mod.rs +++ b/src/tools/miri/src/shims/intrinsics/mod.rs @@ -5,6 +5,7 @@ use std::iter; use log::trace; +use rand::Rng; use rustc_apfloat::{Float, Round}; use rustc_middle::ty::layout::LayoutOf; use rustc_middle::{ @@ -141,6 +142,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.write_pointer(Pointer::new(ptr.provenance, masked_addr), dest)?; } + // We want to return either `true` or `false` at random, or else something like + // ``` + // if !is_val_statically_known(0) { unreachable_unchecked(); } + // ``` + // Would not be considered UB, or the other way around (`is_val_statically_known(0)`). + "is_val_statically_known" => { + let [_] = check_arg_count(args)?; + let branch: bool = this.machine.rng.get_mut().gen(); + this.write_scalar(Scalar::from_bool(branch), dest)?; + } + // Floating-point operations "fabsf32" => { let [f] = check_arg_count(args)?; diff --git a/src/tools/miri/tests/pass/intrinsics.rs b/src/tools/miri/tests/pass/intrinsics.rs index 8c6eeab22195c..8e46bd7ad48fb 100644 --- a/src/tools/miri/tests/pass/intrinsics.rs +++ b/src/tools/miri/tests/pass/intrinsics.rs @@ -33,6 +33,21 @@ fn main() { assert_eq!(intrinsics::likely(false), false); assert_eq!(intrinsics::unlikely(true), true); + let mut saw_true = false; + let mut saw_false = false; + + for _ in 0..50 { + if unsafe { intrinsics::is_val_statically_known(0) } { + saw_true = true; + } else { + saw_false = true; + } + } + assert!( + saw_true && saw_false, + "`is_val_statically_known` failed to return both true and false. Congrats, you won the lottery!" 
+ ); + intrinsics::forget(Bomb); let _v = intrinsics::discriminant_value(&Some(())); diff --git a/tests/codegen/is_val_statically_known.rs b/tests/codegen/is_val_statically_known.rs new file mode 100644 index 0000000000000..4dcab7442356b --- /dev/null +++ b/tests/codegen/is_val_statically_known.rs @@ -0,0 +1,50 @@ +// #[cfg(bootstrap)] +// ignore-stage1 +// compile-flags: --crate-type=lib -Zmerge-functions=disabled + +#![feature(core_intrinsics)] + +use std::intrinsics::is_val_statically_known; + +pub struct A(u32); +pub enum B { + Ye(u32), +} + +#[inline] +pub fn _u32(a: u32) -> i32 { + if unsafe { is_val_statically_known(a) } { 1 } else { 0 } +} + +// CHECK-LABEL: @_u32_true( +#[no_mangle] +pub fn _u32_true() -> i32 { + // CHECK: ret i32 1 + _u32(1) +} + +// CHECK-LABEL: @_u32_false( +#[no_mangle] +pub fn _u32_false(a: u32) -> i32 { + // CHECK: ret i32 0 + _u32(a) +} + +#[inline] +pub fn _bool(b: bool) -> i32 { + if unsafe { is_val_statically_known(b) } { 3 } else { 2 } +} + +// CHECK-LABEL: @_bool_true( +#[no_mangle] +pub fn _bool_true() -> i32 { + // CHECK: ret i32 3 + _bool(true) +} + +// CHECK-LABEL: @_bool_false( +#[no_mangle] +pub fn _bool_false(b: bool) -> i32 { + // CHECK: ret i32 2 + _bool(b) +} diff --git a/tests/codegen/pow_of_two.rs b/tests/codegen/pow_of_two.rs new file mode 100644 index 0000000000000..3bce5535c66ec --- /dev/null +++ b/tests/codegen/pow_of_two.rs @@ -0,0 +1,68 @@ +// #[cfg(bootstrap)] +// ignore-stage1 +// compile-flags: --crate-type=lib -Zmerge-functions=disabled + +// CHECK-LABEL: @a( +#[no_mangle] +pub fn a(exp: u32) -> u64 { + // CHECK: %[[R:.+]] = and i32 %exp, 63 + // CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64 + // CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]] + // CHECK: ret i64 %[[R:.+]] + 2u64.pow(exp) +} + +#[no_mangle] +pub fn b(exp: u32) -> i64 { + // CHECK: %[[R:.+]] = and i32 %exp, 63 + // CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64 + // CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]] + // CHECK: ret 
i64 %[[R:.+]] + 2i64.pow(exp) +} + +// CHECK-LABEL: @c( +#[no_mangle] +pub fn c(exp: u32) -> u32 { + // CHECK: %[[R:.+]].0.i = shl i32 %exp, 1 + // CHECK: %[[R:.+]].1.i = icmp sgt i32 %exp, -1 + // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32 + // CHECK: %fine.i = and i1 %[[R:.+]].1.i, %[[R:.+]].i + // CHECK: %0 = and i32 %[[R:.+]].0.i, 30 + // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32 + // CHECK: %[[R:.+]] = shl nuw nsw i32 %[[R:.+]].i, %0 + // CHECK: ret i32 %[[R:.+]] + 4u32.pow(exp) +} + +// CHECK-LABEL: @d( +#[no_mangle] +pub fn d(exp: u32) -> u32 { + // CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5) + // CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0 + // CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1 + // CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true + // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32 + // CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i + // CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31 + // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32 + // CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1 + // CHECK: ret i32 %[[R:.+]] + 32u32.pow(exp) +} + +// CHECK-LABEL: @e( +#[no_mangle] +pub fn e(exp: u32) -> i32 { + // CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5) + // CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0 + // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32 + // CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1 + // CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true + // CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i + // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32 + // CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31 + // CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1 + // CHECK: ret i32 %[[R:.+]] + 32i32.pow(exp) +} diff --git a/tests/ui/consts/is_val_statically_known.rs b/tests/ui/consts/is_val_statically_known.rs new file mode 100644 index 0000000000000..b0565842eb4e2 --- /dev/null +++ 
b/tests/ui/consts/is_val_statically_known.rs @@ -0,0 +1,15 @@ +// run-pass + +#![feature(core_intrinsics)] +#![feature(is_val_statically_known)] + +use std::intrinsics::is_val_statically_known; + +const CONST_TEST: bool = unsafe { is_val_statically_known(0) }; + +fn main() { + if CONST_TEST { + unreachable!("currently expected to return false during const eval"); + // but note that this is not a guarantee! + } +} From 971e37ff7e80cf2dbf4f95162d5957913803f30d Mon Sep 17 00:00:00 2001 From: Nicholas Thompson Date: Tue, 23 Jan 2024 12:02:31 -0500 Subject: [PATCH 2/3] Further Implement `is_val_statically_known` --- .../src/intrinsics/mod.rs | 6 ++++ compiler/rustc_codegen_gcc/src/context.rs | 7 +++-- .../rustc_codegen_gcc/src/intrinsic/mod.rs | 6 ++++ library/core/src/intrinsics.rs | 30 ++++++++++++++----- tests/codegen/is_val_statically_known.rs | 4 +-- 5 files changed, 39 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index 15249402a63e7..f328ad93d260b 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -439,6 +439,12 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, a); } + sym::is_val_statically_known => { + intrinsic_args!(fx, args => (_a); intrinsic); + + let res = fx.bcx.ins().iconst(types::I8, 0); + ret.write_cvalue(fx, CValue::by_val(res, ret.layout())); + } sym::breakpoint => { intrinsic_args!(fx, args => (); intrinsic); diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs index 053f759329fb1..5760d96165ddf 100644 --- a/compiler/rustc_codegen_gcc/src/context.rs +++ b/compiler/rustc_codegen_gcc/src/context.rs @@ -196,15 +196,16 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let mut functions = FxHashMap::default(); let builtins = [ - "__builtin_unreachable", "abort", "__builtin_expect", "__builtin_add_overflow", 
"__builtin_mul_overflow", - "__builtin_saddll_overflow", /*"__builtin_sadd_overflow",*/ "__builtin_smulll_overflow", /*"__builtin_smul_overflow",*/ + "__builtin_unreachable", "abort", "__builtin_expect", /*"__builtin_expect_with_probability",*/ + "__builtin_constant_p", "__builtin_add_overflow", "__builtin_mul_overflow", "__builtin_saddll_overflow", + /*"__builtin_sadd_overflow",*/ "__builtin_smulll_overflow", /*"__builtin_smul_overflow",*/ "__builtin_ssubll_overflow", /*"__builtin_ssub_overflow",*/ "__builtin_sub_overflow", "__builtin_uaddll_overflow", "__builtin_uadd_overflow", "__builtin_umulll_overflow", "__builtin_umul_overflow", "__builtin_usubll_overflow", "__builtin_usub_overflow", "sqrtf", "sqrt", "__builtin_powif", "__builtin_powi", "sinf", "sin", "cosf", "cos", "powf", "pow", "expf", "exp", "exp2f", "exp2", "logf", "log", "log10f", "log10", "log2f", "log2", "fmaf", "fma", "fabsf", "fabs", "fminf", "fmin", "fmaxf", "fmax", "copysignf", "copysign", "floorf", "floor", "ceilf", "ceil", "truncf", "trunc", "rintf", "rint", "nearbyintf", "nearbyint", "roundf", "round", - "__builtin_expect_with_probability", + ]; for builtin in builtins.iter() { diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index 85b891fce3e42..eac8cb437794b 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -123,6 +123,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { sym::unlikely => { self.expect(args[0].immediate(), false) } + sym::is_val_statically_known => { + let a = args[0].immediate(); + let builtin = self.context.get_builtin_function("__builtin_constant_p"); + let res = self.context.new_call(None, builtin, &[a]); + self.icmp(IntPredicate::IntNE, res, self.const_i32(0)) // nonzero from `__builtin_constant_p` means "known constant" + } kw::Try => { try_intrinsic( self, diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index b49cdcf2f3623..5b652f9991105 100644 
--- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -2525,15 +2525,30 @@ extern "rust-intrinsic" { /// or `false`, and the caller has to ensure sound behavior for both cases. /// In other words, the following code has *Undefined Behavior*: /// - /// ```rust - /// if !is_val_statically_known(0) { unreachable_unchecked(); } + /// ``` + /// #![feature(is_val_statically_known)] + /// #![feature(core_intrinsics)] + /// # #![allow(internal_features)] + /// use std::hint::unreachable_unchecked; + /// use std::intrinsics::is_val_statically_known; + /// + /// unsafe { + /// if !is_val_statically_known(0) { unreachable_unchecked(); } + /// } /// ``` /// /// This also means that the following code's behavior is unspecified; it /// may panic, or it may not: /// - /// ```rust,no_run - /// assert_eq!(is_val_statically_known(0), black_box(is_val_statically_known(0))) + /// ```no_run + /// #![feature(is_val_statically_known)] + /// #![feature(core_intrinsics)] + /// # #![allow(internal_features)] + /// use std::intrinsics::is_val_statically_known; + /// + /// unsafe { + /// assert_eq!(is_val_statically_known(0), is_val_statically_known(0)); + /// } /// ``` /// /// Unsafe code may not rely on `is_val_statically_known` returning any @@ -2547,15 +2562,14 @@ extern "rust-intrinsic" { #[rustc_const_unstable(feature = "is_val_statically_known", issue = "none")] #[rustc_nounwind] #[cfg(not(bootstrap))] - pub fn is_val_statically_known(arg: T) -> bool; + pub fn is_val_statically_known(arg: T) -> bool; } // FIXME: Seems using `unstable` here completely ignores `rustc_allow_const_fn_unstable` // and thus compiling stage0 core doesn't work. 
-#[rustc_const_stable(feature = "is_val_statically_known", since = "never")] +#[rustc_const_stable(feature = "is_val_statically_known", since = "0.0.0")] #[cfg(bootstrap)] -pub const unsafe fn is_val_statically_known(t: T) -> bool { - mem::forget(t); +pub const unsafe fn is_val_statically_known(_arg: T) -> bool { false } diff --git a/tests/codegen/is_val_statically_known.rs b/tests/codegen/is_val_statically_known.rs index 4dcab7442356b..44187d4f667dd 100644 --- a/tests/codegen/is_val_statically_known.rs +++ b/tests/codegen/is_val_statically_known.rs @@ -1,6 +1,4 @@ -// #[cfg(bootstrap)] -// ignore-stage1 -// compile-flags: --crate-type=lib -Zmerge-functions=disabled +// compile-flags: --crate-type=lib -Zmerge-functions=disabled -O #![feature(core_intrinsics)] From 9dccd5dce11f2fcce81fc77f4271eaf2359e2626 Mon Sep 17 00:00:00 2001 From: Nicholas Thompson Date: Tue, 23 Jan 2024 12:03:50 -0500 Subject: [PATCH 3/3] Further Implement Power of Two Optimization --- library/core/src/num/int_macros.rs | 259 ++++++++++++++++++++-------- library/core/src/num/uint_macros.rs | 211 ++++++++++++++-------- tests/codegen/pow_of_two.rs | 65 +++---- 3 files changed, 350 insertions(+), 185 deletions(-) diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index a36747830c782..11b9f9e8f4e4d 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -901,26 +901,59 @@ macro_rules! int_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] pub const fn checked_pow(self, mut exp: u32) -> Option { - if exp == 0 { - return Some(1); - } - let mut base = self; - let mut acc: Self = 1; + // SAFETY: This path has the same behavior as the other. 
+ if unsafe { intrinsics::is_val_statically_known(self) } + && self.unsigned_abs().is_power_of_two() + { + if self == 1 { // Avoid divide by zero + return Some(1); + } + if self == -1 { // Avoid divide by zero + return Some(if exp & 1 != 0 { -1 } else { 1 }); + } + // SAFETY: `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero. + let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 }; + if exp > (Self::BITS - 1) / power_used { return None; } // Division of constants is free + + // SAFETY: `power_used * exp <= Self::BITS - 1`, so the product cannot overflow and the shift is in range. + let res = unsafe { intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )}; + // LLVM doesn't always optimize out the checks + // at the ir level. + + let sign = self.is_negative() && exp & 1 != 0; + if !sign && res == Self::MIN { + None + } else if sign { + Some(res.wrapping_neg()) + } else { + Some(res) + } + } else { + if exp == 0 { + return Some(1); + } + let mut base = self; + let mut acc: Self = 1; - while exp > 1 { - if (exp & 1) == 1 { - acc = try_opt!(acc.checked_mul(base)); + while exp > 1 { + if (exp & 1) == 1 { + acc = try_opt!(acc.checked_mul(base)); + } + exp /= 2; + base = try_opt!(base.checked_mul(base)); } - exp /= 2; - base = try_opt!(base.checked_mul(base)); + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc.checked_mul(base) } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. - acc.checked_mul(base) } /// Returns the square root of the number, rounded down. @@ -1537,27 +1570,58 @@ macro_rules! 
int_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] pub const fn wrapping_pow(self, mut exp: u32) -> Self { - if exp == 0 { - return 1; - } - let mut base = self; - let mut acc: Self = 1; + // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self.unsigned_abs().is_power_of_two() + { + if self == 1 { // Avoid divide by zero + return 1; + } + if self == -1 { // Avoid divide by zero + return if exp & 1 != 0 { -1 } else { 1 }; + } + // SAFETY: `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero. + let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 }; + if exp > (Self::BITS - 1) / power_used { return 0; } // Division of constants is free + + // SAFETY: `power_used * exp <= Self::BITS - 1`, so the product cannot overflow and the shift is in range. + let res = unsafe { intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )}; + // LLVM doesn't always optimize out the checks + // at the ir level. + + let sign = self.is_negative() && exp & 1 != 0; + if sign { + res.wrapping_neg() + } else { + res + } + } else { + if exp == 0 { + return 1; + } + let mut base = self; + let mut acc: Self = 1; - while exp > 1 { - if (exp & 1) == 1 { - acc = acc.wrapping_mul(base); + while exp > 1 { + if (exp & 1) == 1 { + acc = acc.wrapping_mul(base); + } + exp /= 2; + base = base.wrapping_mul(base); } - exp /= 2; - base = base.wrapping_mul(base); - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. - acc.wrapping_mul(base) + // since exp!=0, finally the exp must be 1. 
+ // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc.wrapping_mul(base) + } } /// Calculates `self` + `rhs` @@ -2039,36 +2103,68 @@ macro_rules! int_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) { - if exp == 0 { - return (1,false); - } - let mut base = self; - let mut acc: Self = 1; - let mut overflown = false; - // Scratch space for storing results of overflowing_mul. - let mut r; - - while exp > 1 { - if (exp & 1) == 1 { - r = acc.overflowing_mul(base); - acc = r.0; + // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self.unsigned_abs().is_power_of_two() + { + if self == 1 { // Avoid divide by zero + return (1, false); + } + if self == -1 { // Avoid divide by zero + return (if exp & 1 != 0 { -1 } else { 1 }, false); + } + // SAFETY: `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero. + let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 }; + if exp > (Self::BITS - 1) / power_used { return (0, true); } // Division of constants is free + + // SAFETY: `power_used * exp <= Self::BITS - 1`, so the product cannot overflow and the shift is in range. + let res = unsafe { intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )}; + // LLVM doesn't always optimize out the checks + // at the ir level. 
+ + let sign = self.is_negative() && exp & 1 != 0; + let overflow = !sign && res == Self::MIN; // a negative result equal to `Self::MIN` is exact, not an overflow + if sign { + (res.wrapping_neg(), overflow) + } else { + (res, overflow) + } + } else { + if exp == 0 { + return (1,false); + } + let mut base = self; + let mut acc: Self = 1; + let mut overflown = false; + // Scratch space for storing results of overflowing_mul. + let mut r; + + while exp > 1 { + if (exp & 1) == 1 { + r = acc.overflowing_mul(base); + acc = r.0; + overflown |= r.1; + } + exp /= 2; + r = base.overflowing_mul(base); + base = r.0; overflown |= r.1; } - exp /= 2; - r = base.overflowing_mul(base); - base = r.0; - overflown |= r.1; - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. - r = acc.overflowing_mul(base); - r.1 |= overflown; - r + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + r = acc.overflowing_mul(base); + r.1 |= overflown; + r + } } /// Raises self to the power of `exp`, using exponentiation by squaring. @@ -2086,30 +2182,47 @@ macro_rules! int_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] #[rustc_inherit_overflow_checks] - #[rustc_allow_const_fn_unstable(is_val_statically_known)] + #[track_caller] // Hides the hackish overflow check for powers of two. pub const fn pow(self, mut exp: u32) -> Self { // SAFETY: This path has the same behavior as the other. 
if unsafe { intrinsics::is_val_statically_known(self) } - && self > 0 - && (self & (self - 1) == 0) + && self.unsigned_abs().is_power_of_two() { - let power_used = match self.checked_ilog2() { - Some(v) => v, - // SAFETY: We just checked this is a power of two. and above zero. - None => unsafe { core::hint::unreachable_unchecked() }, - }; - // So it panics. Have to use `overflowing_mul` to efficiently set the - // result to 0 if not. - #[cfg(debug_assertions)] - { - _ = power_used * exp; + if self == 1 { // Avoid divide by zero + return 1; + } + if self == -1 { // Avoid divide by zero + return if exp & 1 != 0 { -1 } else { 1 }; + } + // SAFETY: We just checked this is a power of two. and above zero. + let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 }; + if exp > Self::BITS / power_used { // Division of constants is free + #[allow(arithmetic_overflow)] + return Self::MAX * Self::MAX * 0; + } + + // SAFETY: exp <= Self::BITS / power_used + let res = unsafe { intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )}; + // LLVM doesn't always optimize out the checks + // at the ir level. + + let sign = self.is_negative() && exp & 1 != 0; + #[allow(arithmetic_overflow)] + if !sign && res == Self::MIN { + // So it panics. + _ = Self::MAX * Self::MAX; + } + if sign { + res.wrapping_neg() + } else { + res } - let (num_shl, overflowed) = power_used.overflowing_mul(exp); - let fine = !overflowed - & (num_shl < (mem::size_of::() * 8) as u32); - (1 << num_shl) * fine as Self } else { if exp == 0 { return 1; diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index c5b39d8c4e874..9f6ee43f8b95f 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -1005,28 +1005,49 @@ macro_rules! 
uint_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] pub const fn checked_pow(self, mut exp: u32) -> Option { - if exp == 0 { - return Some(1); - } - let mut base = self; - let mut acc: Self = 1; + // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self.is_power_of_two() + { + if self == 1 { // Avoid divide by zero + return Some(1); + } + // SAFETY: `self` is a power of two, hence nonzero. + let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 }; + if exp > (Self::BITS - 1) / power_used { return None; } // Division of constants is free + + // SAFETY: power_used * exp <= Self::BITS - 1, so neither the multiplication nor the shift can overflow. + unsafe { Some(intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )) } + // LLVM doesn't always optimize out the checks + // at the ir level. + } else { + if exp == 0 { + return Some(1); + } + let mut base = self; + let mut acc: Self = 1; - while exp > 1 { - if (exp & 1) == 1 { - acc = try_opt!(acc.checked_mul(base)); + while exp > 1 { + if (exp & 1) == 1 { + acc = try_opt!(acc.checked_mul(base)); + } + exp /= 2; + base = try_opt!(base.checked_mul(base)); } - exp /= 2; - base = try_opt!(base.checked_mul(base)); - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. - acc.checked_mul(base) + acc.checked_mul(base) + } } /// Saturating integer addition. 
Computes `self + rhs`, saturating at @@ -1475,27 +1496,48 @@ macro_rules! uint_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] pub const fn wrapping_pow(self, mut exp: u32) -> Self { - if exp == 0 { - return 1; - } - let mut base = self; - let mut acc: Self = 1; + // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self.is_power_of_two() + { + if self == 1 { // Avoid divide by zero + return 1; + } + // SAFETY: `self` is a power of two, hence nonzero. + let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 }; + if exp > (Self::BITS - 1) / power_used { return 0; } // Division of constants is free + + // SAFETY: power_used * exp <= Self::BITS - 1, so neither the multiplication nor the shift can overflow. + unsafe { intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )} + // LLVM doesn't always optimize out the checks + // at the ir level. + } else { + if exp == 0 { + return 1; + } + let mut base = self; + let mut acc: Self = 1; - while exp > 1 { - if (exp & 1) == 1 { - acc = acc.wrapping_mul(base); + while exp > 1 { + if (exp & 1) == 1 { + acc = acc.wrapping_mul(base); + } + exp /= 2; + base = base.wrapping_mul(base); } - exp /= 2; - base = base.wrapping_mul(base); - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. - acc.wrapping_mul(base) + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc.wrapping_mul(base) + } } /// Calculates `self` + `rhs` @@ -1925,37 +1967,58 @@ macro_rules! 
uint_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) { - if exp == 0{ - return (1,false); - } - let mut base = self; - let mut acc: Self = 1; - let mut overflown = false; - // Scratch space for storing results of overflowing_mul. - let mut r; - - while exp > 1 { - if (exp & 1) == 1 { - r = acc.overflowing_mul(base); - acc = r.0; + // SAFETY: This path has the same behavior as the other. + if unsafe { intrinsics::is_val_statically_known(self) } + && self.is_power_of_two() + { + if self == 1 { // Avoid divide by zero + return (1, false); + } + // SAFETY: `self` is a power of two, hence nonzero. + let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 }; + if exp > (Self::BITS - 1) / power_used { return (0, true); } // Division of constants is free + + // SAFETY: power_used * exp <= Self::BITS - 1, so neither the multiplication nor the shift can overflow. + unsafe { (intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + ), false) } + // LLVM doesn't always optimize out the checks + // at the ir level. + } else { + if exp == 0{ + return (1,false); + } + let mut base = self; + let mut acc: Self = 1; + let mut overflown = false; + // Scratch space for storing results of overflowing_mul. + let mut r; + + while exp > 1 { + if (exp & 1) == 1 { + r = acc.overflowing_mul(base); + acc = r.0; + overflown |= r.1; + } + exp /= 2; + r = base.overflowing_mul(base); + base = r.0; overflown |= r.1; } - exp /= 2; - r = base.overflowing_mul(base); - base = r.0; - overflown |= r.1; - } - // since exp!=0, finally the exp must be 1. - // Deal with the final bit of the exponent separately, since - // squaring the base afterwards is not necessary and may cause a - // needless overflow. 
- r = acc.overflowing_mul(base); - r.1 |= overflown; + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + r = acc.overflowing_mul(base); + r.1 |= overflown; - r + r + } } /// Raises self to the power of `exp`, using exponentiation by squaring. @@ -1971,9 +2034,10 @@ macro_rules! uint_impl { #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] + #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)] #[inline] #[rustc_inherit_overflow_checks] - #[rustc_allow_const_fn_unstable(is_val_statically_known)] + #[track_caller] // Hides the hackish overflow check for powers of two. pub const fn pow(self, mut exp: u32) -> Self { // LLVM now knows that `self` is a constant value, but not a // constant in Rust. This allows us to compute the power used at @@ -1990,22 +2054,23 @@ macro_rules! uint_impl { if unsafe { intrinsics::is_val_statically_known(self) } && self.is_power_of_two() { - let power_used = match self.checked_ilog2() { - Some(v) => v, - // SAFETY: We just checked this is a power of two. `0` is not a - // power of two. - None => unsafe { core::hint::unreachable_unchecked() }, - }; - // So it panics. Have to use `overflowing_mul` to efficiently set the - // result to 0 if not. - #[cfg(debug_assertions)] - { - _ = power_used * exp; + if self == 1 { // Avoid divide by zero + return 1; } - let (num_shl, overflowed) = power_used.overflowing_mul(exp); - let fine = !overflowed - & (num_shl < (mem::size_of::() * 8) as u32); - (1 << num_shl) * fine as Self + // SAFETY: We just checked this is a power of two. and above zero. 
+ let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 }; + if exp > Self::BITS / power_used { // Division of constants is free + #[allow(arithmetic_overflow)] + return Self::MAX * Self::MAX * 0; + } + + // SAFETY: exp <= Self::BITS / power_used + unsafe { intrinsics::unchecked_shl( + 1 as Self, + intrinsics::unchecked_mul(power_used, exp) as Self + )} + // LLVM doesn't always optimize out the checks + // at the ir level. } else { if exp == 0 { return 1; diff --git a/tests/codegen/pow_of_two.rs b/tests/codegen/pow_of_two.rs index 3bce5535c66ec..a8c0550e33263 100644 --- a/tests/codegen/pow_of_two.rs +++ b/tests/codegen/pow_of_two.rs @@ -1,68 +1,55 @@ -// #[cfg(bootstrap)] -// ignore-stage1 -// compile-flags: --crate-type=lib -Zmerge-functions=disabled +// compile-flags: --crate-type=lib -Zmerge-functions=disabled -O -C overflow-checks=false // CHECK-LABEL: @a( #[no_mangle] pub fn a(exp: u32) -> u64 { - // CHECK: %[[R:.+]] = and i32 %exp, 63 - // CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64 - // CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]] - // CHECK: ret i64 %[[R:.+]] + // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64 + // CHECK: %{{[^ ]+}} = zext i32 %exp to i64 + // CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}} + // CHECK: ret i64 %{{[^ ]+}} 2u64.pow(exp) } +// CHECK-LABEL: @b( #[no_mangle] pub fn b(exp: u32) -> i64 { - // CHECK: %[[R:.+]] = and i32 %exp, 63 - // CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64 - // CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]] - // CHECK: ret i64 %[[R:.+]] + // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64 + // CHECK: %{{[^ ]+}} = zext i32 %exp to i64 + // CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}} + // CHECK: ret i64 %{{[^ ]+}} 2i64.pow(exp) } // CHECK-LABEL: @c( #[no_mangle] pub fn c(exp: u32) -> u32 { - // CHECK: %[[R:.+]].0.i = shl i32 %exp, 1 - // CHECK: %[[R:.+]].1.i = icmp sgt i32 %exp, -1 - // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32 - // CHECK: %fine.i = and i1 %[[R:.+]].1.i, 
%[[R:.+]].i - // CHECK: %0 = and i32 %[[R:.+]].0.i, 30 - // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32 - // CHECK: %[[R:.+]] = shl nuw nsw i32 %[[R:.+]].i, %0 - // CHECK: ret i32 %[[R:.+]] + // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 16 + // CHECK: %{{[^ ]+}} = shl nuw nsw i32 %exp, 1 + // CHECK: %{{[^ ]+}} = shl nuw i32 1, %{{[^ ]+}} + // CHECK: %{{[^ ]+}} = select i1 %{{[^ ]+}}, i32 0, i32 %{{[^ ]+}} + // CHECK: ret i32 %{{[^ ]+}} 4u32.pow(exp) } // CHECK-LABEL: @d( #[no_mangle] pub fn d(exp: u32) -> u32 { - // CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5) - // CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0 - // CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1 - // CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true - // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32 - // CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i - // CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31 - // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32 - // CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1 - // CHECK: ret i32 %[[R:.+]] + // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6 + // CHECK: %{{[^ ]+}} = mul nuw nsw i32 %exp, 5 + // CHECK: %{{[^ ]+}} = shl nuw nsw i32 1, %{{[^ ]+}} + // CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}} + // CHECK: ret i32 %{{[^ ]+}} 32u32.pow(exp) } // CHECK-LABEL: @e( #[no_mangle] pub fn e(exp: u32) -> i32 { - // CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5) - // CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0 - // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32 - // CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1 - // CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true - // CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i - // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32 - // CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31 - // CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1 - // CHECK: ret i32 %[[R:.+]] + // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 
6 + // CHECK: %{{[^ ]+}} = mul nuw {{(nsw )?}}i32 %exp, 5 + // CHECK: %{{[^ ]+}} = shl nuw {{(nsw )?}}i32 1, %{{[^ ]+}} + // CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}} + // CHECK: ret i32 %{{[^ ]+}} 32i32.pow(exp) } +// note: d and e are expected to yield the same IR