Skip to content

Commit

Permalink
Fix performance for CUDA 9.2 / 10.0 (#1281)
Browse files Browse the repository at this point in the history
- make constexpr compiler dependent whenever possible (replace with macro `GT_CONSTEXPR`)
- remove `const_expr::move` and `const_expr::forward`
- add `wstd::move` and `wstd::forward` (move and forward are only constexpr if compiler does not suffer from it)
  • Loading branch information
lukasm91 authored and anstaf committed May 3, 2019
1 parent 8a61e67 commit 9fa2862
Show file tree
Hide file tree
Showing 134 changed files with 3,976 additions and 4,032 deletions.
12 changes: 6 additions & 6 deletions docs_src/manuals/user_manual/storages.hrst
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,17 @@ is surrounded by a halo region (green).

**Interface**: A ``storage_info`` object provides methods for querying the meta data.

* ``template <uint_t D> constexpr uint_t total_length() const``: retrieve the total number of data points in dimension
* ``template <uint_t D> uint_t total_length() const``: retrieve the total number of data points in dimension
``D``
* ``template <int D> constexpr int stride() const``: retrieve the stride in dimension ``D``
* ``template <int D> int stride() const``: retrieve the stride in dimension ``D``
* ``const array<uint_t, ndims> &total_lengths() const``: return the array of total number of data points in each
direction
* ``const array<uint_t, ndims> &strides() const``: return the array of (aligned) strides.
* ``template <uint_t D> constexpr uint_t begin() const``: retrieve the position of the first non halo point in dimension `D`
* ``template <uint_t D> constexpr uint_t end() const``: retrieve the position of the last non halo point in dimension `D`
* ``template <uint_t D> constexpr uint_t total_begin() const``: retrieve the position of the first point (can also be a
* ``template <uint_t D> uint_t begin() const``: retrieve the position of the first non halo point in dimension ``D``
* ``template <uint_t D> uint_t end() const``: retrieve the position of the last non halo point in dimension ``D``
* ``template <uint_t D> uint_t total_begin() const``: retrieve the position of the first point (can also be a
halo point) in dimension ``D`` (always ``0``)
* ``template <uint_t D> constexpr uint_t total_end() const``: retrieve the position of the last point (can also be a
* ``template <uint_t D> uint_t total_end() const``: retrieve the position of the last point (can also be a
halo point) in dimension ``D``

.. _data-store:
Expand Down
8 changes: 4 additions & 4 deletions include/gridtools/c_bindings/function_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ namespace gridtools {
template <class T,
typename std::enable_if<std::is_class<typename std::remove_reference<T>::type>::value, int>::type = 0>
gt_handle *convert_to_c(T &&obj) {
return new gt_handle{std::forward<T>(obj)};
return new gt_handle{wstd::forward<T>(obj)};
}

template <class T>
Expand Down Expand Up @@ -160,13 +160,13 @@ namespace gridtools {

/// Wrap the functor of type `Impl` into another functor that can be invoked with the `wrapped_t<T>` signature.
template <class T, class Impl>
constexpr _impl::wrapped_f<T, typename std::decay<Impl>::type> wrap(Impl &&obj) {
return {std::forward<Impl>(obj)};
GT_CONSTEXPR _impl::wrapped_f<T, typename std::decay<Impl>::type> wrap(Impl &&obj) {
return {wstd::forward<Impl>(obj)};
}

/// Specialization for function pointers.
template <class T>
constexpr _impl::wrapped_f<T, T *> wrap(T *obj) {
GT_CONSTEXPR _impl::wrapped_f<T, T *> wrap(T *obj) {
return {obj};
}
} // namespace c_bindings
Expand Down
4 changes: 2 additions & 2 deletions include/gridtools/c_bindings/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ namespace gridtools {
void for_each_param(TypeToStr &&type_to_str, Fun &&fun) {
int count = 0;
for_each_type<Params>(for_each_param_helper_f<TypeToStr, Fun>{
std::forward<TypeToStr>(type_to_str), std::forward<Fun>(fun), count});
wstd::forward<TypeToStr>(type_to_str), wstd::forward<Fun>(fun), count});
};

template <class CSignature>
Expand Down Expand Up @@ -439,7 +439,7 @@ namespace gridtools {
get_entities<Traits>().add(name,
std::bind(Traits::template generate_entity<Signature>,
std::placeholders::_1,
std::forward<Params>(params)...));
wstd::forward<Params>(params)...));
}

template <class CSignature>
Expand Down
7 changes: 4 additions & 3 deletions include/gridtools/common/any_moveable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <utility>

#include "defs.hpp"
#include "generic_metafunctions/utility.hpp"

namespace gridtools {

Expand All @@ -36,7 +37,7 @@ namespace gridtools {
struct impl : iface {
T m_obj;
impl(T const &obj) : m_obj(obj) {}
impl(T &&obj) : m_obj(std::move(obj)) {}
impl(T &&obj) : m_obj(wstd::move(obj)) {}
std::type_info const &type() const noexcept override { return typeid(T); }
};
std::unique_ptr<iface> m_impl;
Expand All @@ -45,12 +46,12 @@ namespace gridtools {
any_moveable() = default;

template <class Arg, class Decayed = typename std::decay<Arg>::type>
any_moveable(Arg &&arg) : m_impl(new impl<Decayed>(std::forward<Arg>(arg))) {}
any_moveable(Arg &&arg) : m_impl(new impl<Decayed>(wstd::forward<Arg>(arg))) {}
any_moveable(any_moveable &&) = default;

template <class Arg, class Decayed = typename std::decay<Arg>::type>
any_moveable &operator=(Arg &&obj) {
m_impl.reset(new impl<Decayed>(std::forward<Arg>(obj)));
m_impl.reset(new impl<Decayed>(wstd::forward<Arg>(obj)));
return *this;
}
any_moveable &operator=(any_moveable &&) = default;
Expand Down
20 changes: 10 additions & 10 deletions include/gridtools/common/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ namespace gridtools {
T *end() { return &m_array[D]; }

GT_FUNCTION
constexpr const T *data() const noexcept { return m_array; }
GT_CONSTEXPR const T *data() const noexcept { return m_array; }
GT_FUNCTION
T *data() noexcept { return m_array; }

GT_FUNCTION
constexpr T const &operator[](size_t i) const { return m_array[i]; }
GT_CONSTEXPR T const &operator[](size_t i) const { return m_array[i]; }

GT_FUNCTION
T &operator[](size_t i) {
Expand Down Expand Up @@ -111,21 +111,21 @@ namespace gridtools {

struct getter {
template <size_t I, typename T, size_t D>
static GT_FUNCTION constexpr T &get(array<T, D> &arr) noexcept {
static GT_FUNCTION GT_CONSTEXPR T &get(array<T, D> &arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return arr.m_array[I];
}

template <size_t I, typename T, size_t D>
static GT_FUNCTION constexpr const T &get(const array<T, D> &arr) noexcept {
static GT_FUNCTION GT_CONSTEXPR const T &get(const array<T, D> &arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return arr.m_array[I];
}

template <size_t I, typename T, size_t D>
static GT_FUNCTION constexpr T &&get(array<T, D> &&arr) noexcept {
static GT_FUNCTION GT_CONSTEXPR T &&get(array<T, D> &&arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return const_expr::move(arr.m_array[I]);
return wstd::move(arr.m_array[I]);
}
};
} // namespace array_impl_
Expand Down Expand Up @@ -182,21 +182,21 @@ namespace gridtools {
};

template <size_t I, typename T, size_t D>
GT_FUNCTION constexpr T &get(array<T, D> &arr) noexcept {
GT_FUNCTION GT_CONSTEXPR T &get(array<T, D> &arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return arr.m_array[I];
}

template <size_t I, typename T, size_t D>
GT_FUNCTION constexpr const T &get(const array<T, D> &arr) noexcept {
GT_FUNCTION GT_CONSTEXPR const T &get(const array<T, D> &arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return arr.m_array[I];
}

template <size_t I, typename T, size_t D>
GT_FUNCTION constexpr T &&get(array<T, D> &&arr) noexcept {
GT_FUNCTION GT_CONSTEXPR T &&get(array<T, D> &&arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return std::move(get<I>(arr));
return wstd::move(get<I>(arr));
}

/** @} */
Expand Down
5 changes: 2 additions & 3 deletions include/gridtools/common/array_dot_product.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ namespace gridtools {

namespace _impl {
template <typename T, typename U, size_t D, size_t... Is>
GT_FUNCTION constexpr auto dot_impl(
array<T, D> const &a, array<U, D> const &b, meta::integer_sequence<size_t, Is...>)
GT_FUNCTION GT_CONSTEXPR auto dot_impl(array<T, D> const &a, array<U, D> const &b, meta::integer_sequence<size_t, Is...>)
-> decltype(accumulate(plus_functor{}, (a[Is] * b[Is])...)) {
return accumulate(plus_functor{}, (a[Is] * b[Is])...);
}
Expand All @@ -47,7 +46,7 @@ namespace gridtools {
typename U,
size_t D,
typename std::enable_if<std::is_arithmetic<T>::value and std::is_arithmetic<U>::value, T>::type = 0>
GT_FUNCTION constexpr T array_dot_product(array<T, D> const &a, array<U, D> const &b) {
GT_FUNCTION GT_CONSTEXPR T array_dot_product(array<T, D> const &a, array<U, D> const &b) {
return _impl::dot_impl(a, b, meta::make_integer_sequence<size_t, D>{});
}

Expand Down
8 changes: 4 additions & 4 deletions include/gridtools/common/binops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ namespace gridtools {
namespace binop {
struct sum {
template <class Lhs, class Rhs>
GT_FUNCTION constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
GT_AUTO_RETURN(const_expr::forward<Lhs>(lhs) + const_expr::forward<Rhs>(rhs));
GT_FUNCTION GT_CONSTEXPR auto operator()(Lhs &&lhs, Rhs &&rhs) const
GT_AUTO_RETURN(wstd::forward<Lhs>(lhs) + wstd::forward<Rhs>(rhs));
};

struct prod {
template <class Lhs, class Rhs>
GT_FUNCTION constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
GT_AUTO_RETURN(const_expr::forward<Lhs>(lhs) * const_expr::forward<Rhs>(rhs));
GT_FUNCTION GT_CONSTEXPR auto operator()(Lhs &&lhs, Rhs &&rhs) const
GT_AUTO_RETURN(wstd::forward<Lhs>(lhs) * wstd::forward<Rhs>(rhs));
};
} // namespace binop
} // namespace gridtools
21 changes: 7 additions & 14 deletions include/gridtools/common/boollist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,25 +46,18 @@ namespace gridtools {
array<bool, I> m_value;

public:
GT_FUNCTION
constexpr uint_t const &size() const { return m_size; }
GT_CONSTEXPR GT_FUNCTION uint_t const &size() const { return m_size; }

GT_FUNCTION
constexpr bool const &value(uint_t const &id) const { return m_value[id]; }
GT_FUNCTION
constexpr array<bool, I> const &value() const { return m_value; }
GT_CONSTEXPR GT_FUNCTION bool const &value(uint_t const &id) const { return m_value[id]; }
GT_CONSTEXPR GT_FUNCTION array<bool, I> const &value() const { return m_value; }

GT_FUNCTION
boollist(bool v0) : m_value{v0} {}
GT_FUNCTION boollist(bool v0) : m_value{v0} {}

GT_FUNCTION
boollist(bool v0, bool v1) : m_value{v0, v1} {}
GT_FUNCTION boollist(bool v0, bool v1) : m_value{v0, v1} {}

GT_FUNCTION
boollist(bool v0, bool v1, bool v2) : m_value{v0, v1, v2} {}
GT_FUNCTION boollist(bool v0, bool v1, bool v2) : m_value{v0, v1, v2} {}

GT_FUNCTION
boollist(boollist const &bl) : m_value(bl.m_value) {}
GT_FUNCTION boollist(boollist const &bl) : m_value(bl.m_value) {}

GT_FUNCTION
void copy_out(bool *arr) const {
Expand Down
14 changes: 7 additions & 7 deletions include/gridtools/common/compose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ namespace gridtools {
G m_g;

template <class... Args>
constexpr GT_TARGET GT_FORCE_INLINE auto operator()(Args &&... args) const
GT_AUTO_RETURN(m_f(m_g(const_expr::forward<Args>(args)...)));
GT_CONSTEXPR GT_TARGET GT_FORCE_INLINE auto operator()(Args &&... args) const
GT_AUTO_RETURN(m_f(m_g(wstd::forward<Args>(args)...)));
};

template <class F, class... Fs>
struct composed_f<F, Fs...> : composed_f<F, composed_f<Fs...>> {
constexpr GT_TARGET GT_FORCE_INLINE composed_f(F f, Fs... fs)
: composed_f<F, composed_f<Fs...>>{const_expr::move(f), {const_expr::move(fs)...}} {}
GT_CONSTEXPR GT_TARGET GT_FORCE_INLINE composed_f(F f, Fs... fs)
: composed_f<F, composed_f<Fs...>>{wstd::move(f), {wstd::move(fs)...}} {}
};
} // namespace compose_impl_

Expand All @@ -52,12 +52,12 @@ namespace gridtools {
/// compose(a, b, c)(x, y) <==> a(b(c(x, y)))
///
template <class... Funs>
constexpr GT_TARGET GT_FORCE_INLINE compose_impl_::composed_f<Funs...> compose(Funs && ... funs) {
return {const_expr::forward<Funs>(funs)...};
GT_CONSTEXPR GT_TARGET GT_FORCE_INLINE compose_impl_::composed_f<Funs...> compose(Funs && ... funs) {
return {wstd::forward<Funs>(funs)...};
}

template <class Fun>
constexpr GT_TARGET GT_FORCE_INLINE Fun compose(Fun && fun) {
GT_CONSTEXPR GT_TARGET GT_FORCE_INLINE Fun compose(Fun && fun) {
return fun;
}
}
Expand Down
8 changes: 7 additions & 1 deletion include/gridtools/common/defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@
#endif
#endif

/**
 * `GT_CONSTEXPR` expands to `constexpr`, except when `__CUDA_ARCH__` is defined (i.e. nvcc is compiling device
 * code), where it expands to nothing so that the annotated functions are ordinary non-constexpr functions.
 */
#ifdef __CUDA_ARCH__
#define GT_CONSTEXPR
#else
#define GT_CONSTEXPR constexpr
#endif

/**
* Macro that makes a function constexpr only under C++14 or later (for functions whose body is more than a single return statement)
*/
#if __cplusplus >= 201402L
#define GT_CXX14CONSTEXPR constexpr
#define GT_CXX14CONSTEXPR GT_CONSTEXPR
#else
#define GT_CXX14CONSTEXPR
#endif
Expand Down
9 changes: 5 additions & 4 deletions include/gridtools/common/functional.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <utility>

#include "./generic_metafunctions/utility.hpp"
#include "./host_device.hpp"

#define GT_FILENAME <gridtools/common/functional.hpp>
Expand All @@ -46,8 +47,8 @@ namespace gridtools {
template <typename T>
struct ctor {
template <typename... Args>
GT_TARGET GT_FORCE_INLINE constexpr T operator()(Args &&... args) const {
return T{std::forward<Args>(args)...};
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR T operator()(Args &&... args) const {
return T{wstd::forward<Args>(args)...};
}

#ifndef BOOST_RESULT_OF_USE_DECLTYPE
Expand All @@ -70,7 +71,7 @@ namespace gridtools {
//
struct identity {
template <typename Arg>
GT_TARGET GT_FORCE_INLINE constexpr Arg operator()(Arg &&arg) const {
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Arg operator()(Arg &&arg) const {
return arg;
}

Expand All @@ -88,7 +89,7 @@ namespace gridtools {
//
struct clone {
template <typename Arg>
GT_TARGET GT_FORCE_INLINE constexpr Arg operator()(Arg const &arg) const {
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Arg operator()(Arg const &arg) const {
return arg;
}
#ifndef BOOST_RESULT_OF_USE_DECLTYPE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace gridtools {
* The use of identity creates a non-deduced form, so that the explicit template argument must be supplied
*/
template <class T>
GT_FUNCTION constexpr T implicit_cast(typename meta::lazy::id<T>::type x) {
GT_FUNCTION GT_CONSTEXPR T implicit_cast(typename meta::lazy::id<T>::type x) {
return x;
}
} // namespace gridtools
26 changes: 14 additions & 12 deletions include/gridtools/common/generic_metafunctions/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,35 @@

#pragma once

#include <tuple>
#include <type_traits>
#include <utility>

#include "../defs.hpp"
#include "../host_device.hpp"

namespace gridtools {
/**
* `std::forward`/`std::move` versions that are guaranteed to be constexpr
* `std::forward`/`std::move` replacements that are constexpr only where the compiler copes well with it
* (they are declared with `GT_CONSTEXPR`). They are needed because some compilers, especially nvcc, have problems
* with functions that return references in constexpr functions when they are not used in a constexpr context.
* As the `std` versions are constexpr, we must have separate functions that are constexpr only if the compiler
* is known not to mess them up.
*/
namespace const_expr {
// cuda < 9.2 doesn't have std::move/std::forward defined as `constexpr`
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ < 9 || __CUDACC_VER_MAJOR__ == 9 && __CUDACC_VER_MINOR__ < 2)
namespace wstd {
template <class T>
constexpr __device__ __host__ typename std::remove_reference<T>::type &&move(T &&obj) noexcept {
GT_CONSTEXPR GT_HOST_DEVICE typename std::remove_reference<T>::type &&move(T &&obj) noexcept {
return static_cast<typename std::remove_reference<T>::type &&>(obj);
}
template <class T>
constexpr __device__ __host__ T &&forward(typename std::remove_reference<T>::type &obj) noexcept {
GT_CONSTEXPR GT_HOST_DEVICE T &&forward(typename std::remove_reference<T>::type &obj) noexcept {
return static_cast<T &&>(obj);
}
template <class T>
constexpr __device__ __host__ T &&forward(typename std::remove_reference<T>::type &&obj) noexcept {
GT_CONSTEXPR GT_HOST_DEVICE T &&forward(typename std::remove_reference<T>::type &&obj) noexcept {
static_assert(
!std::is_lvalue_reference<T>::value, "Error: obj is instantiated with an lvalue reference type");
return static_cast<T &&>(obj);
}
#else
using std::forward;
using std::move;
#endif
} // namespace const_expr

} // namespace wstd
} // namespace gridtools
Loading

0 comments on commit 9fa2862

Please sign in to comment.