Skip to content

Commit

Permalink
Revert "Fix performance for CUDA >= 9.2 (master) (#1327)"
Browse files Browse the repository at this point in the history
This reverts commit 9bb2d64.
  • Loading branch information
havogt authored Jul 16, 2019
1 parent 4fb0a39 commit 27a49d3
Show file tree
Hide file tree
Showing 52 changed files with 3,405 additions and 3,433 deletions.
9 changes: 4 additions & 5 deletions include/gridtools/common/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "../meta/macros.hpp"
#include "../meta/repeat.hpp"
#include "defs.hpp"
#include "generic_metafunctions/const_ref.hpp"
#include "generic_metafunctions/utility.hpp"
#include "gt_assert.hpp"
#include "host_device.hpp"
Expand Down Expand Up @@ -118,13 +117,13 @@ namespace gridtools {
}

template <size_t I, typename T, size_t D>
static GT_FUNCTION GT_CONSTEXPR const_ref<T> get(const array<T, D> &arr) noexcept {
static GT_FUNCTION GT_CONSTEXPR const T &get(const array<T, D> &arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return arr.m_array[I];
}

template <size_t I, typename T, size_t D>
static GT_FUNCTION GT_CONSTEXPR T get(array<T, D> &&arr) noexcept {
static GT_FUNCTION GT_CONSTEXPR T &&get(array<T, D> &&arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return wstd::move(arr.m_array[I]);
}
Expand Down Expand Up @@ -188,13 +187,13 @@ namespace gridtools {
}

template <size_t I, typename T, size_t D>
GT_FUNCTION GT_CONSTEXPR const_ref<T> get(const array<T, D> &arr) noexcept {
GT_FUNCTION GT_CONSTEXPR const T &get(const array<T, D> &arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return arr.m_array[I];
}

template <size_t I, typename T, size_t D>
GT_FUNCTION GT_CONSTEXPR T get(array<T, D> &&arr) noexcept {
GT_FUNCTION GT_CONSTEXPR T &&get(array<T, D> &&arr) noexcept {
GT_STATIC_ASSERT(I < D, "index is out of bounds");
return wstd::move(get<I>(arr));
}
Expand Down
4 changes: 4 additions & 0 deletions include/gridtools/common/defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
@brief global definitions
*/

// GT_CONSTEXPR expands to `constexpr`, except when __CUDA_ARCH__ is defined (the CUDA device compilation pass),
// where it expands to nothing, so functions marked GT_CONSTEXPR are not declared constexpr in that pass.
#ifdef __CUDA_ARCH__
#define GT_CONSTEXPR
#else
#define GT_CONSTEXPR constexpr
#endif

#define GT_RESTRICT __restrict__

Expand Down
31 changes: 0 additions & 31 deletions include/gridtools/common/generic_metafunctions/const_ref.hpp

This file was deleted.

19 changes: 12 additions & 7 deletions include/gridtools/common/gt_assert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
#pragma once
#include <cassert>
#include <stdexcept>

/** \ingroup common
Expand All @@ -17,13 +16,19 @@
@{
*/

#ifdef __CUDA_ARCH__
#if __CUDACC_VER_MAJOR__ == 9 && __CUDACC_VER_MINOR__ == 2
// we define this macro to an empty string for CUDA 9.2 because in certain cases, CUDA 9.2 tries to compile device
// instantiations of certain constexpr function templates, which can lead to compile-time errors like "cannot use an
// entity undefined in device code".
#define __PRETTY_FUNCTION__ ""
#ifdef __CUDACC__
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
// use the CUDA device-side assert, which is available for compute capability 2.0 and above
#include <assert.h>
#else
#undef assert
#define assert(e)
#endif
#else
#include <cassert>
#endif

#ifdef __CUDA_ARCH__
#define GT_ASSERT_OR_THROW(cond, msg) assert(cond)
#else
#define GT_ASSERT_OR_THROW(cond, msg) \
Expand Down
9 changes: 4 additions & 5 deletions include/gridtools/common/pair.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include <utility>

#include "defs.hpp"
#include "generic_metafunctions/const_ref.hpp"
#include "generic_metafunctions/utility.hpp"
#include "host_device.hpp"

Expand Down Expand Up @@ -130,30 +129,30 @@ namespace gridtools {
template <>
struct pair_get<0> {
template <typename T1, typename T2>
static GT_CONSTEXPR GT_FUNCTION const_ref<T1> const_get(const pair<T1, T2> &p) noexcept {
static GT_CONSTEXPR GT_FUNCTION const T1 &const_get(const pair<T1, T2> &p) noexcept {
return p.first;
}
template <typename T1, typename T2>
static GT_CONSTEXPR GT_FUNCTION T1 &get(pair<T1, T2> &p) noexcept {
return p.first;
}
template <typename T1, typename T2>
static GT_CONSTEXPR GT_FUNCTION T1 move_get(pair<T1, T2> &&p) noexcept {
static GT_CONSTEXPR GT_FUNCTION T1 &&move_get(pair<T1, T2> &&p) noexcept {
return wstd::move(p.first);
}
};
template <>
struct pair_get<1> {
template <typename T1, typename T2>
static GT_CONSTEXPR GT_FUNCTION const_ref<T2> const_get(const pair<T1, T2> &p) noexcept {
static GT_CONSTEXPR GT_FUNCTION const T2 &const_get(const pair<T1, T2> &p) noexcept {
return p.second;
}
template <typename T1, typename T2>
static GT_CONSTEXPR GT_FUNCTION T2 &get(pair<T1, T2> &p) noexcept {
return p.second;
}
template <typename T1, typename T2>
static GT_CONSTEXPR GT_FUNCTION T2 move_get(pair<T1, T2> &&p) noexcept {
static GT_CONSTEXPR GT_FUNCTION T2 &&move_get(pair<T1, T2> &&p) noexcept {
return wstd::move(p.second);
}
};
Expand Down
17 changes: 8 additions & 9 deletions include/gridtools/common/tuple.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

#include "../meta/type_traits.hpp"
#include "defs.hpp"
#include "generic_metafunctions/const_ref.hpp"
#include "generic_metafunctions/utility.hpp"
#include "host_device.hpp"

Expand Down Expand Up @@ -51,7 +50,7 @@ namespace gridtools {

struct tuple_leaf_getter {
template <size_t I, class T>
static GT_CONSTEXPR GT_FUNCTION const_ref<T> get(tuple_leaf<I, T, false> const &obj) noexcept {
static GT_CONSTEXPR GT_FUNCTION T const &get(tuple_leaf<I, T, false> const &obj) noexcept {
return obj.m_value;
}

Expand All @@ -61,12 +60,12 @@ namespace gridtools {
}

template <size_t I, class T>
static GT_CONSTEXPR GT_FUNCTION T get(tuple_leaf<I, T, false> &&obj) noexcept {
static GT_CONSTEXPR GT_FUNCTION T &&get(tuple_leaf<I, T, false> &&obj) noexcept {
return static_cast<T &&>(get<I>(obj));
}

template <size_t I, class T>
static GT_CONSTEXPR GT_FUNCTION const_ref<T> get(tuple_leaf<I, T, true> const &obj) noexcept {
static GT_CONSTEXPR GT_FUNCTION T const &get(tuple_leaf<I, T, true> const &obj) noexcept {
return obj;
}

Expand All @@ -76,7 +75,7 @@ namespace gridtools {
}

template <size_t I, class T>
static GT_CONSTEXPR GT_FUNCTION T get(tuple_leaf<I, T, true> &&obj) noexcept {
static GT_CONSTEXPR GT_FUNCTION T &&get(tuple_leaf<I, T, true> &&obj) noexcept {
return static_cast<T &&>(obj);
}
};
Expand Down Expand Up @@ -172,7 +171,7 @@ namespace gridtools {
tuple &operator=(tuple const &) = default;
tuple &operator=(tuple &&) = default;

GT_CONSTEXPR GT_FUNCTION tuple(const_ref<Ts>... args) noexcept : m_impl(args...) {}
GT_CONSTEXPR GT_FUNCTION tuple(Ts const &... args) noexcept : m_impl(args...) {}

template <class... Args,
std::enable_if_t<sizeof...(Ts) == sizeof...(Args) &&
Expand Down Expand Up @@ -206,7 +205,7 @@ namespace gridtools {
T m_value;
struct getter {
template <size_t I, std::enable_if_t<I == 0, int> = 0>
static GT_CONSTEXPR GT_FUNCTION const_ref<T> get(tuple const &obj) noexcept {
static GT_CONSTEXPR GT_FUNCTION T const &get(tuple const &obj) noexcept {
return obj.m_value;
}

Expand All @@ -216,7 +215,7 @@ namespace gridtools {
}

template <size_t I, std::enable_if_t<I == 0, int> = 0>
static GT_CONSTEXPR GT_FUNCTION T get(tuple &&obj) noexcept {
static GT_CONSTEXPR GT_FUNCTION T &&get(tuple &&obj) noexcept {
return static_cast<T &&>(obj.m_value);
}
};
Expand All @@ -233,7 +232,7 @@ namespace gridtools {
tuple &operator=(tuple const &) = default;
tuple &operator=(tuple &&) = default;

GT_CONSTEXPR GT_FUNCTION tuple(const_ref<T> arg) noexcept : m_value(arg) {}
GT_CONSTEXPR GT_FUNCTION tuple(T const &arg) noexcept : m_value(arg) {}

template <class Arg, std::enable_if_t<std::is_constructible<T, Arg &&>::value, int> = 0>
GT_CONSTEXPR GT_FUNCTION tuple(Arg &&arg) noexcept : m_value(wstd::forward<Arg>(arg)) {}
Expand Down
32 changes: 17 additions & 15 deletions include/gridtools/common/tuple_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,10 @@ namespace gridtools {
enum class ref_kind { rvalue, lvalue, const_lvalue };

template <class>
struct get_ref_kind : std::integral_constant<ref_kind, ref_kind::rvalue> {};
struct get_ref_kind;

template <class T>
struct get_ref_kind<T &&> : std::integral_constant<ref_kind, ref_kind::rvalue> {};

template <class T>
struct get_ref_kind<T &> : std::integral_constant<ref_kind, ref_kind::lvalue> {};
Expand All @@ -266,9 +269,7 @@ namespace gridtools {
struct add_ref;

template <class T>
struct add_ref<ref_kind::rvalue, T> {
using type = T;
};
struct add_ref<ref_kind::rvalue, T> : std::add_rvalue_reference<T> {};

template <class T>
struct add_ref<ref_kind::lvalue, T> : std::add_lvalue_reference<T> {};
Expand Down Expand Up @@ -418,7 +419,8 @@ namespace gridtools {
template <class Tup,
class... Tups,
class Is = meta::make_indices<size<std::decay_t<Tup>>>,
class Res = from_types<Tup, get_results_t<Is, get_accessors<Tup>, get_accessors<Tups>...>>>
class Res =
from_types<Tup, get_results_t<Is, get_accessors<Tup &&>, get_accessors<Tups &&>...>>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup, Tups &&... tups) const {
using generators_t = meta::transform<get_transform_index_generator, Is>;
return generate_f<generators_t, Res>{}(
Expand Down Expand Up @@ -515,7 +517,7 @@ namespace gridtools {
meta::make_indices_for<InnerTup>>;

template <class Tup,
class Accessors = meta::transform<get_accessors, get_accessors<Tup>>,
class Accessors = meta::transform<get_accessors, get_accessors<Tup &&>>,
class First = meta::first<to_types<Tup>>,
class Res = from_types<First, meta::flatten<Accessors>>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
Expand All @@ -532,7 +534,7 @@ namespace gridtools {
using get_drop_front_generator = get_nth_f<N + I::value>;

template <class Tup,
class Accessors = get_accessors<Tup>,
class Accessors = get_accessors<Tup &&>,
class Res = from_types<Tup, meta::drop_front_c<N, Accessors>>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
using generators =
Expand All @@ -556,7 +558,7 @@ namespace gridtools {
struct push_back_f {
template <class Tup,
class... Args,
class Accessors = get_accessors<Tup>,
class Accessors = get_accessors<Tup &&>,
class Res = from_types<Tup, meta::push_back<Accessors, Args &&...>>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup, Args &&... args) const {
return push_back_impl_f<std::make_index_sequence<size<Accessors>::value>, Res>{}(
Expand All @@ -579,7 +581,7 @@ namespace gridtools {
struct push_front_f {
template <class Tup,
class... Args,
class Accessors = get_accessors<Tup>,
class Accessors = get_accessors<Tup &&>,
class Res = from_types<Tup, meta::push_front<Accessors, Args &&...>>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup, Args &&... args) const {
return push_front_impl_f<std::make_index_sequence<size<Accessors>::value>, Res>{}(
Expand Down Expand Up @@ -632,7 +634,7 @@ namespace gridtools {
size_t N,
class State,
class Tup,
class AllAccessors = get_accessors<Tup>,
class AllAccessors = get_accessors<Tup &&>,
class Accessors = meta::drop_front_c<I, AllAccessors>,
class Res = meta::lfold<meta_fun, State &&, Accessors>,
std::enable_if_t<(I + 4 < N), int> = 0>
Expand All @@ -649,15 +651,15 @@ namespace gridtools {

template <class State,
class Tup,
class Accessors = get_accessors<Tup>,
class Accessors = get_accessors<Tup &&>,
class Res = meta::lfold<meta_fun, State &&, Accessors>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(State &&state, Tup &&tup) const {
return impl<0, size<std::decay_t<Tup>>::value>(
wstd::forward<State>(state), wstd::forward<Tup>(tup));
}

template <class Tup,
class AllAccessors = get_accessors<Tup>,
class AllAccessors = get_accessors<Tup &&>,
class StateAccessor = meta::first<AllAccessors>,
class Accessors = meta::drop_front_c<1, AllAccessors>,
class Res = meta::lfold<meta_fun, StateAccessor, Accessors>>
Expand Down Expand Up @@ -751,7 +753,7 @@ namespace gridtools {

template <class Tup,
class First = meta::first<to_types<Tup>>,
class Accessors = meta::transform<get_accessors, get_accessors<Tup>>,
class Accessors = meta::transform<get_accessors, get_accessors<Tup &&>>,
class Types = meta::transpose<Accessors>,
class InnerTuples = meta::transform<get_inner_tuple_f<Tup>::template apply, Types>,
class Res = from_types<First, InnerTuples>>
Expand All @@ -772,7 +774,7 @@ namespace gridtools {
};

template <class Tup,
class Accessors = get_accessors<Tup>,
class Accessors = get_accessors<Tup &&>,
class Res = from_types<Tup, meta::reverse<Accessors>>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
using n_t = size<std::decay_t<Tup>>;
Expand Down Expand Up @@ -811,7 +813,7 @@ namespace gridtools {
meta::if_c<I::value == N, insert_val_generator_f, insert_tup_generator_f<I::value - 1>>>;

template <class Tup,
class Accessors = get_accessors<Tup>,
class Accessors = get_accessors<Tup &&>,
class Types = meta::insert_c<N, Accessors, Val>,
class Res = from_types<Tup, Types>>
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,10 @@ namespace gridtools {
return arg;
}

// Intel compiler 18.0 segfaults if the argument is taken by value. On the other hand, nvcc performs much worse in
// the dycore if it is taken by lvalue reference.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER <= 1800)
template <class Eval, class Arg, std::enable_if_t<!std::is_arithmetic<Arg>::value, int> = 0>
GT_FUNCTION GT_CONSTEXPR decltype(auto) apply_eval(Eval &eval, Arg const &arg) {
return eval(arg);
}
#else
template <class Eval, class Arg, std::enable_if_t<!std::is_arithmetic<Arg>::value, int> = 0>
GT_FUNCTION GT_CONSTEXPR decltype(auto) apply_eval(Eval &eval, Arg arg) {
return eval(wstd::move(arg));
}
#endif

template <class Eval, class Op, class Arg>
GT_FUNCTION GT_CONSTEXPR auto value(Eval &eval, expr<Op, Arg> const &arg) {
Expand Down
Loading

0 comments on commit 27a49d3

Please sign in to comment.