Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Follow-ups to the CUDA framework PR review #429

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7a21528
Go back to forward declare WaitingTask
makortel Dec 4, 2019
0f42a7e
Fix comment
makortel Dec 4, 2019
6e5543c
Enable CUDA for compute capability 3.5
makortel Dec 5, 2019
66effb1
Add CUDA existence protections to BuildFiles
makortel Dec 5, 2019
417d558
Clean up CUDAService
makortel Dec 5, 2019
2decd6a
Use iftool instead of ifarchitecture
makortel Dec 5, 2019
29430c4
CUDAServices does not directly depend on cub
makortel Dec 5, 2019
fc90c2c
Clean up CUDATest
makortel Dec 13, 2019
14a992e
Mark thread-safe static variables with CMS_THREAD_SAFE
makortel Dec 13, 2019
cc0991b
Move mutability of a member from TestCUDAAnalyzerGPUKernel to TestCUD…
makortel Dec 13, 2019
3cdc5cc
Guarantee that cache returns only occurred events
makortel Dec 16, 2019
e3b3cfb
Always record and query the CUDA event, to minimize need for error ch…
makortel Dec 16, 2019
47bdfdc
Add comment motivating cudautils::MessageLogger
makortel Jan 6, 2020
541c916
Use hasCUDADevices() for host_noncached_unique_ptr_t as well
makortel Jan 6, 2020
a9b026c
Test reset of multiple elements
makortel Jan 7, 2020
072a823
Rename {hasCUDA,requireCUDA}Devices to cms::cudatest::{test,require}D…
makortel Jan 6, 2020
8c47b5d
Propagate {hasCUDA,requireCUDA}Devices -> cms::cudatest::{test,requir…
makortel Jan 8, 2020
83a3ac8
Added comments to highlight the pieces in CachingDeviceAllocator that…
makortel Jan 7, 2020
4816af0
Add a comment motivating assert_t.cu
makortel Jan 7, 2020
36ae7e8
Rename cudautils::cudaDeviceCount() to cudautils::deviceCount()
makortel Jan 7, 2020
0080fc0
Remove redundant calls to cudaDeviceCount()
makortel Jan 8, 2020
978ad64
Rename cudautils::CUDAEventCache -> cudautils::EventCache
makortel Jan 7, 2020
1d441e2
Rename cudautils::CUDAStreamCache -> cudautils::StreamCache
makortel Jan 7, 2020
20f7926
Rename cudautils::eventIsOccurred() -> cudautils::eventWorkHasComplet…
makortel Jan 7, 2020
034b34a
Use eventWorkHasCompleted() in EventCache
makortel Jan 7, 2020
f51beed
Rename all core CUDAX -> cms::cuda::X, and test CUDAX -> cms::cudates…
makortel Jan 8, 2020
cc3a9ee
Propagate CUDAX->cms::cuda::X rename
makortel Jan 8, 2020
083ac40
Rename cudautils namespace to cms::cuda
makortel Jan 14, 2020
55bf994
Propagate cudautils->cms::cuda rename
makortel Jan 15, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class BeamSpotCUDA {
Data const* data() const { return data_d_.get(); }

private:
cudautils::device::unique_ptr<Data> data_d_;
cms::cuda::device::unique_ptr<Data> data_d_;
};

#endif
2 changes: 1 addition & 1 deletion CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cudaStream_t stream) {
data_d_ = cudautils::make_device_unique<Data>(stream);
data_d_ = cms::cuda::make_device_unique<Data>(stream);
cudaCheck(cudaMemcpyAsync(data_d_.get(), data_h, sizeof(Data), cudaMemcpyHostToDevice, stream));
}
2 changes: 1 addition & 1 deletion CUDADataFormats/BeamSpot/src/classes.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CUDADataFormats_BeamSpot_classes_h
#define CUDADataFormats_BeamSpot_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"

Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/BeamSpot/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<lcgdict>
<class name="CUDAProduct<BeamSpotCUDA>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<BeamSpotCUDA>>" persistent="false"/>
<class name="cms::cuda::Product<BeamSpotCUDA>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<BeamSpotCUDA>>" persistent="false"/>
</lcgdict>
2 changes: 2 additions & 0 deletions CUDADataFormats/Common/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<iftool name="cuda-gcc-support">
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
</iftool>
55 changes: 0 additions & 55 deletions CUDADataFormats/Common/interface/CUDAProduct.h

This file was deleted.

90 changes: 0 additions & 90 deletions CUDADataFormats/Common/interface/CUDAProductBase.h

This file was deleted.

40 changes: 20 additions & 20 deletions CUDADataFormats/Common/interface/HeterogeneousSoA.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class HeterogeneousSoA {
HeterogeneousSoA(HeterogeneousSoA &&) = default;
HeterogeneousSoA &operator=(HeterogeneousSoA &&) = default;

explicit HeterogeneousSoA(cudautils::device::unique_ptr<T> &&p) : dm_ptr(std::move(p)) {}
explicit HeterogeneousSoA(cudautils::host::unique_ptr<T> &&p) : hm_ptr(std::move(p)) {}
explicit HeterogeneousSoA(cms::cuda::device::unique_ptr<T> &&p) : dm_ptr(std::move(p)) {}
explicit HeterogeneousSoA(cms::cuda::host::unique_ptr<T> &&p) : hm_ptr(std::move(p)) {}
explicit HeterogeneousSoA(std::unique_ptr<T> &&p) : std_ptr(std::move(p)) {}

auto const *get() const { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
Expand All @@ -36,74 +36,74 @@ class HeterogeneousSoA {
auto *operator-> () { return get(); }

// in reality valid only for GPU version...
cudautils::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
assert(dm_ptr);
auto ret = cudautils::make_host_unique<T>(stream);
auto ret = cms::cuda::make_host_unique<T>(stream);
cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
return ret;
}

private:
// a union wan't do it, a variant will not be more efficienct
cudautils::device::unique_ptr<T> dm_ptr; //!
cudautils::host::unique_ptr<T> hm_ptr; //!
cms::cuda::device::unique_ptr<T> dm_ptr; //!
cms::cuda::host::unique_ptr<T> hm_ptr; //!
std::unique_ptr<T> std_ptr; //!
};

namespace cudaCompat {

struct GPUTraits {
template <typename T>
using unique_ptr = cudautils::device::unique_ptr<T>;
using unique_ptr = cms::cuda::device::unique_ptr<T>;

template <typename T>
static auto make_unique(cudaStream_t stream) {
return cudautils::make_device_unique<T>(stream);
return cms::cuda::make_device_unique<T>(stream);
}

template <typename T>
static auto make_unique(size_t size, cudaStream_t stream) {
return cudautils::make_device_unique<T>(size, stream);
return cms::cuda::make_device_unique<T>(size, stream);
}

template <typename T>
static auto make_host_unique(cudaStream_t stream) {
return cudautils::make_host_unique<T>(stream);
return cms::cuda::make_host_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(cudaStream_t stream) {
return cudautils::make_device_unique<T>(stream);
return cms::cuda::make_device_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(size_t size, cudaStream_t stream) {
return cudautils::make_device_unique<T>(size, stream);
return cms::cuda::make_device_unique<T>(size, stream);
}
};

struct HostTraits {
template <typename T>
using unique_ptr = cudautils::host::unique_ptr<T>;
using unique_ptr = cms::cuda::host::unique_ptr<T>;

template <typename T>
static auto make_unique(cudaStream_t stream) {
return cudautils::make_host_unique<T>(stream);
return cms::cuda::make_host_unique<T>(stream);
}

template <typename T>
static auto make_host_unique(cudaStream_t stream) {
return cudautils::make_host_unique<T>(stream);
return cms::cuda::make_host_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(cudaStream_t stream) {
return cudautils::make_device_unique<T>(stream);
return cms::cuda::make_device_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(size_t size, cudaStream_t stream) {
return cudautils::make_device_unique<T>(size, stream);
return cms::cuda::make_device_unique<T>(size, stream);
}
};

Expand Down Expand Up @@ -158,7 +158,7 @@ class HeterogeneousSoAImpl {

T *get() { return m_ptr.get(); }

cudautils::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;

private:
unique_ptr<T> m_ptr; //!
Expand All @@ -171,8 +171,8 @@ HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cudaStream_t stream) {

// in reality valid only for GPU version...
template <typename T, typename Traits>
cudautils::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<T>(stream);
cms::cuda::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<T>(stream);
cudaCheck(cudaMemcpyAsync(ret.get(), get(), sizeof(T), cudaMemcpyDefault, stream));
return ret;
}
Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/Common/interface/HostProduct.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class HostProduct {
HostProduct(HostProduct&&) = default;
HostProduct& operator=(HostProduct&&) = default;

explicit HostProduct(cudautils::host::unique_ptr<T>&& p) : hm_ptr(std::move(p)) {}
explicit HostProduct(cms::cuda::host::unique_ptr<T>&& p) : hm_ptr(std::move(p)) {}
explicit HostProduct(std::unique_ptr<T>&& p) : std_ptr(std::move(p)) {}

auto const* get() const { return hm_ptr ? hm_ptr.get() : std_ptr.get(); }
Expand All @@ -22,7 +22,7 @@ class HostProduct {
auto const* operator-> () const { return get(); }

private:
cudautils::host::unique_ptr<T> hm_ptr; //!
cms::cuda::host::unique_ptr<T> hm_ptr; //!
std::unique_ptr<T> std_ptr; //!
};

Expand Down
60 changes: 60 additions & 0 deletions CUDADataFormats/Common/interface/Product.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#ifndef CUDADataFormats_Common_Product_h
#define CUDADataFormats_Common_Product_h

#include <memory>

#include "CUDADataFormats/Common/interface/ProductBase.h"

namespace edm {
template <typename T>
class Wrapper;
}

namespace cms {
namespace cuda {
namespace impl {
class ScopedContextGetterBase;
}

/**
* The purpose of this class is to wrap CUDA data to edm::Event in a
* way which forces correct use of various utilities.
*
* The non-default construction has to be done with cms::cuda::ScopedContext
* (in order to properly register the CUDA event).
*
* The default constructor is needed only for the ROOT dictionary generation.
*
* The CUDA event is in practice needed only for stream-stream
* synchronization, but someone with long-enough lifetime has to own
* it. Here is a somewhat natural place. If overhead is too much, we
* can use them only where synchronization between streams is needed.
*/
template <typename T>
class Product : public ProductBase {
public:
Product() = default; // Needed only for ROOT dictionary generation

Product(const Product&) = delete;
Product& operator=(const Product&) = delete;
Product(Product&&) = default;
Product& operator=(Product&&) = default;

private:
friend class impl::ScopedContextGetterBase;
friend class ScopedContextProduce;
friend class edm::Wrapper<Product<T>>;

explicit Product(int device, SharedStreamPtr stream, SharedEventPtr event, T data)
: ProductBase(device, std::move(stream), std::move(event)), data_(std::move(data)) {}

template <typename... Args>
explicit Product(int device, SharedStreamPtr stream, SharedEventPtr event, Args&&... args)
: ProductBase(device, std::move(stream), std::move(event)), data_(std::forward<Args>(args)...) {}

T data_; //!
};
} // namespace cuda
} // namespace cms

#endif
Loading