Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Heterogeneous renumbering implementation #4602

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f82dee3
update the API & documentation
seunghwak Jul 30, 2024
2d8baa2
remove unused code
seunghwak Aug 1, 2024
f400c24
update heterogeneous renumbering API
seunghwak Aug 1, 2024
909f575
implemeent heterogeneous renumbeering
seunghwak Aug 1, 2024
eafe349
remove thrust calls from sampling post processing tests
seunghwak Aug 2, 2024
4798df5
convert post processing test file from .cu to .cpp
seunghwak Aug 2, 2024
93e5dc4
add test utility functions to generate edge IDs
seunghwak Aug 5, 2024
e473bf8
bug fixes
seunghwak Aug 7, 2024
711c99a
add heterogeneous post processing validation functions
seunghwak Aug 7, 2024
48cc770
tweak sampling post processing tests
seunghwak Aug 7, 2024
06f5598
add a utility function to set edge type based on src & dst vertices
seunghwak Aug 7, 2024
47ef442
add more expensive checks
seunghwak Aug 7, 2024
1aa9c06
update sampling post processing validation functions
seunghwak Aug 7, 2024
5e53f65
update vertex renumber map validation code
seunghwak Aug 7, 2024
10a209c
add tests for heterogeneous post processing
seunghwak Aug 7, 2024
8578fa7
tests/sampling/sampling_heterogeneous_post_processing_test.cpp
seunghwak Aug 8, 2024
58a9ab3
edge ID renumber map validation
seunghwak Aug 8, 2024
a73aa52
update documentation
seunghwak Aug 8, 2024
e407219
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 8, 2024
a64859b
clang-format
seunghwak Aug 8, 2024
c328b8a
set do_expensive_check to false
seunghwak Aug 8, 2024
861014d
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 13, 2024
871ab91
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 15, 2024
d6770c4
fix merge conflicts
seunghwak Aug 21, 2024
5f3fdc4
fix copyright year
seunghwak Aug 22, 2024
caac26d
fix a typo in documentation
seunghwak Aug 22, 2024
0d21965
remove unnecessary checks
seunghwak Aug 22, 2024
6c240d9
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 23, 2024
fd25d3b
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 28, 2024
d52c64c
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 28, 2024
3c87c43
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 29, 2024
a921b99
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 30, 2024
01eeb1d
remove a possibly erroneous check
seunghwak Aug 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,6 @@ set(CUGRAPH_SOURCES
src/sampling/negative_sampling_mg_v32_e64.cu
src/sampling/negative_sampling_mg_v32_e32.cu
src/sampling/negative_sampling_mg_v64_e64.cu
src/sampling/renumber_sampled_edgelist_sg_v64_e64.cu
src/sampling/renumber_sampled_edgelist_sg_v32_e32.cu
src/sampling/sampling_post_processing_sg_v64_e64.cu
src/sampling/sampling_post_processing_sg_v32_e32.cu
src/sampling/sampling_post_processing_sg_v32_e64.cu
Expand Down
22 changes: 22 additions & 0 deletions cpp/include/cugraph/detail/utility_wrappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,28 @@ void sequence_fill(rmm::cuda_stream_view const& stream_view,
size_t size,
value_t start_value);

/**
* @brief Fill a buffer with an arithmetic sequence of values using the input stride
*
* Element i of the buffer (0 <= i < @p size) is set to @p start_value + @p stride * i, i.e. the
* buffer is filled with:
* {start_value, start_value+stride, start_value+stride*2, ..., start_value+stride*(size-1)}
*
* @tparam value_t type of the value to operate on
*
* @param[in] stream_view stream view
* @param[out] d_value device array to fill (@p size elements are written)
* @param[in] size number of elements in array
* @param[in] start_value starting value for sequence (value of the first element)
* @param[in] stride difference between two consecutive elements of the sequence
*
*/
template <typename value_t>
void stride_fill(rmm::cuda_stream_view const& stream_view,
value_t* d_value,
size_t size,
value_t start_value,
value_t stride);

/**
* @brief Compute the maximum vertex id of an edge list
*
Expand Down
57 changes: 0 additions & 57 deletions cpp/include/cugraph/graph_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,63 +988,6 @@ rmm::device_uvector<vertex_t> select_random_vertices(
bool sort_vertices,
bool do_expensive_check = false);

/**
* @brief renumber sampling output
*
* @deprecated This API will be deprecated and will be replaced by the
* renumber_and_compress_sampled_edgelist and renumber_and_sort_sampled_edgelist functions in
* sampling_functions.hpp.
*
* This function renumbers sampling function (e.g. uniform_neighbor_sample) outputs satisfying the
* following requirements.
*
* 1. If @p edgelist_hops is valid, we can consider (vertex ID, flag=src, hop) triplets for each
* vertex ID in @p edgelist_srcs and (vertex ID, flag=dst, hop) triplets for each vertex ID in @p
* edgelist_dsts. From these triplets, we can find the minimum (hop, flag) pairs for every unique
* vertex ID (hop is the primary key and flag is the secondary key, flag=src is considered smaller
* than flag=dst if hop numbers are same). Vertex IDs with smaller (hop, flag) pairs precede vertex
* IDs with larger (hop, flag) pairs in renumbering. Ordering can be arbitrary among the vertices
* with the same (hop, flag) pairs.
* 2. If @p edgelist_hops is invalid, unique vertex IDs in @p edgelist_srcs precede vertex IDs that
* appear only in @p edgelist_dsts.
* 3. If label_offsets.has_value() is true, edge lists for different labels will be renumbered
* separately.
*
* This function is single-GPU only (we are not aware of any practical multi-GPU use cases).
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam label_t Type of labels. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param edgelist_srcs A vector storing original edgelist source vertices.
* @param edgelist_dsts A vector storing original edgelist destination vertices (size = @p
* edgelist_srcs.size()).
* @param edgelist_hops An optional pointer to the array storing hops for each edge list (source,
* destination) pairs (size = @p edgelist_srcs.size() if valid).
* @param label_offsets An optional tuple of unique labels and the input edge list (@p
* edgelist_srcs, @p edgelist_hops, and @p edgelist_dsts) offsets for the labels (size = # unique
* labels + 1).
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return Tuple of vectors storing renumbered edge sources (size = @p edgelist_srcs.size()) ,
* renumbered edge destinations (size = @p edgelist_dsts.size()), renumber_map to query original
* vertices (size = # unique vertices or aggregate # unique vertices for every label), and
* renumber_map offsets (size = std::get<0>(*label_offsets).size() + 1, valid only if @p
* label_offsets.has_value() is true).
*/
template <typename vertex_t, typename label_t>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
std::optional<rmm::device_uvector<size_t>>>
renumber_sampled_edgelist(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& edgelist_srcs,
rmm::device_uvector<vertex_t>&& edgelist_dsts,
std::optional<raft::device_span<int32_t const>> edgelist_hops,
std::optional<std::tuple<raft::device_span<label_t const>, raft::device_span<size_t const>>>
label_offsets,
bool do_expensive_check = false);

/**
* @brief Remove self loops from an edge list
*
Expand Down
84 changes: 47 additions & 37 deletions cpp/include/cugraph/sampling_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,12 +476,12 @@ renumber_and_sort_sampled_edgelist(
* 1. If @p edgelist_hops is valid, we can consider (vertex ID, hop, flag=major) triplets for each
* vertex ID in edge majors (@p edgelist_srcs if @p src_is_major is true, @p edgelist_dsts if false)
* and (vertex ID, hop, flag=minor) triplets for each vertex ID in edge minors. From these triplets,
* we can find the minimum (hop, flag) pairs for every unique vertex ID (hop is the primary key and
* we can find the minimum (hop, flag) pair for every unique vertex ID (hop is the primary key and
* flag is the secondary key, flag=major is considered smaller than flag=minor if hop numbers are
* same). Vertex IDs with smaller (hop, flag) pairs precede vertex IDs with larger (hop, flag) pairs
* in renumbering (if their vertex types are same, vertices with different types are renumbered
* separately). Ordering can be arbitrary among the vertices with the same (vertex type, hop, flag)
* triplets. If @p seed_vertices.has-value() is true, we assume (hop=0, flag=major) for every vertex
* triplets. If @p seed_vertices.has_value() is true, we assume (hop=0, flag=major) for every vertex
* in @p *seed_vertices in renumbering (this is relevant when there are seed vertices with no
* neighbors).
* 2. If @p edgelist_hops is invalid, unique vertex IDs in edge majors precede vertex IDs that
Expand All @@ -495,11 +495,15 @@ renumber_and_sort_sampled_edgelist(
* Edge IDs are renumbered fulfilling the following requirements (This is relevant only when @p
* edgelist_edge_ids.has_value() is true).
*
* 1. If @p edgelist_edge_types.has_value() is true, unique (edge type, edge ID) pairs are
* renumbered to consecutive integers starting from 0 for each edge type. If @p
* edgelist_edge_types.has_value() is false, unique edge IDs are renumbered to consecutive integers
* starting from 0.
* 2. If edgelist_label_offsets.has_value() is true, edge lists for different labels will be
* 1. If @p edgelist_hops is valid, we can consider (edge ID, hop) pairs. From these pairs, we can
* find the minimum hop value for every unique edge ID. Edge IDs with smaller hop values precede
* edge IDs with larger hop values in renumbering (if their edge types are same, edges with
* different edge types are renumbered separately). Ordering can be arbitrary among the edge IDs
* with the same (edge type, hop) pairs.
* 2. If @p edgelist_hops.has_value() is false, unique edge IDs (for each edge type if @p
* edgelist_edge_types.has_value() is true) are mapped to consecutive integers starting from 0. The
* ordering can be arbitrary.
* 3. If edgelist_label_offsets.has_value() is true, edge lists for different labels will be
* renumbered separately.
*
* The renumbered edges are sorted based on the following rules.
Expand All @@ -510,6 +514,11 @@ renumber_and_sort_sampled_edgelist(
* true.
* 2. Edges in each label are sorted independently if @p edgelist_label_offsets.has_value() is true.
*
* This function assumes that there is a single edge source vertex type and a single edge
* destination vertex type for each edge. If @p edgelist_edge_types.has_value() is false (i.e. there
* is only one edge type), there should be only one edge source vertex type and only one edge
* destination vertex type; the source & destination vertex types may or may not coincide.
*
* This function is single-GPU only (we are not aware of any practical multi-GPU use cases).
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
Expand All @@ -530,19 +539,16 @@ renumber_and_sort_sampled_edgelist(
* edgelist_srcs.size() if valid).
* @param edgelist_hops An optional vector storing edge list hop numbers (size = @p
* edgelist_srcs.size() if valid). @p edgelist_hops should be valid if @p num_hops >= 2.
* @param edgelist_label_offsets An optional pointer to the array storing label offsets to the input
* edges (size = @p num_labels + 1). @p edgelist_label_offsets should be valid if @p num_labels
* >= 2.
* @param seed_vertices An optional pointer to the array storing seed vertices in hop 0.
* @param seed_vertex_label_offsets An optional pointer to the array storing label offsets to the
* seed vertices (size = @p num_labels + 1). @p seed_vertex_label_offsets should be valid if @p
* num_labels >= 2 and @p seed_vertices is valid and invalid otherwise.
* @param ext_vertices A pointer to the array storing external vertex IDs for the local internal
* vertices. The local internal vertex range can be obtained by invoking a graph_view_t object's
* local_vertex_partition_range() function.
* @param ext_vertex_type_offsets A pointer to the array storing vertex type offsets for the entire
* external vertex ID range (array size = @p num_vertex_types + 1). For example, if the array stores
* [0, 100, 200], external vertex IDs [0, 100) have vertex type 0 and external vertex IDs [100, 200)
* have vertex type 1.
* @param edgelist_label_offsets An optional pointer to the array storing label offsets to the input
* edges (size = @p num_labels + 1). @p edgelist_label_offsets should be valid if @p num_labels
* >= 2.
* @param vertex_type_offsets A pointer to the array storing vertex type offsets for the entire
* vertex ID range (array size = @p num_vertex_types + 1). For example, if the array stores [0, 100,
* 200], vertex IDs [0, 100) have vertex type 0 and vertex IDs [100, 200) have vertex type 1.
* @param num_labels Number of labels. Labels are considered if @p num_labels >=2 and ignored if @p
* num_labels = 1.
* @param num_hops Number of hops. Hop numbers are considered if @p num_hops >=2 and ignored if @p
Expand All @@ -552,31 +558,36 @@ renumber_and_sort_sampled_edgelist(
* @param src_is_major A flag to determine whether to use the source or destination as the
* major key in renumbering and sorting.
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return Tuple of vectors storing edge sources, edge destinations, optional edge weights (valid
* only if @p edgelist_weights.has_value() is true), optional edge IDs (valid only if @p
* edgelist_edge_ids.has_value() is true), optional edge types (valid only if @p
* edgelist_edge_types.has_value() is true), optional (label, hop) offset values to the renumbered
* and sorted edges (size = @p num_labels * @p num_hops + 1, valid only when @p
* edgelist_hops.has_value() or @p edgelist_label_offsets.has_value() is true), renumber_map to
* query original vertices (size = # unique or aggregate # unique vertices for each label), and
* label offsets to the renumber map (size = @p num_labels + 1, valid only if @p
* edgelist_label_offsets.has_value() is true).
* @return Tuple of vectors storing renumbered edge sources, renumbered edge destinations, optional
* edge weights (valid only if @p edgelist_weights.has_value() is true), optional renumbered edge
* IDs (valid only if @p edgelist_edge_ids.has_value() is true), optional (label, edge type, hop)
* offset values to the renumbered and sorted edges (size = @p num_labels * @p num_edge_types * @p
* num_hops + 1, valid only when @p edgelist_edge_types.has_value(), @p edgelist_hops.has_value(),
* or @p edgelist_label_offsets.has_value() is true), renumber_map to query original vertices (size
* = # unique or aggregate # unique vertices for each label), (label, vertex type) offsets to the
* vertex renumber map (size = @p num_labels * @p num_vertex_types + 1), optional renumber_map to
* query original edge IDs (size = # unique (edge_type, edge ID) pairs, valid only if @p
* edgelist_edge_ids.has_value() is true), and optional (label, edge type) offsets to the edge ID
* renumber map (size = @p num_labels * @p num_edge_types + 1, valid only if @p
* edgelist_edge_ids.has_value() is true). We do not explicitly return edge source & destination
* vertex types as we assume that source & destination vertex types are implicitly determined for a
* given edge type.
*/
template <typename vertex_t,
typename weight_t,
typename edge_id_t,
typename edge_type_t>
std::tuple<
rmm::device_uvector<vertex_t>, // srcs
rmm::device_uvector<vertex_t>, // dsts
std::optional<rmm::device_uvector<weight_t>>, // weights
std::optional<rmm::device_uvector<edge_id_t>>, // edge IDs
std::optional<rmm::device_uvector<edge_type_t>>, // edge types
std::optional<rmm::device_uvector<size_t>>, // (label, edge type, hop) offsets to the edges
rmm::device_uvector<vertex_t>, // vertex renumber map
std::optional<rmm::device_uvector<size_t>>, // (label, type) offsets to the vertex renumber map
rmm::device_uvector<vertex_t>, // srcs
rmm::device_uvector<vertex_t>, // dsts
std::optional<rmm::device_uvector<weight_t>>, // weights
std::optional<rmm::device_uvector<edge_id_t>>, // edge IDs
std::optional<rmm::device_uvector<size_t>>, // (label, edge type, hop) offsets to the edges
rmm::device_uvector<vertex_t>, // vertex renumber map
rmm::device_uvector<size_t>, // (label, vertex type) offsets to the vertex renumber map
std::optional<rmm::device_uvector<edge_id_t>>, // edge ID renumber map
std::optional<rmm::device_uvector<size_t>>> // (label, type) offsets to the edge ID renumber map
std::optional<
rmm::device_uvector<size_t>>> // (label, edge type) offsets to the edge ID renumber map
heterogeneous_renumber_and_sort_sampled_edgelist(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& edgelist_srcs,
Expand All @@ -585,11 +596,10 @@ heterogeneous_renumber_and_sort_sampled_edgelist(
std::optional<rmm::device_uvector<edge_id_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types,
std::optional<rmm::device_uvector<int32_t>>&& edgelist_hops,
std::optional<raft::device_span<size_t const>> edgelist_label_offsets,
std::optional<raft::device_span<vertex_t const>> seed_vertices,
std::optional<raft::device_span<size_t const>> seed_vertex_label_offsets,
raft::device_span<vertex_t const> ext_vertices,
raft::device_span<vertex_t const> ext_vertex_type_offsets,
std::optional<raft::device_span<size_t const>> edgelist_label_offsets,
raft::device_span<vertex_t const> vertex_type_offsets,
size_t num_labels,
size_t num_hops,
size_t num_vertex_types,
Expand Down
17 changes: 17 additions & 0 deletions cpp/src/detail/utility_wrappers_32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,23 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view,
size_t size,
int32_t start_value);

// Explicit instantiation of sequence_fill for uint32_t values.
template void sequence_fill(rmm::cuda_stream_view const& stream_view,
uint32_t* d_value,
size_t size,
uint32_t start_value);

// Explicit instantiation of stride_fill for int32_t values.
template void stride_fill(rmm::cuda_stream_view const& stream_view,
int32_t* d_value,
size_t size,
int32_t start_value,
int32_t stride);

// Explicit instantiation of stride_fill for uint32_t values.
template void stride_fill(rmm::cuda_stream_view const& stream_view,
uint32_t* d_value,
size_t size,
uint32_t start_value,
uint32_t stride);

template int32_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
int32_t const* d_edgelist_srcs,
int32_t const* d_edgelist_dsts,
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/detail/utility_wrappers_64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,18 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view,
size_t size,
uint64_t start_value);

// Explicit instantiation of stride_fill for int64_t values.
template void stride_fill(rmm::cuda_stream_view const& stream_view,
int64_t* d_value,
size_t size,
int64_t start_value,
int64_t stride);

// Explicit instantiation of stride_fill for uint64_t values.
template void stride_fill(rmm::cuda_stream_view const& stream_view,
uint64_t* d_value,
size_t size,
uint64_t start_value,
uint64_t stride);

template int64_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
int64_t const* d_edgelist_srcs,
int64_t const* d_edgelist_dsts,
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/detail/utility_wrappers_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,22 @@ void sequence_fill(rmm::cuda_stream_view const& stream_view,
thrust::sequence(rmm::exec_policy(stream_view), d_value, d_value + size, start_value);
}

/**
 * Fills @p d_value with an arithmetic progression: for every index i in [0, size), element i
 * is set to static_cast<value_t>(start_value + stride * i). The work is issued through
 * thrust::transform using the execution policy built from @p stream_view.
 */
template <typename value_t>
void stride_fill(rmm::cuda_stream_view const& stream_view,
                 value_t* d_value,
                 size_t size,
                 value_t start_value,
                 value_t stride)
{
  // Maps an element index to its value in the progression; the index stays a size_t so the
  // arithmetic is carried out exactly as start_value + stride * idx before the final cast.
  auto const nth_value =
    cuda::proclaim_return_type<value_t>([start_value, stride] __device__(size_t idx) {
      return static_cast<value_t>(start_value + stride * idx);
    });

  // Index range [0, size) that drives the transform.
  auto const index_first = thrust::make_counting_iterator(size_t{0});
  auto const index_last  = thrust::make_counting_iterator(size);

  thrust::transform(rmm::exec_policy(stream_view), index_first, index_last, d_value, nth_value);
}

template <typename vertex_t>
vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
vertex_t const* d_edgelist_srcs,
Expand Down
Loading
Loading