Fix Leiden refinement phase (#3990)
- The normalization factor was missing from the equation that decides whether a node or a refined community is strongly connected inside its Louvain community. This PR adds that factor (see the equations sketched after this list).
- Disables random moves in the refinement phase. We plan to expose a flag to enable/disable random moves in a future PR.
- Adds a new function to flatten the Leiden dendrogram, since the flattening process needs additional information to unroll the hierarchical Leiden clustering.
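For reference, a sketch of the corrected well-connectedness checks as I read the diff below (γ is the resolution parameter, m the total edge weight, vol(·) the weighted volume, deg_w(v) the weighted degree of v, S a Louvain community, and C_r ⊆ S a refined Leiden community; these symbols are notation, not identifiers from the code):

  E(v, S \setminus \{v\}) > \gamma \, \deg_w(v) \, (\mathrm{vol}(S) - \deg_w(v)) / m

  E(C_r, S \setminus C_r) > \gamma \, \mathrm{vol}(C_r) \, (\mathrm{vol}(S) - \mathrm{vol}(C_r)) / m

The division by m (total_edge_weight in the code) is the factor this PR adds.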

Closes #3850
Closes #3749

Authors:
  - Naim (https://github.com/naimnv)
  - Alex Barghi (https://github.com/alexbarghi-nv)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Brad Rees (https://github.com/BradReesWork)

URL: #3990
naimnv authored Nov 20, 2023
1 parent d34e3d6 commit 8549b54
Showing 8 changed files with 242 additions and 207 deletions.
46 changes: 41 additions & 5 deletions cpp/src/community/detail/common_methods.cuh
@@ -52,7 +52,7 @@ struct is_bitwise_comparable<cuco::pair<int32_t, float>> : std::true_type {};
namespace cugraph {
namespace detail {

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
// FIXME: a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t, typename weight_t>
struct key_aggregated_edge_op_t {
weight_t total_edge_weight{};
@@ -80,7 +80,7 @@ struct key_aggregated_edge_op_t {
}
};

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
// FIXME: a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t, typename weight_t>
struct reduce_op_t {
using type = thrust::tuple<vertex_t, weight_t>;
@@ -100,7 +100,28 @@ struct reduce_op_t {
}
};

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
// FIXME: a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t, typename weight_t>
struct count_updown_moves_op_t {
bool up_down{};
__device__ auto operator()(thrust::tuple<vertex_t, thrust::tuple<vertex_t, weight_t>> p) const
{
vertex_t old_cluster = thrust::get<0>(p);
auto new_cluster_gain_pair = thrust::get<1>(p);
vertex_t new_cluster = thrust::get<0>(new_cluster_gain_pair);
weight_t delta_modularity = thrust::get<1>(new_cluster_gain_pair);

auto result_assignment =
(delta_modularity > weight_t{0})
? (((new_cluster > old_cluster) != up_down) ? old_cluster : new_cluster)
: old_cluster;

return (delta_modularity > weight_t{0})
? (((new_cluster > old_cluster) != up_down) ? false : true)
: false;
}
};
// FIXME: a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t, typename weight_t>
struct cluster_update_op_t {
bool up_down{};
@@ -115,7 +136,7 @@ struct cluster_update_op_t {
}
};

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
// FIXME: a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t, typename weight_t>
struct return_edge_weight_t {
__device__ auto operator()(
@@ -125,7 +146,7 @@ struct return_edge_weight_t {
}
};

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
// FIXME: a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t, typename weight_t>
struct return_one_t {
__device__ auto operator()(
@@ -394,6 +415,21 @@ rmm::device_uvector<vertex_t> update_clustering_by_delta_modularity(
detail::reduce_op_t<vertex_t, weight_t>{},
cugraph::get_dataframe_buffer_begin(output_buffer));

int nr_moves = thrust::count_if(
handle.get_thrust_policy(),
thrust::make_zip_iterator(thrust::make_tuple(
next_clusters_v.begin(), cugraph::get_dataframe_buffer_begin(output_buffer))),
thrust::make_zip_iterator(
thrust::make_tuple(next_clusters_v.end(), cugraph::get_dataframe_buffer_end(output_buffer))),
detail::count_updown_moves_op_t<vertex_t, weight_t>{up_down});

if (multi_gpu) {
nr_moves = host_scalar_allreduce(
handle.get_comms(), nr_moves, raft::comms::op_t::SUM, handle.get_stream());
}

if (nr_moves == 0) { up_down = !up_down; }

thrust::transform(handle.get_thrust_policy(),
next_clusters_v.begin(),
next_clusters_v.end(),
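A note on the nr_moves logic added above: as I read it, count_updown_moves_op_t returns true only for vertices whose best proposed move has positive modularity gain and goes in the currently permitted direction (up_down); the result_assignment value it computes is not used. When no vertex can move in the permitted direction, the direction flag is flipped so the next pass can make progress instead of stalling. A minimal host-side sketch of the same idea, with hypothetical names and std:: containers standing in for the thrust/device machinery:

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical, simplified illustration of the alternating move-direction
// heuristic in this commit; not the cuGraph implementation.
struct proposed_move {
  int old_cluster;          // cluster the vertex currently belongs to
  int new_cluster;          // best candidate cluster for the vertex
  double delta_modularity;  // gain of moving to new_cluster
};

// Count proposals with positive gain that respect the current direction:
// "up" moves go to a larger cluster id, "down" moves to a smaller one.
std::size_t count_permitted_moves(std::vector<proposed_move> const& moves, bool up_down)
{
  return static_cast<std::size_t>(
    std::count_if(moves.begin(), moves.end(), [up_down](proposed_move const& m) {
      return m.delta_modularity > 0.0 && ((m.new_cluster > m.old_cluster) == up_down);
    }));
}

// Flip the permitted direction when it would yield no moves at all.
void update_direction(std::vector<proposed_move> const& moves, bool& up_down)
{
  if (count_permitted_moves(moves, up_down) == 0) { up_down = !up_down; }
}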
22 changes: 13 additions & 9 deletions cpp/src/community/detail/refine_impl.cuh
@@ -89,20 +89,21 @@ struct leiden_key_aggregated_edge_op_t {

// E(Cr, S-Cr) > ||Cr||*(||S|| -||Cr||)
bool is_dst_leiden_cluster_well_connected =
dst_leiden_cut_to_louvain >
resolution * dst_leiden_volume * (louvain_cluster_volume - dst_leiden_volume);
dst_leiden_cut_to_louvain > resolution * dst_leiden_volume *
(louvain_cluster_volume - dst_leiden_volume) /
total_edge_weight;

// E(v, Cr-v) - ||v||* ||Cr-v||/||V(G)||
// aggregated_weight_to_neighboring_leiden_cluster == E(v, Cr-v)?

weight_t mod_gain = -1.0;
if (is_src_active > 0) {
if ((louvain_of_dst_leiden_cluster == src_louvain_cluster) &&
is_dst_leiden_cluster_well_connected) {
(dst_leiden_cluster_id != src_leiden_cluster) && is_dst_leiden_cluster_well_connected) {
mod_gain = aggregated_weight_to_neighboring_leiden_cluster -
resolution * src_weighted_deg * (dst_leiden_volume - src_weighted_deg) /
total_edge_weight;

resolution * src_weighted_deg * dst_leiden_volume / total_edge_weight;
// FIXME: Disable random moves in refinement phase for now.
#if 0
weight_t random_number{0.0};
if (mod_gain > 0.0) {
auto flat_id = uint64_t{threadIdx.x + blockIdx.x * blockDim.x};
Expand All @@ -117,6 +118,8 @@ struct leiden_key_aggregated_edge_op_t {
? __expf(static_cast<float>((2.0 * mod_gain) / (theta * total_edge_weight))) *
random_number
: -1.0;
#endif
mod_gain = mod_gain > 0.0 ? mod_gain : -1.0;
}
}
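To spell out the mod_gain change above (same notation as in the sketch under the commit message; this is a reading of the diff, not a separate derivation): the gain of moving vertex v into a neighboring refined community C_r was previously

  \Delta Q = E(v, C_r) - \gamma \, \deg_w(v) \, (\mathrm{vol}(C_r) - \deg_w(v)) / m

and is now

  \Delta Q = E(v, C_r) - \gamma \, \deg_w(v) \, \mathrm{vol}(C_r) / m

which matches the newly added guard dst_leiden_cluster_id != src_leiden_cluster: v is no longer assumed to be inside C_r, so its own degree is not subtracted from vol(C_r). The randomized acceptance step (scaling a positive gain by exp(2 ΔQ / (θ m)) times a uniform random number) is compiled out via #if 0, and a positive gain is now kept unchanged.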

@@ -240,11 +243,12 @@ refine_clustering(
wcut_deg_and_cluster_vol_triple_begin,
wcut_deg_and_cluster_vol_triple_end,
singleton_and_connected_flags.begin(),
[resolution] __device__(auto wcut_wdeg_and_louvain_volume) {
[resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) {
auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume);
auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume);
auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume);
return wcut > (resolution * wdeg * (louvain_volume - wdeg));
return wcut >
(resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight);
});

edge_src_property_t<GraphViewType, weight_t> src_louvain_cluster_weight_cache(handle);
@@ -478,7 +482,7 @@ refine_clustering(
auto values_for_leiden_cluster_keys = thrust::make_zip_iterator(
thrust::make_tuple(refined_community_volumes.begin(),
refined_community_cuts.begin(),
leiden_keys_used_in_edge_reduction.begin(), // redundant
leiden_keys_used_in_edge_reduction.begin(),
louvain_of_leiden_keys_used_in_edge_reduction.begin()));

using value_t = thrust::tuple<weight_t, weight_t, vertex_t, vertex_t>;
29 changes: 28 additions & 1 deletion cpp/src/community/flatten_dendrogram.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -59,4 +59,31 @@ void partition_at_level(raft::handle_t const& handle,
});
}

template <typename vertex_t, bool multi_gpu>
void leiden_partition_at_level(raft::handle_t const& handle,
Dendrogram<vertex_t> const& dendrogram,
vertex_t* d_partition,
size_t level)
{
vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0);
raft::copy(
d_partition, dendrogram.get_level_ptr_nocheck(0), local_num_verts, handle.get_stream());

rmm::device_uvector<vertex_t> local_vertex_ids_v(local_num_verts, handle.get_stream());

std::for_each(
thrust::make_counting_iterator<size_t>(0),
thrust::make_counting_iterator<size_t>((level - 1) / 2),
[&handle, &dendrogram, &local_vertex_ids_v, &d_partition, local_num_verts](size_t l) {
cugraph::relabel<vertex_t, false>(
handle,
std::tuple<vertex_t const*, vertex_t const*>(dendrogram.get_level_ptr_nocheck(2 * l + 1),
dendrogram.get_level_ptr_nocheck(2 * l + 2)),
dendrogram.get_level_size_nocheck(2 * l + 1),
d_partition,
local_num_verts,
false);
});
}

} // namespace cugraph
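A hypothetical call-site sketch for the new leiden_partition_at_level helper above (the surrounding setup, the include paths, and dendrogram.num_levels() are assumptions for illustration, not taken from this diff). The helper walks the dendrogram levels in (2l+1, 2l+2) pairs as relabel maps, which appears to be the "additional information" the commit message refers to and which plain partition_at_level does not need:

// Hypothetical usage sketch: flatten a Leiden dendrogram into one cluster id
// per local vertex. Include paths and num_levels() are assumed; adjust to the
// actual cuGraph/RAFT version in use.
#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <cugraph/dendrogram.hpp>

template <typename vertex_t>
rmm::device_uvector<vertex_t> flatten_leiden_example(
  raft::handle_t const& handle, cugraph::Dendrogram<vertex_t> const& dendrogram)
{
  // Level 0 holds one entry per local vertex.
  rmm::device_uvector<vertex_t> flat_clustering(dendrogram.get_level_size_nocheck(0),
                                                handle.get_stream());

  // Unroll the full hierarchy (single-GPU here, hence multi_gpu = false).
  cugraph::leiden_partition_at_level<vertex_t, false>(
    handle, dendrogram, flat_clustering.data(), dendrogram.num_levels());

  return flat_clustering;
}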