diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 40435b9580..2025a15ecf 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -21,7 +21,483 @@ #include #include +namespace raft::cluster::kmeans { + +/** + * @brief Find clusters with k-means algorithm. + * Initial centroids are chosen with k-means++ algorithm. Empty + * clusters are reinitialized by choosing new centroids with + * k-means++ algorithm. + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers. + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. 
+ */ +template +void fit(handle_t const& handle, + const KMeansParams& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + detail::kmeans_fit(handle, params, X, sample_weight, centroids, inertia, n_iter); +} + +template +void fit(handle_t const& handle, + const KMeansParams& params, + const DataT* X, + const DataT* sample_weight, + DataT* centroids, + IndexT n_samples, + IndexT n_features, + DataT& inertia, + IndexT& n_iter) +{ + detail::kmeans_fit( + handle, params, X, sample_weight, centroids, n_samples, n_features, inertia, n_iter); +} + +/** + * @brief Predict the closest cluster each sample in X belongs to. + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X New data to predict. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[in] centroids Cluster centroids. The data must be in + * row-major format. + * [dim = n_clusters x n_features] + * @param[in] normalize_weight True if the weights should be normalized + * @param[out] labels Index of the cluster each sample in X + * belongs to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to + * their closest cluster center. 
+ */ +template +void predict(handle_t const& handle, + const KMeansParams& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia) +{ + detail::kmeans_predict( + handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); +} + +template +void predict(handle_t const& handle, + const KMeansParams& params, + const DataT* X, + const DataT* sample_weight, + const DataT* centroids, + IndexT n_samples, + IndexT n_features, + IndexT* labels, + bool normalize_weight, + DataT& inertia) +{ + detail::kmeans_predict(handle, + params, + X, + sample_weight, + centroids, + n_samples, + n_features, + labels, + normalize_weight, + inertia); +} + +/** + * @brief Compute k-means clustering and predicts cluster index for each sample + * in the input. + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must be + * in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids Optional + * [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] labels Index of the cluster each sample in X belongs + * to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. 
+ */ +template +void fit_predict(handle_t const& handle, + const KMeansParams& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + detail::kmeans_fit_predict( + handle, params, X, sample_weight, centroids, labels, inertia, n_iter); +} + +template +void fit_predict(handle_t const& handle, + const KMeansParams& params, + const DataT* X, + const DataT* sample_weight, + DataT* centroids, + IndexT n_samples, + IndexT n_features, + IndexT* labels, + DataT& inertia, + IndexT& n_iter) +{ + detail::kmeans_fit_predict( + handle, params, X, sample_weight, centroids, n_samples, n_features, labels, inertia, n_iter); +} + +/** + * @brief Transform X to a cluster-distance space. + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format + * [dim = n_samples x n_features] + * @param[in] centroids Cluster centroids. The data must be in row-major format. + * [dim = n_clusters x n_features] + * @param[out] X_new X transformed in the new space. 
+ * [dim = n_samples x n_features] + */ +template +void transform(const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_matrix_view X_new) +{ + detail::kmeans_transform(handle, params, X, centroids, X_new); +} + +template +void transform(const raft::handle_t& handle, + const KMeansParams& params, + const DataT* X, + const DataT* centroids, + IndexT n_samples, + IndexT n_features, + DataT* X_new) +{ + detail::kmeans_transform( + handle, params, X, centroids, n_samples, n_features, X_new); +} + +template +using SamplingOp = detail::SamplingOp; + +template +using KeyValueIndexOp = detail::KeyValueIndexOp; + +/** + * @brief Select centroids according to a sampling operation + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * + * @param[in] handle The raft handle + * @param[in] X The data in row-major format + * [dim = n_samples x n_features] + * @param[in] minClusterDistance Distance for every sample to its nearest centroid + * [dim = n_samples] + * @param[in] isSampleCentroid Flag the sample chosen as initial centroid + * [dim = n_samples] + * @param[in] select_op The sampling operation used to select the centroids + * @param[out] inRankCp The sampled centroids + * [dim = n_selected_centroids x n_features] + * @param[in] workspace Temporary workspace buffer which can get resized + * + */ +template +void sample_centroids(const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view minClusterDistance, + raft::device_vector_view isSampleCentroid, + SamplingOp& select_op, + rmm::device_uvector& inRankCp, + rmm::device_uvector& workspace) +{ + detail::sampleCentroids( + handle, X, minClusterDistance, isSampleCentroid, select_op, inRankCp, workspace); +} + +/** + * @brief Compute cluster cost + * + * @tparam DataT the type of data used for weights, distances. 
+ * @tparam ReductionOpT the type of data used for the reduction operation. + * + * @param[in] handle The raft handle + * @param[in] minClusterDistance Distance for every sample to its nearest centroid + * [dim = n_samples] + * @param[in] workspace Temporary workspace buffer which can get resized + * @param[out] clusterCost Resulting cluster cost + * @param[in] reduction_op The reduction operation used for the cost + * + */ +template +void cluster_cost(const raft::handle_t& handle, + raft::device_vector_view minClusterDistance, + rmm::device_uvector workspace, + raft::device_scalar_view clusterCost, + ReductionOpT reduction_op) +{ + detail::computeClusterCost( + handle, minClusterDistance, workspace, clusterCost, reduction_op); +} + +/** + * @brief Compute distance for every sample to its nearest centroid + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * + * @param[in] handle The raft handle + * @param[in] params The parameters for KMeans + * @param[in] X The data in row-major format + * [dim = n_samples x n_features] + * @param[in] centroids Centroids data + * [dim = n_cluster x n_features] + * @param[out] minClusterDistance Distance for every sample to its nearest centroid + * [dim = n_samples] + * @param[in] L2NormX L2 norm of X : ||x||^2 + * [dim = n_samples] + * @param[out] L2NormBuf_OR_DistBuf Resizable buffer to store L2 norm of centroids or distance + * matrix + * @param[in] workspace Temporary workspace buffer which can get resized + * + */ +template +void min_cluster_distance(const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_vector_view minClusterDistance, + raft::device_vector_view L2NormX, + rmm::device_uvector& L2NormBuf_OR_DistBuf, + rmm::device_uvector& workspace) +{ + detail::minClusterDistanceCompute( + handle, params, X, centroids, minClusterDistance, L2NormX, 
L2NormBuf_OR_DistBuf, workspace); +} + +/** + * @brief Calculates a pair for every sample in input 'X' where key is an + * index of one of the 'centroids' (index of the nearest centroid) and 'value' + * is the distance between the sample and the 'centroid[key]' + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * + * @param[in] handle The raft handle + * @param[in] params The parameters for KMeans + * @param[in] X The data in row-major format + * [dim = n_samples x n_features] + * @param[in] centroids Centroids data + * [dim = n_cluster x n_features] + * @param[out] minClusterAndDistance Distance vector that contains for every sample, the nearest + * centroid and its distance + * [dim = n_samples] + * @param[in] L2NormX L2 norm of X : ||x||^2 + * [dim = n_samples] + * @param[out] L2NormBuf_OR_DistBuf Resizable buffer to store L2 norm of centroids or distance + * matrix + * @param[in] workspace Temporary workspace buffer which can get resized + * + */ +template +void min_cluster_and_distance( + const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_vector_view, IndexT> minClusterAndDistance, + raft::device_vector_view L2NormX, + rmm::device_uvector& L2NormBuf_OR_DistBuf, + rmm::device_uvector& workspace) +{ + detail::minClusterAndDistanceCompute( + handle, params, X, centroids, minClusterAndDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); +} + +/** + * @brief Shuffle and randomly select 'n_samples_to_gather' samples from input 'in' and store them + * in 'out'. The input is not modified. + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. 
+ * + * @param[in] handle The raft handle + * @param[in] in The data to shuffle and gather + * [dim = n_samples x n_features] + * @param[out] out The sampled data + * [dim = n_samples_to_gather x n_features] + * @param[in] n_samples_to_gather Number of sample to gather + * @param[in] seed Seed for the shuffle + * @param[in] workspace Temporary workspace buffer which can get resized + * + */ +template +void shuffle_and_gather(const raft::handle_t& handle, + raft::device_matrix_view in, + raft::device_matrix_view out, + uint32_t n_samples_to_gather, + uint64_t seed, + rmm::device_uvector* workspace = nullptr) +{ + detail::shuffleAndGather(handle, in, out, n_samples_to_gather, seed, workspace); +} + +/** + * @brief Count the number of samples in each cluster + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * + * @param[in] handle The raft handle + * @param[in] params The parameters for KMeans + * @param[in] X The data in row-major format + * [dim = n_samples x n_features] + * @param[in] L2NormX L2 norm of X : ||x||^2 + * [dim = n_samples] + * @param[in] centroids Centroids data + * [dim = n_cluster x n_features] + * @param[in] workspace Temporary workspace buffer which can get resized + * @param[out] sampleCountInCluster The count for each centroid + * [dim = n_cluster] + * + */ +template +void count_samples_in_cluster(const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_vector_view L2NormX, + raft::device_matrix_view centroids, + rmm::device_uvector& workspace, + raft::device_vector_view sampleCountInCluster) +{ + detail::countSamplesInCluster( + handle, params, X, L2NormX, centroids, workspace, sampleCountInCluster); +} + +/* + * @brief Selects 'n_clusters' samples from the input X using kmeans++ algorithm. + + * @note This is the algorithm described in + * "k-means++: the advantages of careful seeding". 2007, Arthur, D. 
and Vassilvitskii, S. + * ACM-SIAM symposium on Discrete algorithms. + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * + * @param[in] handle The raft handle + * @param[in] params The parameters for KMeans + * @param[in] X The data in row-major format + * [dim = n_samples x n_features] + * @param[out] centroids Centroids data + * [dim = n_cluster x n_features] + * @param[in] workspace Temporary workspace buffer which can get resized + */ +template +void init_plus_plus(const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_matrix_view centroidsRawData, + rmm::device_uvector& workspace) +{ + detail::kmeansPlusPlus(handle, params, X, centroidsRawData, workspace); +} + +/* + * @brief Main function used to fit KMeans (after cluster initialization) + * + * @tparam DataT the type of data used for weights, distances. + * @tparam IndexT the type of data used for indexing. + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids [in] Initial cluster centers. + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. 
+ * @param[in] workspace Temporary workspace buffer which can get resized + */ +template +void fit_main(const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_vector_view weight, + raft::device_matrix_view centroidsRawData, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter, + rmm::device_uvector& workspace) +{ + detail::kmeans_fit_main( + handle, params, X, weight, centroidsRawData, inertia, n_iter, workspace); +} + +}; // end namespace raft::cluster::kmeans + namespace raft::cluster { + +/** + * Note: All of the functions below in raft::cluster are deprecated and will + * be removed in a future release. Please use raft::cluster::kmeans instead. + */ + /** * @brief Find clusters with k-means algorithm. * Initial centroids are chosen with k-means++ algorithm. Empty @@ -55,7 +531,7 @@ void kmeans_fit(handle_t const& handle, raft::host_scalar_view inertia, raft::host_scalar_view n_iter) { - detail::kmeans_fit(handle, params, X, sample_weight, centroids, inertia, n_iter); + kmeans::fit(handle, params, X, sample_weight, centroids, inertia, n_iter); } template @@ -69,7 +545,7 @@ void kmeans_fit(handle_t const& handle, DataT& inertia, IndexT& n_iter) { - detail::kmeans_fit( + kmeans::fit( handle, params, X, sample_weight, centroids, n_samples, n_features, inertia, n_iter); } @@ -103,7 +579,7 @@ void kmeans_predict(handle_t const& handle, bool normalize_weight, raft::host_scalar_view inertia) { - detail::kmeans_predict( + kmeans::predict( handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); } @@ -119,16 +595,16 @@ void kmeans_predict(handle_t const& handle, bool normalize_weight, DataT& inertia) { - detail::kmeans_predict(handle, - params, - X, - sample_weight, - centroids, - n_samples, - n_features, - labels, - normalize_weight, - inertia); + kmeans::predict(handle, + params, + X, + sample_weight, + centroids, + n_samples, + n_features, + labels, + normalize_weight, + inertia); } 
/** @@ -168,7 +644,7 @@ void kmeans_fit_predict(handle_t const& handle, raft::host_scalar_view inertia, raft::host_scalar_view n_iter) { - detail::kmeans_fit_predict( + kmeans::fit_predict( handle, params, X, sample_weight, centroids, labels, inertia, n_iter); } @@ -184,7 +660,7 @@ void kmeans_fit_predict(handle_t const& handle, DataT& inertia, IndexT& n_iter) { - detail::kmeans_fit_predict( + kmeans::fit_predict( handle, params, X, sample_weight, centroids, n_samples, n_features, labels, inertia, n_iter); } @@ -210,7 +686,7 @@ void kmeans_transform(const raft::handle_t& handle, raft::device_matrix_view centroids, raft::device_matrix_view X_new) { - detail::kmeans_transform(handle, params, X, centroids, X_new); + kmeans::transform(handle, params, X, centroids, X_new); } template @@ -222,15 +698,14 @@ void kmeans_transform(const raft::handle_t& handle, IndexT n_features, DataT* X_new) { - detail::kmeans_transform( - handle, params, X, centroids, n_samples, n_features, X_new); + kmeans::transform(handle, params, X, centroids, n_samples, n_features, X_new); } template -using SamplingOp = detail::SamplingOp; +using SamplingOp = kmeans::SamplingOp; template -using KeyValueIndexOp = detail::KeyValueIndexOp; +using KeyValueIndexOp = kmeans::KeyValueIndexOp; /** * @brief Select centroids according to a sampling operation @@ -260,7 +735,7 @@ void sampleCentroids(const raft::handle_t& handle, rmm::device_uvector& inRankCp, rmm::device_uvector& workspace) { - detail::sampleCentroids( + kmeans::sample_centroids( handle, X, minClusterDistance, isSampleCentroid, select_op, inRankCp, workspace); } @@ -285,7 +760,7 @@ void computeClusterCost(const raft::handle_t& handle, raft::device_scalar_view clusterCost, ReductionOpT reduction_op) { - detail::computeClusterCost( + kmeans::cluster_cost( handle, minClusterDistance, workspace, clusterCost, reduction_op); } @@ -320,7 +795,7 @@ void minClusterDistanceCompute(const raft::handle_t& handle, rmm::device_uvector& L2NormBuf_OR_DistBuf, 
rmm::device_uvector& workspace) { - detail::minClusterDistanceCompute( + kmeans::min_cluster_distance( handle, params, X, centroids, minClusterDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); } @@ -359,7 +834,7 @@ void minClusterAndDistanceCompute( rmm::device_uvector& L2NormBuf_OR_DistBuf, rmm::device_uvector& workspace) { - detail::minClusterAndDistanceCompute( + kmeans::min_cluster_and_distance( handle, params, X, centroids, minClusterAndDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); } @@ -388,7 +863,7 @@ void shuffleAndGather(const raft::handle_t& handle, uint64_t seed, rmm::device_uvector* workspace = nullptr) { - detail::shuffleAndGather(handle, in, out, n_samples_to_gather, seed, workspace); + kmeans::shuffle_and_gather(handle, in, out, n_samples_to_gather, seed, workspace); } /** @@ -419,7 +894,7 @@ void countSamplesInCluster(const raft::handle_t& handle, rmm::device_uvector& workspace, raft::device_vector_view sampleCountInCluster) { - detail::countSamplesInCluster( + kmeans::count_samples_in_cluster( handle, params, X, L2NormX, centroids, workspace, sampleCountInCluster); } @@ -448,7 +923,7 @@ void kmeansPlusPlus(const raft::handle_t& handle, raft::device_matrix_view centroidsRawData, rmm::device_uvector& workspace) { - detail::kmeansPlusPlus(handle, params, X, centroidsRawData, workspace); + kmeans::init_plus_plus(handle, params, X, centroidsRawData, workspace); } /* @@ -484,7 +959,7 @@ void kmeans_fit_main(const raft::handle_t& handle, raft::host_scalar_view n_iter, rmm::device_uvector& workspace) { - detail::kmeans_fit_main( + kmeans::fit_main( handle, params, X, weight, centroidsRawData, inertia, n_iter, workspace); } -} // namespace raft::cluster +}; // namespace raft::cluster diff --git a/cpp/include/raft/cluster/kmeans_types.hpp b/cpp/include/raft/cluster/kmeans_types.hpp index 87fc7c1880..d6eadd1ba6 100644 --- a/cpp/include/raft/cluster/kmeans_types.hpp +++ b/cpp/include/raft/cluster/kmeans_types.hpp @@ -18,8 +18,7 @@ #include #include 
-namespace raft { -namespace cluster { +namespace raft::cluster::kmeans { struct KMeansParams { enum InitMethod { KMeansPlusPlus, Random, Array }; @@ -69,5 +68,11 @@ struct KMeansParams { bool inertia_check = false; }; -} // namespace cluster -} // namespace raft + +} // namespace raft::cluster::kmeans + +namespace raft::cluster { + +using kmeans::KMeansParams; + +} // namespace raft::cluster diff --git a/cpp/include/raft/cluster/single_linkage.cuh b/cpp/include/raft/cluster/single_linkage.cuh index 8e33b8389d..2d74c364b2 100644 --- a/cpp/include/raft/cluster/single_linkage.cuh +++ b/cpp/include/raft/cluster/single_linkage.cuh @@ -21,7 +21,11 @@ namespace raft::cluster { -constexpr int DEFAULT_CONST_C = 15; +/** + * Note: All of the functions below in the raft::cluster namespace are deprecated + * and will be removed in a future release. Please use raft::cluster::hierarchy + * instead. + */ /** * Single-linkage clustering, capable of constructing a KNN graph to @@ -58,6 +62,11 @@ void single_linkage(const raft::handle_t& handle, detail::single_linkage( handle, X, m, n, metric, out, c, n_clusters); } +}; // namespace raft::cluster + +namespace raft::cluster::hierarchy { + +constexpr int DEFAULT_CONST_C = 15; /** * Single-linkage clustering, capable of constructing a KNN graph to @@ -90,14 +99,14 @@ void single_linkage(const raft::handle_t& handle, out_arrs.children = dendrogram.data_handle(); out_arrs.labels = labels.data_handle(); - single_linkage(handle, - X.data_handle(), - static_cast(X.extent(0)), - static_cast(X.extent(1)), - metric, - &out_arrs, - c.has_value() ? c.value() : DEFAULT_CONST_C, - n_clusters); + raft::cluster::single_linkage( + handle, + X.data_handle(), + static_cast(X.extent(0)), + static_cast(X.extent(1)), + metric, + &out_arrs, + c.has_value() ? 
c.value() : DEFAULT_CONST_C, + n_clusters); } - -}; // namespace raft::cluster +}; // namespace raft::cluster::hierarchy diff --git a/cpp/include/raft/cluster/single_linkage_types.hpp b/cpp/include/raft/cluster/single_linkage_types.hpp index 79f2ede482..55239ff6d6 100644 --- a/cpp/include/raft/cluster/single_linkage_types.hpp +++ b/cpp/include/raft/cluster/single_linkage_types.hpp @@ -18,9 +18,15 @@ #include +namespace raft::cluster::hierarchy { +enum LinkageDistance { PAIRWISE = 0, KNN_GRAPH = 1 }; + +}; // end namespace raft::cluster::hierarchy + +// The code below is legacy namespace raft::cluster { -enum LinkageDistance { PAIRWISE = 0, KNN_GRAPH = 1 }; +using hierarchy::LinkageDistance; /** * Simple POCO for consolidating linkage results. This closely diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 07ec85bf1e..0d5af9be5c 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -82,7 +82,7 @@ if(BUILD_TESTS) PATH test/cluster/kmeans.cu test/cluster_solvers.cu - test/sparse/linkage.cu + test/cluster/linkage.cu OPTIONAL DIST NN ) diff --git a/cpp/test/sparse/linkage.cu b/cpp/test/cluster/linkage.cu similarity index 98% rename from cpp/test/sparse/linkage.cu rename to cpp/test/cluster/linkage.cu index ce5741d06b..5533f552bd 100644 --- a/cpp/test/sparse/linkage.cu +++ b/cpp/test/cluster/linkage.cu @@ -180,20 +180,21 @@ class LinkageTest : public ::testing::TestWithParam> { raft::handle_t handle; - auto data_view = - raft::make_device_matrix_view(data.data(), params.n_row, params.n_col); + auto data_view = raft::make_device_matrix_view( + data.data(), params.n_row, params.n_col); auto dendrogram_view = raft::make_device_matrix_view(out_children.data(), params.n_row, 2); auto labels_view = raft::make_device_vector_view(labels.data(), params.n_row); - raft::cluster::single_linkage( - handle, - data_view, - dendrogram_view, - labels_view, - raft::distance::DistanceType::L2SqrtExpanded, - params.n_clusters, - 
std::make_optional(params.c)); + raft::cluster::hierarchy:: + single_linkage( + handle, + data_view, + dendrogram_view, + labels_view, + raft::distance::DistanceType::L2SqrtExpanded, + params.n_clusters, + std::make_optional(params.c)); handle.sync_stream(stream); diff --git a/cpp/test/cluster_solvers_deprecated.cu b/cpp/test/cluster_solvers_deprecated.cu index 1e9ec0c15b..167a710b34 100644 --- a/cpp/test/cluster_solvers_deprecated.cu +++ b/cpp/test/cluster_solvers_deprecated.cu @@ -20,7 +20,6 @@ #include #include -#include namespace raft { namespace spectral { @@ -54,52 +53,5 @@ TEST(Raft, ClusterSolvers) EXPECT_ANY_THROW(cluster_solver.solve(h, n, d, eigvecs, codes)); } -TEST(Raft, ModularitySolvers) -{ - using namespace matrix; - using index_type = int; - using value_type = double; - - handle_t h; - ASSERT_EQ(0, - h. - - get_device() - - ); - - index_type neigvs{10}; - index_type maxiter{100}; - index_type restart_iter{10}; - value_type tol{1.0e-10}; - bool reorthog{true}; - - // nullptr expected to trigger exceptions: - // - index_type* clusters{nullptr}; - value_type* eigvals{nullptr}; - value_type* eigvecs{nullptr}; - - unsigned long long seed{100110021003}; - - eigen_solver_config_t eig_cfg{ - neigvs, maxiter, restart_iter, tol, reorthog, seed}; - lanczos_solver_t eig_solver{eig_cfg}; - - index_type k{5}; - - cluster_solver_config_deprecated_t clust_cfg{k, maxiter, tol, seed}; - kmeans_solver_deprecated_t cluster_solver{clust_cfg}; - - auto stream = h.get_stream(); - sparse_matrix_t sm{h, nullptr, nullptr, nullptr, 0, 0}; - - EXPECT_ANY_THROW(spectral::modularity_maximization( - h, sm, eig_solver, cluster_solver, clusters, eigvals, eigvecs)); - - value_type modularity{0}; - EXPECT_ANY_THROW(spectral::analyzeModularity(h, sm, k, clusters, modularity)); -} - } // namespace spectral } // namespace raft diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index db139031a2..d10d9773a5 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ 
-9,11 +9,12 @@ RAFT C++ API Reference :maxdepth: 4 cpp_api/core.rst - cpp_api/clustering.rst + cpp_api/cluster.rst + cpp_api/distance.rst cpp_api/linalg.rst cpp_api/matrix.rst - cpp_api/optimization.rst + cpp_api/neighbors.rst + cpp_api/solver.rst cpp_api/random.rst - cpp_api/spatial.rst cpp_api/sparse.rst cpp_api/stats.rst \ No newline at end of file diff --git a/docs/source/cpp_api/cluster.rst b/docs/source/cpp_api/cluster.rst new file mode 100644 index 0000000000..41816482cc --- /dev/null +++ b/docs/source/cpp_api/cluster.rst @@ -0,0 +1,27 @@ +Cluster +======= + +This page provides C++ class references for the publicly-exposed elements of the cluster package. + +K-Means +------- + +.. doxygennamespace:: raft::cluster::kmeans + :project: RAFT + :members: + + +Hierarchical Clustering +----------------------- + +.. doxygennamespace:: raft::cluster::hierarchy + :project: RAFT + :members: + + +Spectral Clustering +------------------- + +.. doxygennamespace:: raft::spectral + :project: RAFT + :members: \ No newline at end of file diff --git a/docs/source/cpp_api/clustering.rst b/docs/source/cpp_api/clustering.rst deleted file mode 100644 index 90ca786cc1..0000000000 --- a/docs/source/cpp_api/clustering.rst +++ /dev/null @@ -1,12 +0,0 @@ -Clustering -========== - -This page provides C++ class references for the publicly-exposed elements of the clustering package. - -.. doxygennamespace:: raft::cluster - :project: RAFT - :members: - -.. doxygennamespace:: raft::spectral - :project: RAFT - :members: \ No newline at end of file diff --git a/docs/source/cpp_api/core.rst b/docs/source/cpp_api/core.rst index ef6270556e..d4891bf0b3 100644 --- a/docs/source/cpp_api/core.rst +++ b/docs/source/cpp_api/core.rst @@ -4,7 +4,6 @@ Core This page provides C++ class references for the publicly-exposed elements of the core package. - handle_t ######## @@ -20,6 +19,13 @@ interruptible :project: RAFT :members: +NVTX +#### + +.. 
doxygennamespace:: raft::common::nvtx + :project: RAFT + :members: + mdarray ####### @@ -28,11 +34,64 @@ mdarray :project: RAFT :members: +.. doxygenclass:: raft::make_device_matrix + :project: RAFT + +.. doxygenclass:: raft::make_device_vector + :project: RAFT + +.. doxygenclass:: raft::make_device_scalar + :project: RAFT + +.. doxygenclass:: raft::make_host_matrix + :project: RAFT + +.. doxygenclass:: raft::make_host_vector + :project: RAFT + +.. doxygenclass:: raft::make_host_scalar + :project: RAFT + + +mdspan +####### + +.. doxygenfunction:: raft::make_device_mdspan + :project: RAFT + +.. doxygenfunction:: raft::make_device_matrix_view + :project: RAFT + +.. doxygenfunction:: raft::make_device_vector_view + :project: RAFT + +.. doxygenfunction:: raft::make_device_scalar_view + :project: RAFT + +.. doxygenfunction:: raft::make_host_matrix_view + :project: RAFT + +.. doxygenfunction:: raft::make_host_vector_view + :project: RAFT + +.. doxygenfunction:: raft::make_host_scalar_view + :project: RAFT span #### -.. doxygenclass:: raft::span +.. doxygenclass:: raft::device_span + :project: RAFT + :members: + +.. doxygenclass:: raft::host_span + :project: RAFT + :members: + +Key-Value Pair +############## + +.. doxygenclass:: raft::KeyValuePair :project: RAFT :members: diff --git a/docs/source/cpp_api/distance.rst b/docs/source/cpp_api/distance.rst new file mode 100644 index 0000000000..c2bce860d5 --- /dev/null +++ b/docs/source/cpp_api/distance.rst @@ -0,0 +1,10 @@ +Distance +======== + +This page provides C++ class references for the publicly-exposed elements of the distance package. + +Distance +######## + +.. 
doxygennamespace:: raft::distance + :project: RAFT diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst new file mode 100644 index 0000000000..962bbd1efe --- /dev/null +++ b/docs/source/cpp_api/neighbors.rst @@ -0,0 +1,43 @@ +Neighbors +========= + +This page provides C++ class references for the publicly-exposed elements of the neighbors package. + + +Brute-force +----------- + +.. doxygennamespace:: raft::neighbors::brute_force + :project: RAFT + + +IVF-Flat +-------- + +.. doxygennamespace:: raft::neighbors::ivf_flat + :project: RAFT + :members: + + +IVF-PQ +-------- + +.. doxygennamespace:: raft::neighbors::ivf_pq + :project: RAFT + :members: + + +Epsilon Neighborhood +-------------------- + +.. doxygennamespace:: raft::neighbors::epsilon_neighborhood + :project: RAFT + :members: + + +Random Ball Cover +----------------- + +.. doxygennamespace:: raft::neighbors::ball_cover + :project: RAFT + :members: diff --git a/docs/source/cpp_api/optimization.rst b/docs/source/cpp_api/solver.rst similarity index 73% rename from docs/source/cpp_api/optimization.rst rename to docs/source/cpp_api/solver.rst index 75cec2494e..a8b93ca046 100644 --- a/docs/source/cpp_api/optimization.rst +++ b/docs/source/cpp_api/solver.rst @@ -7,13 +7,12 @@ This page provides C++ class references for the publicly-exposed elements of the Linear Assignment Problem ######################### -.. doxygenclass:: raft::lap::LinearAssignmentProblem +.. doxygenclass:: raft::solver::LinearAssignmentProblem :project: RAFT :members: Minimum Spanning Tree ##################### -.. doxygennamespace:: raft::mst +.. 
doxygenfunction:: raft::sparse::solver::mst :project: RAFT - :members: diff --git a/docs/source/cpp_api/spatial.rst b/docs/source/cpp_api/spatial.rst deleted file mode 100644 index 9bda00dab7..0000000000 --- a/docs/source/cpp_api/spatial.rst +++ /dev/null @@ -1,31 +0,0 @@ -Spatial -======= - -This page provides C++ class references for the publicly-exposed elements of the spatial package. - -Distance -######## - -.. doxygennamespace:: raft::distance - :project: RAFT - - -Nearest Neighbors -################# - -.. doxygenfunction:: raft::spatial::knn::brute_force_knn - :project: RAFT - -.. doxygenfunction:: raft::spatial::knn::select_k - :project: RAFT - -.. doxygenfunction:: raft::spatial::knn::knn_merge_parts - :project: RAFT - - -IVF-Flat --------- - -.. doxygennamespace:: raft::spatial::knn::ivf_flat - :project: RAFT - :members: