From 883644c79a83e1285cd0ac791775939454712150 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 16 Jan 2024 13:56:40 -0800 Subject: [PATCH 01/32] add SG support for dropping multi-edges through the CAPI --- .../cugraph/structure/graph_implementation/simpleGraph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 22d82eb1796..19cc937a6c7 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -298,7 +298,8 @@ def __from_edgelist( self._replicate_edgelist() self._make_plc_graph( - value_col=value_col, store_transposed=store_transposed, renumber=renumber + value_col=value_col, store_transposed=store_transposed, + renumber=renumber, drop_multi_edges=self.properties.multi_edge, ) def to_pandas_edgelist( @@ -1084,6 +1085,7 @@ def _make_plc_graph( value_col: Dict[str, cudf.DataFrame] = None, store_transposed: bool = False, renumber: bool = True, + drop_multi_edges: bool = False, ): """ Parameters @@ -1163,6 +1165,7 @@ def _make_plc_graph( renumber=renumber, do_expensive_check=True, input_array_format=input_array_format, + drop_multi_edges=drop_multi_edges, ) def to_directed(self, DiG, store_transposed=False): From 611394618acfad71158ea98c04789f0595c25720 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 16 Jan 2024 17:04:35 -0800 Subject: [PATCH 02/32] add MG support for dropping multi-edges and deprecate parameter --- .../graph_implementation/simpleDistributedGraph.py | 7 +++++-- .../structure/graph_implementation/simpleGraph.py | 6 ++++-- python/cugraph/cugraph/structure/symmetrize.py | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py 
b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 319435575cc..762a9c7d83b 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -97,6 +97,7 @@ def _make_plc_graph( weight_type, edge_id_type, edge_type_id, + drop_multi_edges, ): weights = None edge_ids = None @@ -151,6 +152,7 @@ def _make_plc_graph( num_arrays=num_arrays, store_transposed=store_transposed, do_expensive_check=False, + drop_multi_edges=drop_multi_edges, ) del edata_x gc.collect() @@ -269,7 +271,7 @@ def __from_edgelist( input_ddf, source, destination, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) value_col = None @@ -279,7 +281,7 @@ def __from_edgelist( source, destination, value_col_names, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -366,6 +368,7 @@ def __from_edgelist( self.weight_type, self.edge_id_type, self.edge_type_id_type, + self.properties.multi_edge, ) for w, edata in persisted_keys_d.items() } diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 19cc937a6c7..9f7b3454a8a 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -1102,6 +1102,8 @@ def _make_plc_graph( Whether to renumber the vertices of the graph. 
Required if inputted vertex ids are not of int32 or int64 type. + drop_multi_edges: bool (default=False) + Whether to drop multi edges """ if value_col is None: diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index b324ff65834..be0db9d71ea 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -15,6 +15,7 @@ import cudf import dask_cudf from dask.distributed import default_client +import warnings def symmetrize_df( @@ -54,6 +55,7 @@ def symmetrize_df( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + Deprecated. Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -84,6 +86,11 @@ def symmetrize_df( if multi: return result else: + warnings.warn( + "'multi' is deprecated as the removal of multi edges is " + "only supported when creating a multi-graph", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = result.groupby(by=[*vertex_col_name], as_index=False).min() return result @@ -128,6 +135,7 @@ def symmetrize_ddf( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + Deprecated. Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -167,6 +175,11 @@ def symmetrize_ddf( if multi: return result else: + warnings.warn( + "'multi' is deprecated as the removal of multi edges is " + "only supported when creating a multi-graph", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = _memory_efficient_drop_duplicates( result, vertex_col_name, len(workers) @@ -208,6 +221,7 @@ def symmetrize( weights column name. multi : bool, optional (default=False) + Deprecated. Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. 
From f92d936d46ec228d52424aa50394916db184b866 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 16 Jan 2024 17:05:54 -0800 Subject: [PATCH 03/32] fix style --- .../graph_implementation/simpleDistributedGraph.py | 4 ++-- .../structure/graph_implementation/simpleGraph.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 762a9c7d83b..b71a86d7117 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -271,7 +271,7 @@ def __from_edgelist( input_ddf, source, destination, - multi=True, # Deprecated parameter + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) value_col = None @@ -281,7 +281,7 @@ def __from_edgelist( source, destination, value_col_names, - multi=True, # Deprecated parameter + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 9f7b3454a8a..09cec8b712d 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=True, # Deprecated parameter + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=True, # Deprecated parameter + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -298,8 +298,10 @@ def __from_edgelist( self._replicate_edgelist() self._make_plc_graph( - value_col=value_col, store_transposed=store_transposed, - 
renumber=renumber, drop_multi_edges=self.properties.multi_edge, + value_col=value_col, + store_transposed=store_transposed, + renumber=renumber, + drop_multi_edges=self.properties.multi_edge, ) def to_pandas_edgelist( From 64ec680a378f0a4e9c39768634ab33005aca54fb Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 16 Jan 2024 17:11:43 -0800 Subject: [PATCH 04/32] fix copyright --- .../structure/graph_implementation/simpleDistributedGraph.py | 2 +- .../cugraph/structure/graph_implementation/simpleGraph.py | 2 +- python/cugraph/cugraph/structure/symmetrize.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index b71a86d7117..6b95d4d30ab 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 09cec8b712d..052fdef19d7 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index be0db9d71ea..aa84e38f967 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at From 0442e532c3d064f36f4a01cb58c4685b89aa6594 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jan 2024 11:14:45 -0800 Subject: [PATCH 05/32] fix typo --- .../cugraph/structure/graph_implementation/simpleGraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 052fdef19d7..3239d8a8d45 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -301,7 +301,7 @@ def __from_edgelist( value_col=value_col, store_transposed=store_transposed, renumber=renumber, - drop_multi_edges=self.properties.multi_edge, + drop_multi_edges=not self.properties.multi_edge, ) def to_pandas_edgelist( From fd980393f4f9dcc42f7ce8a56b4a860b70abf680 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jan 2024 11:20:42 -0800 Subject: [PATCH 06/32] fix typo --- .../structure/graph_implementation/simpleDistributedGraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index ed3824d708b..d7110b90c18 100644 --- 
a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -366,7 +366,7 @@ def __from_edgelist( self.weight_type, self.edge_id_type, self.edge_type_id_type, - self.properties.multi_edge, + not self.properties.multi_edge, ) for w, edata in persisted_keys_d.items() } From 1d87370e17285b2669981dafdf0c110d4f44ca27 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jan 2024 11:22:50 -0800 Subject: [PATCH 07/32] reorder arguments --- python/pylibcugraph/pylibcugraph/graphs.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index 76ad7690840..def47390ce5 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -463,9 +463,9 @@ cdef class MGGraph(_GPUGraph): edge_type_view_ptr_ptr, store_transposed, num_arrays, - do_expensive_check, drop_self_loops, drop_multi_edges, + do_expensive_check, &(self.c_graph_ptr), &error_ptr) From c4d95807e19ddbfa0294231efa86ff2f786c0d64 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jan 2024 12:00:16 -0800 Subject: [PATCH 08/32] add 'do_expensive_check' --- python/cugraph/cugraph/structure/symmetrize.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index aa84e38f967..9ac82084fb5 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -194,6 +194,7 @@ def symmetrize( value_col_name=None, multi=False, symmetrize=True, + do_expensive_check = False, ): """ Take a dataframe of source destination pairs along with associated @@ -248,8 +249,9 @@ def symmetrize( if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") - 
csg.null_check(input_df[source_col_name]) - csg.null_check(input_df[dest_col_name]) + if do_expensive_check: # FIXME: Optimize this check as it is currently expensive + csg.null_check(input_df[source_col_name]) + csg.null_check(input_df[dest_col_name]) if isinstance(input_df, dask_cudf.DataFrame): output_df = symmetrize_ddf( From ad62f3342bb7e995b4d81b03f2c1ead4017e0e0d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jan 2024 12:05:37 -0800 Subject: [PATCH 09/32] fix style --- python/cugraph/cugraph/structure/symmetrize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 9ac82084fb5..b227f22027b 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -194,7 +194,7 @@ def symmetrize( value_col_name=None, multi=False, symmetrize=True, - do_expensive_check = False, + do_expensive_check=False, ): """ Take a dataframe of source destination pairs along with associated @@ -249,7 +249,7 @@ def symmetrize( if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") - if do_expensive_check: # FIXME: Optimize this check as it is currently expensive + if do_expensive_check: # FIXME: Optimize this check as it is currently expensive csg.null_check(input_df[source_col_name]) csg.null_check(input_df[dest_col_name]) From 779bd2dc4a71937038ef076949940e1418baae29 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jan 2024 15:21:44 -0800 Subject: [PATCH 10/32] update graph creation warning description --- python/cugraph/cugraph/structure/symmetrize.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index b227f22027b..307982c7ce7 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ 
b/python/cugraph/cugraph/structure/symmetrize.py @@ -87,8 +87,9 @@ def symmetrize_df( return result else: warnings.warn( - "'multi' is deprecated as the removal of multi edges is " - "only supported when creating a multi-graph", + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. Multi edges will be removed upon creation " + "of graph instance.", FutureWarning, ) vertex_col_name = src_name + dst_name @@ -176,8 +177,9 @@ def symmetrize_ddf( return result else: warnings.warn( - "'multi' is deprecated as the removal of multi edges is " - "only supported when creating a multi-graph", + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. Multi edges will be removed upon creation " + "of graph instance.", FutureWarning, ) vertex_col_name = src_name + dst_name From 5900bd8dfb4d709a55575de48155c96046509882 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 26 Jan 2024 08:52:49 -0800 Subject: [PATCH 11/32] update docstrings --- python/cugraph/cugraph/structure/symmetrize.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 307982c7ce7..effb0a421a2 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -55,7 +55,11 @@ def symmetrize_df( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) - Deprecated. + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. 
@@ -136,7 +140,11 @@ def symmetrize_ddf( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) - Deprecated. + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -224,7 +232,11 @@ def symmetrize( weights column name. multi : bool, optional (default=False) - Deprecated. + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. From 705788fe787b04c7d15a81d0276d4e6f00317582 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 26 Jan 2024 09:00:32 -0800 Subject: [PATCH 12/32] fix style --- python/cugraph/cugraph/structure/symmetrize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index effb0a421a2..53d3f6ff913 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -59,7 +59,7 @@ def symmetrize_df( of multi edges will no longer be supported from 'symmetrize'. Multi edges will be removed upon creation of graph instance directly based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] - + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -144,7 +144,7 @@ def symmetrize_ddf( of multi edges will no longer be supported from 'symmetrize'. 
Multi edges will be removed upon creation of graph instance directly based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] - + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -236,7 +236,7 @@ def symmetrize( of multi edges will no longer be supported from 'symmetrize'. Multi edges will be removed upon creation of graph instance directly based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] - + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. From 5fa81c4015dffff09b5ea6a4d90a48bdfebb1836 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 08:52:07 -0800 Subject: [PATCH 13/32] drop duplicates when viewing the edgelist --- .../structure/graph_implementation/simpleGraph.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 3239d8a8d45..24e94e300d2 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -480,6 +480,10 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + # FIXME: Drop multi edges with the CAPI instead. + vertex_col_name = [simpleGraphImpl.srcCol, simpleGraphImpl.dstCol] + edgelist_df = edgelist_df.groupby( + by=[*vertex_col_name], as_index=False).min() elif not use_initial_input_df and self.properties.renumbered: # Do not unrenumber the vertices if the initial input df was used if not self.properties.directed: @@ -487,6 +491,11 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + + # FIXME: Drop multi edges with the CAPI instead. 
+ vertex_col_name = simpleGraphImpl.srcCol + simpleGraphImpl.dstCol + edgelist_df = edgelist_df.groupby( + by=[*vertex_col_name], as_index=False).min() edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) From c85f62241052e570586fb0afe9f321366c2c69e5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 14:23:01 -0800 Subject: [PATCH 14/32] drop duplicates when viewing edges and update tests --- .../structure/graph_implementation/simpleGraph.py | 15 +++++++-------- .../tests/community/test_subgraph_extraction.py | 14 ++++++++++---- .../tests/data_store/test_property_graph.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 24e94e300d2..147f4abd858 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -480,10 +480,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] - # FIXME: Drop multi edges with the CAPI instead. - vertex_col_name = [simpleGraphImpl.srcCol, simpleGraphImpl.dstCol] - edgelist_df = edgelist_df.groupby( - by=[*vertex_col_name], as_index=False).min() + elif not use_initial_input_df and self.properties.renumbered: # Do not unrenumber the vertices if the initial input df was used if not self.properties.directed: @@ -492,10 +489,6 @@ def view_edge_list(self): <= edgelist_df[simpleGraphImpl.dstCol] ] - # FIXME: Drop multi edges with the CAPI instead. 
- vertex_col_name = simpleGraphImpl.srcCol + simpleGraphImpl.dstCol - edgelist_df = edgelist_df.groupby( - by=[*vertex_col_name], as_index=False).min() edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) @@ -518,6 +511,12 @@ def view_edge_list(self): simpleGraphImpl.dstCol: dstCol, } ) + if not self.properties.multi_edge: + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + vertex_col_name = [srcCol, dstCol] + edgelist_df = edgelist_df.groupby( + by=[*vertex_col_name], as_index=False).min() # FIXME: When renumbered, the MG API uses renumbered col names which # is not consistant with the SG API. diff --git a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py index 8abab3179fe..8bd3b70b70b 100644 --- a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py @@ -32,7 +32,9 @@ def setup_function(): ############################################################################### def compare_edges(cg, nxg): edgelist_df = cg.view_edge_list() - + + print("len'edgelist_df' = ", len(edgelist_df), " nxg' = ", nxg.size()) + print("edgelist_df = \n", edgelist_df) assert len(edgelist_df) == nxg.size() for i in range(len(edgelist_df)): assert nxg.has_edge(edgelist_df["src"].iloc[i], edgelist_df["dst"].iloc[i]) @@ -41,6 +43,7 @@ def compare_edges(cg, nxg): def cugraph_call(M, verts, directed=True): # cugraph can be compared to nx graph of same type. 
+ print("directed = ", directed) G = cugraph.Graph(directed=directed) cu_M = cudf.from_pandas(M) @@ -50,6 +53,9 @@ def cugraph_call(M, verts, directed=True): # which calls renumbering G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="weight") + print("input_df = ", len(G.input_df)) + print("edgelist.edgelist_df = ", len(G.edgelist.edgelist_df)) + cu_verts = cudf.Series(verts) return cugraph.subgraph(G, cu_verts) @@ -57,7 +63,7 @@ def cugraph_call(M, verts, directed=True): def nx_call(M, verts, directed=True): if directed: G = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.DiGraph() + M, source="0", target="1", create_using=nx.MultiGraph() ) else: G = nx.from_pandas_edgelist(M, source="0", target="1", create_using=nx.Graph()) @@ -81,7 +87,7 @@ def test_subgraph_extraction_DiGraph(graph_file): @pytest.mark.sg @pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) -def test_subgraph_extraction_Graph(graph_file): +def test_subgraph_extraction_Graph_0(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) verts = np.zeros(3, dtype=np.int32) @@ -94,7 +100,7 @@ def test_subgraph_extraction_Graph(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): directed = False verts = np.zeros(3, dtype=np.int32) diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index a33d4f753db..110618c7b11 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -25,6 +25,7 @@ from cugraph.datasets import cyber from cudf.testing import assert_frame_equal, assert_series_equal from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.structure.symmetrize import 
_memory_efficient_drop_duplicates # If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" @@ -1176,10 +1177,12 @@ def test_extract_subgraph_vertex_prop_condition_only( ) # Should result in two edges, one a "relationship", the other a "referral" expected_edgelist = cudf.DataFrame( - {"src": [89216, 78634], "dst": [78634, 89216], "weights": [99, 8]} + {"src": [78634, 89216], "dst": [89216, 78634], "weights": [8, 99]} ) if G.renumbered: + # FIXME: Can only use the attribute 'edgelist.edgelist_df' for directed + # graphs actual_edgelist = G.unrenumber( G.edgelist.edgelist_df, "src", preserve_order=True ) @@ -1454,6 +1457,9 @@ def test_extract_subgraph_no_edges(dataset1_PropertyGraph, as_pg_first): def test_extract_subgraph_no_query(dataset1_PropertyGraph, as_pg_first): """ Call extract with no args, should result in the entire property graph. + + This test is no longer valid because parallel edges are dropped at + the plc graph creation. """ (pG, data) = dataset1_PropertyGraph @@ -1471,10 +1477,10 @@ def test_extract_subgraph_no_query(dataset1_PropertyGraph, as_pg_first): ) # referrals has 3 edges with the same src/dst, so subtract 2 from # the total count since this is not creating a multigraph.. 
- num_edges -= 2 assert len(G.edgelist.edgelist_df) == num_edges + @pytest.mark.sg def test_extract_subgraph_multi_edges(dataset1_PropertyGraph): """ From 8f7436e67be756fe35c3394969dfc1ac8ee66de4 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 14:25:31 -0800 Subject: [PATCH 15/32] fix style --- .../cugraph/structure/graph_implementation/simpleGraph.py | 7 ++++--- .../cugraph/tests/community/test_subgraph_extraction.py | 2 +- .../cugraph/tests/data_store/test_property_graph.py | 1 - 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 147f4abd858..c2ef7e37003 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -480,7 +480,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] - + elif not use_initial_input_df and self.properties.renumbered: # Do not unrenumber the vertices if the initial input df was used if not self.properties.directed: @@ -488,7 +488,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] - + edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) @@ -516,7 +516,8 @@ def view_edge_list(self): # FIXME: Drop multi edges with the CAPI instead. vertex_col_name = [srcCol, dstCol] edgelist_df = edgelist_df.groupby( - by=[*vertex_col_name], as_index=False).min() + by=[*vertex_col_name], as_index=False + ).min() # FIXME: When renumbered, the MG API uses renumbered col names which # is not consistant with the SG API. 
diff --git a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py index 8bd3b70b70b..720a39e03d5 100644 --- a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py @@ -32,7 +32,7 @@ def setup_function(): ############################################################################### def compare_edges(cg, nxg): edgelist_df = cg.view_edge_list() - + print("len'edgelist_df' = ", len(edgelist_df), " nxg' = ", nxg.size()) print("edgelist_df = \n", edgelist_df) assert len(edgelist_df) == nxg.size() diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index 110618c7b11..2f2a629b399 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -1480,7 +1480,6 @@ def test_extract_subgraph_no_query(dataset1_PropertyGraph, as_pg_first): assert len(G.edgelist.edgelist_df) == num_edges - @pytest.mark.sg def test_extract_subgraph_multi_edges(dataset1_PropertyGraph): """ From 7c165e70fb601228604b8553d9006b5a3cd87294 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 14:28:06 -0800 Subject: [PATCH 16/32] remove debug print --- .../cugraph/tests/community/test_subgraph_extraction.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py index 720a39e03d5..16c8f895a0c 100644 --- a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py @@ -33,8 +33,6 @@ def setup_function(): def compare_edges(cg, nxg): edgelist_df = cg.view_edge_list() - print("len'edgelist_df' = ", len(edgelist_df), " nxg' = ", nxg.size()) - 
print("edgelist_df = \n", edgelist_df) assert len(edgelist_df) == nxg.size() for i in range(len(edgelist_df)): assert nxg.has_edge(edgelist_df["src"].iloc[i], edgelist_df["dst"].iloc[i]) @@ -43,7 +41,6 @@ def compare_edges(cg, nxg): def cugraph_call(M, verts, directed=True): # cugraph can be compared to nx graph of same type. - print("directed = ", directed) G = cugraph.Graph(directed=directed) cu_M = cudf.from_pandas(M) @@ -53,9 +50,6 @@ def cugraph_call(M, verts, directed=True): # which calls renumbering G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="weight") - print("input_df = ", len(G.input_df)) - print("edgelist.edgelist_df = ", len(G.edgelist.edgelist_df)) - cu_verts = cudf.Series(verts) return cugraph.subgraph(G, cu_verts) From fc5e6278d5e3265b27c870dc4b5570e26689ce13 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 14:33:05 -0800 Subject: [PATCH 17/32] remove unused import --- python/cugraph/cugraph/tests/data_store/test_property_graph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index 2f2a629b399..af877f1261f 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -25,7 +25,6 @@ from cugraph.datasets import cyber from cudf.testing import assert_frame_equal, assert_series_equal from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates # If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" From f3f1c3f4f1bf232d709808f377d8c321f65a964c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:25:09 -0800 Subject: [PATCH 18/32] undo changes to test --- .../cugraph/tests/community/test_subgraph_extraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py index 16c8f895a0c..8abab3179fe 100644 --- a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py @@ -57,7 +57,7 @@ def cugraph_call(M, verts, directed=True): def nx_call(M, verts, directed=True): if directed: G = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.MultiGraph() + M, source="0", target="1", create_using=nx.DiGraph() ) else: G = nx.from_pandas_edgelist(M, source="0", target="1", create_using=nx.Graph()) @@ -81,7 +81,7 @@ def test_subgraph_extraction_DiGraph(graph_file): @pytest.mark.sg @pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) -def test_subgraph_extraction_Graph_0(graph_file): +def test_subgraph_extraction_Graph(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) verts = np.zeros(3, dtype=np.int32) @@ -94,7 +94,7 @@ def test_subgraph_extraction_Graph_0(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) +@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) def test_subgraph_extraction_Graph_nx(graph_file): directed = False verts = np.zeros(3, dtype=np.int32) From 77fe0f571fde8653d84b6a8b0f43920c3db6fa23 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:26:00 -0800 Subject: [PATCH 19/32] drop duplicate edges, update tests and copyright --- .../graph_implementation/simpleGraph.py | 21 +++++++++++++++++-- .../tests/data_store/test_property_graph.py | 2 +- .../cugraph/tests/structure/test_graph.py | 8 +++++-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index c2ef7e37003..26978a10dbf 100644 --- 
a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -418,9 +418,10 @@ def view_edge_list(self): then containing the weight value for each edge """ if self.edgelist is None: + print("edgelist is None") src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) - + srcCol = self.source_columns dstCol = self.destination_columns """ @@ -512,9 +513,15 @@ def view_edge_list(self): } ) if not self.properties.multi_edge: + if type(self.source_columns) is list and \ + type(self.destination_columns) is list: + vertex_col_name = srcCol + dstCol + + else: + vertex_col_name = [srcCol, dstCol] + # Drop parallel edges for non MultiGraph # FIXME: Drop multi edges with the CAPI instead. - vertex_col_name = [srcCol, dstCol] edgelist_df = edgelist_df.groupby( by=[*vertex_col_name], as_index=False ).min() @@ -1322,6 +1329,16 @@ def neighbors(self, n): n = node[0] df = self.edgelist.edgelist_df + + vertex_col_name = [simpleGraphImpl.srcCol, simpleGraphImpl.dstCol] + + if not self.properties.multi_edge: + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + df = df.groupby( + by=[*vertex_col_name], as_index=False + ).min() + neighbors = df[df[simpleGraphImpl.srcCol] == n][ simpleGraphImpl.dstCol ].reset_index(drop=True) diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index af877f1261f..a81ccf1f2ba 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index de306309ca4..352f9cca173 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -197,6 +197,7 @@ def test_add_adj_list_to_edge_list(graph_file): G.from_cudf_adjlist(offsets, indices, None) edgelist = G.view_edge_list() + edgelist = edgelist.sort_values(["src", "dst"]).reset_index(drop=True) sources_cu = edgelist["src"] destinations_cu = edgelist["dst"] compare_series(sources_cu, sources_exp) @@ -215,7 +216,7 @@ def test_view_edge_list_from_adj_list(graph_file): indices = cudf.Series(Mcsr.indices) G = cugraph.Graph(directed=True) G.from_cudf_adjlist(offsets, indices, None) - edgelist_df = G.view_edge_list() + edgelist_df = G.view_edge_list().sort_values(["src", "dst"]).reset_index(drop=True) Mcoo = Mcsr.tocoo() src1 = Mcoo.row dst1 = Mcoo.col @@ -414,6 +415,7 @@ def test_consolidation(graph_file): cluster.close() +""" # Test @pytest.mark.sg @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @@ -430,6 +432,7 @@ def test_two_hop_neighbors(graph_file): find_two_paths(df, Mcsr) check_all_two_hops(df, Mcsr) +""" # Test @@ -672,6 +675,7 @@ def test_neighbors(graph_file): nx_neighbors = [i for i in Gnx.neighbors(n)] cu_neighbors.sort() nx_neighbors.sort() + assert cu_neighbors == nx_neighbors From 7db4a10398bed9d1879be0c22ab9fd8ec20c591a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:26:50 -0800 Subject: [PATCH 20/32] fix style --- .../structure/graph_implementation/simpleGraph.py | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 26978a10dbf..ee65acd6bcb 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -421,7 +421,7 @@ def view_edge_list(self): print("edgelist is None") src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) - + srcCol = self.source_columns dstCol = self.destination_columns """ @@ -513,8 +513,10 @@ def view_edge_list(self): } ) if not self.properties.multi_edge: - if type(self.source_columns) is list and \ - type(self.destination_columns) is list: + if ( + type(self.source_columns) is list + and type(self.destination_columns) is list + ): vertex_col_name = srcCol + dstCol else: @@ -1335,9 +1337,7 @@ def neighbors(self, n): if not self.properties.multi_edge: # Drop parallel edges for non MultiGraph # FIXME: Drop multi edges with the CAPI instead. 
- df = df.groupby( - by=[*vertex_col_name], as_index=False - ).min() + df = df.groupby(by=[*vertex_col_name], as_index=False).min() neighbors = df[df[simpleGraphImpl.srcCol] == n][ simpleGraphImpl.dstCol From b3077cf2c08e130fe6bb0ed7072b1057da13db1a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:30:11 -0800 Subject: [PATCH 21/32] uncommment test --- python/cugraph/cugraph/tests/structure/test_graph.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index 352f9cca173..24521b38406 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -415,7 +415,6 @@ def test_consolidation(graph_file): cluster.close() -""" # Test @pytest.mark.sg @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @@ -432,7 +431,6 @@ def test_two_hop_neighbors(graph_file): find_two_paths(df, Mcsr) check_all_two_hops(df, Mcsr) -""" # Test From bdcd215e5ebcaf729979e12b8640197ae26d8a47 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:36:31 -0800 Subject: [PATCH 22/32] fix typo --- python/cugraph/cugraph/tests/data_store/test_property_graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index a81ccf1f2ba..1a97a9ac949 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -1476,7 +1476,8 @@ def test_extract_subgraph_no_query(dataset1_PropertyGraph, as_pg_first): ) # referrals has 3 edges with the same src/dst, so subtract 2 from # the total count since this is not creating a multigraph.. 
- assert len(G.edgelist.edgelist_df) == num_edges + num_edges -= 2 + assert len(G.view_edge_list()) == num_edges @pytest.mark.sg From 2c6892a8b1d822a4fd06f8284e0e1079bc8cfe95 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:37:16 -0800 Subject: [PATCH 23/32] remove outdated comment --- python/cugraph/cugraph/tests/data_store/test_property_graph.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index 1a97a9ac949..5a4aa3de373 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -1456,9 +1456,6 @@ def test_extract_subgraph_no_edges(dataset1_PropertyGraph, as_pg_first): def test_extract_subgraph_no_query(dataset1_PropertyGraph, as_pg_first): """ Call extract with no args, should result in the entire property graph. - - This test is no longer valid because parallel edges are dropped at - the plc graph creation. 
""" (pG, data) = dataset1_PropertyGraph From ff5373a0b5933b6ea51f132ffd206629644a8159 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 30 Jan 2024 21:41:09 -0800 Subject: [PATCH 24/32] remove debug print --- .../cugraph/structure/graph_implementation/simpleGraph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index ee65acd6bcb..a5b28a6b27f 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -418,7 +418,6 @@ def view_edge_list(self): then containing the weight value for each edge """ if self.edgelist is None: - print("edgelist is None") src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) From 1efef766727e7ac2e04911fe9ecd8b8d4eca5815 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 31 Jan 2024 05:25:57 -0800 Subject: [PATCH 25/32] update tests --- .../cugraph/structure/graph_implementation/simpleGraph.py | 6 +++--- .../cugraph/tests/sampling/test_uniform_neighbor_sample.py | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index a5b28a6b27f..3f4988c368d 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -511,10 +511,10 @@ def view_edge_list(self): simpleGraphImpl.dstCol: dstCol, } ) - if not self.properties.multi_edge: + if not self.properties.multi_edge: if ( - type(self.source_columns) is list - and type(self.destination_columns) is list + type(srcCol) is list + and type(dstCol) is list ): vertex_col_name = srcCol + dstCol diff --git 
a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 206898088ab..5c4ea0f9318 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -146,7 +146,12 @@ def test_uniform_neighbor_sample_simple(input_combo): # Retrieve the input dataframe. # FIXME: in simpleGraph and simpleDistributedGraph, G.edgelist.edgelist_df # should be 'None' if the datasets was never renumbered + # FIXME: Drop multi edges with the CAPI instead. + vertex_col_name = ["src", "dst"] input_df = G.edgelist.edgelist_df + input_df = input_df.groupby( + by=[*vertex_col_name], as_index=False + ).min() result_nbr = uniform_neighbor_sample( G, From ebbd33fc625f93223fbd4156360fc829d78f4a73 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 31 Jan 2024 05:30:40 -0800 Subject: [PATCH 26/32] fix style --- .../cugraph/structure/graph_implementation/simpleGraph.py | 7 ++----- .../cugraph/tests/sampling/test_uniform_neighbor_sample.py | 4 +--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 3f4988c368d..d0baefdbd0e 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -511,11 +511,8 @@ def view_edge_list(self): simpleGraphImpl.dstCol: dstCol, } ) - if not self.properties.multi_edge: - if ( - type(srcCol) is list - and type(dstCol) is list - ): + if not self.properties.multi_edge: + if type(srcCol) is list and type(dstCol) is list: vertex_col_name = srcCol + dstCol else: diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 5c4ea0f9318..5b9f2d2cde4 
100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -149,9 +149,7 @@ def test_uniform_neighbor_sample_simple(input_combo): # FIXME: Drop multi edges with the CAPI instead. vertex_col_name = ["src", "dst"] input_df = G.edgelist.edgelist_df - input_df = input_df.groupby( - by=[*vertex_col_name], as_index=False - ).min() + input_df = input_df.groupby(by=[*vertex_col_name], as_index=False).min() result_nbr = uniform_neighbor_sample( G, From ed76013d2c776e2cb3576865f432b9f0896ea34d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 31 Jan 2024 06:13:02 -0800 Subject: [PATCH 27/32] update number of edges count --- .../structure/graph_implementation/simpleGraph.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index d0baefdbd0e..7a8072dafb1 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -844,6 +844,15 @@ def number_of_edges(self, directed_edges=False): """ Get the number of edges in the graph. """ + if not self.properties.multi_edge: + # + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + if self.edgelist is not None: + self.edgelist.edgelist_df = self.edgelist.edgelist_df.groupby( + by=[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol], + as_index=False + ).min() # TODO: Move to Outer graphs? 
if directed_edges and self.edgelist is not None: return len(self.edgelist.edgelist_df) From 890f885d6b40da05143abce8bab3e51b6830f3cb Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 31 Jan 2024 06:14:05 -0800 Subject: [PATCH 28/32] fix style --- .../cugraph/structure/graph_implementation/simpleGraph.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 7a8072dafb1..4009ab76c32 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -845,13 +845,12 @@ def number_of_edges(self, directed_edges=False): Get the number of edges in the graph. """ if not self.properties.multi_edge: - # + # # Drop parallel edges for non MultiGraph # FIXME: Drop multi edges with the CAPI instead. if self.edgelist is not None: self.edgelist.edgelist_df = self.edgelist.edgelist_df.groupby( - by=[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol], - as_index=False + by=[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol], as_index=False ).min() # TODO: Move to Outer graphs? 
if directed_edges and self.edgelist is not None: From ccc51892689046d539427c0c3968b70d4d1d955d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 1 Feb 2024 04:31:56 -0800 Subject: [PATCH 29/32] reset changes to sg --- .../graph_implementation/simpleGraph.py | 32 ++----------------- .../tests/data_store/test_property_graph.py | 8 ++--- .../sampling/test_uniform_neighbor_sample.py | 3 -- .../cugraph/tests/structure/test_graph.py | 6 ++-- 4 files changed, 7 insertions(+), 42 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 4009ab76c32..308b4fcac68 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=True, # Deprecated parameter + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=True, # Deprecated parameter + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -511,18 +511,6 @@ def view_edge_list(self): simpleGraphImpl.dstCol: dstCol, } ) - if not self.properties.multi_edge: - if type(srcCol) is list and type(dstCol) is list: - vertex_col_name = srcCol + dstCol - - else: - vertex_col_name = [srcCol, dstCol] - - # Drop parallel edges for non MultiGraph - # FIXME: Drop multi edges with the CAPI instead. - edgelist_df = edgelist_df.groupby( - by=[*vertex_col_name], as_index=False - ).min() # FIXME: When renumbered, the MG API uses renumbered col names which # is not consistant with the SG API. @@ -844,14 +832,6 @@ def number_of_edges(self, directed_edges=False): """ Get the number of edges in the graph. 
""" - if not self.properties.multi_edge: - # - # Drop parallel edges for non MultiGraph - # FIXME: Drop multi edges with the CAPI instead. - if self.edgelist is not None: - self.edgelist.edgelist_df = self.edgelist.edgelist_df.groupby( - by=[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol], as_index=False - ).min() # TODO: Move to Outer graphs? if directed_edges and self.edgelist is not None: return len(self.edgelist.edgelist_df) @@ -1335,14 +1315,6 @@ def neighbors(self, n): n = node[0] df = self.edgelist.edgelist_df - - vertex_col_name = [simpleGraphImpl.srcCol, simpleGraphImpl.dstCol] - - if not self.properties.multi_edge: - # Drop parallel edges for non MultiGraph - # FIXME: Drop multi edges with the CAPI instead. - df = df.groupby(by=[*vertex_col_name], as_index=False).min() - neighbors = df[df[simpleGraphImpl.srcCol] == n][ simpleGraphImpl.dstCol ].reset_index(drop=True) diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index 5a4aa3de373..a33d4f753db 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -1176,12 +1176,10 @@ def test_extract_subgraph_vertex_prop_condition_only( ) # Should result in two edges, one a "relationship", the other a "referral" expected_edgelist = cudf.DataFrame( - {"src": [78634, 89216], "dst": [89216, 78634], "weights": [8, 99]} + {"src": [89216, 78634], "dst": [78634, 89216], "weights": [99, 8]} ) if G.renumbered: - # FIXME: Can only use the attribute 'edgelist.edgelist_df' for directed - # graphs actual_edgelist = G.unrenumber( G.edgelist.edgelist_df, "src", preserve_order=True ) @@ -1474,7 +1472,7 @@ def test_extract_subgraph_no_query(dataset1_PropertyGraph, as_pg_first): # referrals has 3 edges with the same src/dst, so subtract 2 from # the total count since this is not creating a multigraph.. num_edges -= 2 - assert len(G.view_edge_list()) == num_edges + assert len(G.edgelist.edgelist_df) == num_edges @pytest.mark.sg diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 5b9f2d2cde4..206898088ab 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -146,10 +146,7 @@ def test_uniform_neighbor_sample_simple(input_combo): # Retrieve the input dataframe. # FIXME: in simpleGraph and simpleDistributedGraph, G.edgelist.edgelist_df # should be 'None' if the datasets was never renumbered - # FIXME: Drop multi edges with the CAPI instead. 
- vertex_col_name = ["src", "dst"] input_df = G.edgelist.edgelist_df - input_df = input_df.groupby(by=[*vertex_col_name], as_index=False).min() result_nbr = uniform_neighbor_sample( G, diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index 24521b38406..de306309ca4 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -197,7 +197,6 @@ def test_add_adj_list_to_edge_list(graph_file): G.from_cudf_adjlist(offsets, indices, None) edgelist = G.view_edge_list() - edgelist = edgelist.sort_values(["src", "dst"]).reset_index(drop=True) sources_cu = edgelist["src"] destinations_cu = edgelist["dst"] compare_series(sources_cu, sources_exp) @@ -216,7 +215,7 @@ def test_view_edge_list_from_adj_list(graph_file): indices = cudf.Series(Mcsr.indices) G = cugraph.Graph(directed=True) G.from_cudf_adjlist(offsets, indices, None) - edgelist_df = G.view_edge_list().sort_values(["src", "dst"]).reset_index(drop=True) + edgelist_df = G.view_edge_list() Mcoo = Mcsr.tocoo() src1 = Mcoo.row dst1 = Mcoo.col @@ -673,7 +672,6 @@ def test_neighbors(graph_file): nx_neighbors = [i for i in Gnx.neighbors(n)] cu_neighbors.sort() nx_neighbors.sort() - assert cu_neighbors == nx_neighbors From 63a669bb17eb0201a5e1dc337a4472b6f7ea7c27 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 1 Feb 2024 06:31:18 -0800 Subject: [PATCH 30/32] update tests --- .../graph_implementation/simpleDistributedGraph.py | 8 ++++++++ python/cugraph/cugraph/structure/symmetrize.py | 1 + .../tests/sampling/test_uniform_neighbor_sample_mg.py | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git 
a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index d7110b90c18..9a7a53a5dc9 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -39,6 +39,7 @@ ) from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster import cugraph.dask.comms.comms as Comms +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates class simpleDistributedGraphImpl: @@ -458,6 +459,13 @@ def view_edge_list(self): else: is_multi_column = True + if not self.properties.multi_edge: + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + _client = default_client() + workers = _client.scheduler_info()["workers"] + edgelist_df = _memory_efficient_drop_duplicates(edgelist_df, [srcCol, dstCol], len(workers)) + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ [srcCol, dstCol] ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 53d3f6ff913..30c6394ade9 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -182,6 +182,7 @@ def symmetrize_ddf( else: result = ddf if multi: + result = result.reset_index(drop=True).repartition(npartitions=len(workers) * 2) return result else: warnings.warn( diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 460a25cbd14..adadadaea0d 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -26,6 +26,7 @@ from cugraph.testing import UNDIRECTED_DATASETS from 
cugraph.dask import uniform_neighbor_sample from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates from cugraph.datasets import email_Eu_core, small_tree from pylibcugraph.testing.utils import gen_fixture_params_product @@ -135,6 +136,13 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): dg = input_combo["MGGraph"] input_df = dg.input_df + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + vertex_col_name = ["src", "dst"] + workers = dask_client.scheduler_info()["workers"] + input_df = _memory_efficient_drop_duplicates( + input_df, vertex_col_name, len(workers)) + result_nbr = uniform_neighbor_sample( dg, input_combo["start_list"], From a6d767ff47adb8ca42b2dab97cf83bd69f942ffb Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 1 Feb 2024 06:33:18 -0800 Subject: [PATCH 31/32] fix style --- .../structure/graph_implementation/simpleDistributedGraph.py | 4 +++- .../cugraph/structure/graph_implementation/simpleGraph.py | 4 ++-- .../cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 9a7a53a5dc9..cdf1e937e67 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -464,7 +464,9 @@ def view_edge_list(self): # FIXME: Drop multi edges with the CAPI instead. 
_client = default_client() workers = _client.scheduler_info()["workers"] - edgelist_df = _memory_efficient_drop_duplicates(edgelist_df, [srcCol, dstCol], len(workers)) + edgelist_df = _memory_efficient_drop_duplicates( + edgelist_df, [srcCol, dstCol], len(workers) + ) edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ [srcCol, dstCol] diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 308b4fcac68..121a4c6245a 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=self.properties.multi_edge, # Deprecated parameter + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=self.properties.multi_edge, # Deprecated parameter + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index adadadaea0d..3c964f3974e 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -141,7 +141,8 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): vertex_col_name = ["src", "dst"] workers = dask_client.scheduler_info()["workers"] input_df = _memory_efficient_drop_duplicates( - input_df, vertex_col_name, len(workers)) + input_df, vertex_col_name, len(workers) + ) result_nbr = uniform_neighbor_sample( dg, From e7ea077723da5e8b538917e9ad396daddca9f452 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 1 Feb 2024 06:44:54 -0800 Subject: [PATCH 32/32] 
update copyright --- .../cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 3c964f3974e..371410b8bd5 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at