Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add manual iterator implementations for custom vec iterables #1107

Merged
merged 6 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions releasenotes/notes/custom-iterators-dce8557a8f87e8c0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
features:
- |
Rustworkx functions that return custom iterable objects, such as
:meth:`.PyDiGraph.node_indices`, now each have an associated custom
iterator and reversed-iterator object for these. This provides a speedup
of approximately 40% for iterating through the custom iterables.

These types are not directly nameable or constructable from Python space, and other than the
performance improvement, the behavior should largely not be noticable from Python space.
2 changes: 2 additions & 0 deletions rustworkx/rustworkx.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ class _RustworkxCustomVecIter(Generic[_T_co], Sequence[_T_co], ABC):
def __ne__(self, other: object) -> bool: ...
def __setstate__(self, state: Sequence[_T_co]) -> None: ...
def __array__(self, _dt: np.dtype | None = ...) -> np.ndarray: ...
def __iter__(self) -> Iterator[_T_co]: ...
def __reversed__(self) -> Iterator[_T_co]: ...

class _RustworkxCustomHashMapIter(Generic[_S, _T_co], Mapping[_S, _T_co], ABC):
def __init__(self) -> None: ...
Expand Down
136 changes: 133 additions & 3 deletions src/iterators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
// There are two useful macros to quickly define a new custom return type:
//
// :`custom_vec_iter_impl` holds a `Vec<T>` and can be used as a
// read-only sequence/list. To use it, you should specify the name of the new type,
// the name of the vector that holds the data, the type `T` and a docstring.
// read-only sequence/list. To use it, you should specify the name of the new type for the
// iterable, a name for that new type's iterator, a name for the new type's reversed iterator, the
// name of the vector that holds the data, the type `T` and a docstring.
//
// e.g `custom_vec_iter_impl!(MyReadOnlyType, data, (usize, f64), "Docs");`
// defines a new type named `MyReadOnlyType` that holds a vector called `data`
Expand Down Expand Up @@ -473,7 +474,7 @@ impl PyConvertToPyArray for Vec<(usize, usize, PyObject)> {
}

macro_rules! custom_vec_iter_impl {
($name:ident, $data:ident, $T:ty, $doc:literal) => {
($name:ident, $iter:ident, $reversed:ident, $data:ident, $T:ty, $doc:literal) => {
#[doc = $doc]
#[pyclass(module = "rustworkx", sequence)]
#[derive(Clone)]
Expand Down Expand Up @@ -580,6 +581,20 @@ macro_rules! custom_vec_iter_impl {
}
}

fn __iter__(self_: Py<Self>, py: Python) -> $iter {
$iter {
inner: Some(self_.clone_ref(py)),
index: 0,
}
}

fn __reversed__(self_: Py<Self>, py: Python) -> $reversed {
$reversed {
inner: Some(self_.clone_ref(py)),
index: 0,
}
}

fn __array__(&self, py: Python, _dt: Option<&PyArrayDescr>) -> PyResult<PyObject> {
// Note: we accept the dtype argument on the signature but
// effictively do nothing with it to let Numpy handle the conversion itself
Expand All @@ -594,11 +609,114 @@ macro_rules! custom_vec_iter_impl {
PyGCProtocol::__clear__(self)
}
}

#[doc = concat!("Custom iterator class for :class:`.", stringify!($name), "`")]
// No module because this isn't constructable from Python space, and is only exposed as an
// implementation detail.
#[pyclass]
pub struct $iter {
inner: Option<Py<$name>>,
index: usize,
}

#[pymethods]
impl $iter {
fn __next__(&mut self, py: Python) -> Option<Py<PyAny>> {
let data = self.inner.as_ref().unwrap().borrow(py);
if self.index < data.$data.len() {
let out = data.$data[self.index].clone().into_py(py);
self.index += 1;
Some(out)
} else {
None
}
}

fn __iter__(self_: Py<Self>) -> Py<Self> {
// Python iterators typically just return themselves from this, though in principle
// we could return a separate object that iterates starting from the same point.
self_
}

fn __length_hint__(&self, py: Python) -> usize {
self.inner
.as_ref()
.unwrap()
.borrow(py)
.$data
.len()
.saturating_sub(self.index)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh that's a good trick to comply with __length_hint__ not returning a negative. Although as a usize I guess without this in release mode we would just overflow which wouldn't result in a ValueError although the hint would be wildly inaccurate (although PEP424 says the return is "not required to be accurate") for an empty or consumed iterator.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually wrote this initially for __len__ (which is required to be accurate) and reused it for __length_hint__, but then I remembered / realised that iterators don't typically implement __len__ so just renamed the method.

}

fn __traverse__(&self, vis: PyVisit) -> Result<(), PyTraverseError> {
if let Some(obj) = self.inner.as_ref() {
vis.call(obj)?
}
Ok(())
}

fn __clear__(&mut self) {
self.inner = None;
}
}

#[doc = concat!("Custom reversed iterator class for :class:`.", stringify!($name), "`")]
// No module because this isn't constructable from Python space, and is only exposed as an
// implementation detail.
#[pyclass]
pub struct $reversed {
inner: Option<Py<$name>>,
index: usize,
}

#[pymethods]
impl $reversed {
fn __next__(&mut self, py: Python) -> Option<Py<PyAny>> {
let data = self.inner.as_ref().unwrap().borrow(py);
let len = data.$data.len();
if self.index < len {
let out = data.$data[len - self.index - 1].clone().into_py(py);
self.index += 1;
Some(out)
} else {
None
}
}

fn __iter__(self_: Py<Self>) -> Py<Self> {
// Python iterators typically just return themselves from this, though in principle
// we could return a separate object that iterates starting from the same point.
self_
}

fn __length_hint__(&self, py: Python) -> usize {
self.inner
.as_ref()
.unwrap()
.borrow(py)
.$data
.len()
.saturating_sub(self.index)
}

fn __traverse__(&self, vis: PyVisit) -> Result<(), PyTraverseError> {
if let Some(obj) = self.inner.as_ref() {
vis.call(obj)?
}
Ok(())
}

fn __clear__(&mut self) {
self.inner = None;
}
}
};
}

custom_vec_iter_impl!(
BFSSuccessors,
BFSSuccessorsIter,
BFSSuccessorsRev,
bfs_successors,
(PyObject, Vec<PyObject>),
"A custom class for the return from :func:`rustworkx.bfs_successors`
Expand Down Expand Up @@ -651,6 +769,8 @@ impl PyGCProtocol for BFSSuccessors {

custom_vec_iter_impl!(
BFSPredecessors,
BFSPredecessorsIter,
BFSPredecessorsRev,
bfs_predecessors,
(PyObject, Vec<PyObject>),
"A custom class for the return from :func:`rustworkx.bfs_predecessors`
Expand Down Expand Up @@ -703,6 +823,8 @@ impl PyGCProtocol for BFSPredecessors {

custom_vec_iter_impl!(
NodeIndices,
NodeIndicesIter,
NodeIndicesRev,
nodes,
usize,
"A custom class for the return of node indices
Expand Down Expand Up @@ -735,6 +857,8 @@ impl PyGCProtocol for NodeIndices {}

custom_vec_iter_impl!(
EdgeList,
EdgeListIter,
EdgeListRev,
edges,
(usize, usize),
"A custom class for the return of edge lists
Expand Down Expand Up @@ -773,6 +897,8 @@ impl PyGCProtocol for EdgeList {}

custom_vec_iter_impl!(
WeightedEdgeList,
WeightedEdgeListIter,
WeightedEdgeListRev,
edges,
(usize, usize, PyObject),
"A custom class for the return of edge lists with weights
Expand Down Expand Up @@ -823,6 +949,8 @@ impl PyGCProtocol for WeightedEdgeList {

custom_vec_iter_impl!(
EdgeIndices,
EdgeIndicesIter,
EdgeIndicesRev,
edges,
usize,
"A custom class for the return of edge indices
Expand Down Expand Up @@ -875,6 +1003,8 @@ impl PyDisplay for EdgeList {

custom_vec_iter_impl!(
Chains,
ChainsIter,
ChainsRev,
chains,
EdgeList,
"A custom class for the return of a list of list of edges.
Expand Down
44 changes: 44 additions & 0 deletions tests/test_custom_return_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ def test_slices(self):
slice_return = successors[0:3:2]
self.assertEqual([("a", ["c", "b"])], slice_return)

def test_iter(self):
self.dag.add_child(self.node_a, "c", "edge")
successors = rustworkx.bfs_successors(self.dag, 0)
self.assertEqual(list(successors), list(iter(successors)))

def test_reversed(self):
self.dag.add_child(self.node_a, "c", "edge")
successors = rustworkx.bfs_successors(self.dag, 0)
self.assertEqual(list(successors)[::-1], list(reversed(successors)))


class TestNodeIndicesComparisons(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -174,6 +184,10 @@ def test_slices_negatives(self):
self.assertEqual([2, 3], slice_return)
self.assertEqual([], indices[-1:-2])

def test_iter(self):
indices = self.dag.node_indices()
self.assertEqual(list(indices), list(iter(indices)))

def test_reversed(self):
indices = self.dag.node_indices()
reversed_slice = indices[::-1]
Expand Down Expand Up @@ -352,6 +366,14 @@ def test_slices(self):
slice_return = edges[0:-1]
self.assertEqual([0, 1], slice_return)

def test_iter(self):
indices = self.dag.edge_indices()
self.assertEqual(list(indices), list(iter(indices)))

def test_reversed(self):
indices = self.dag.edge_indices()
self.assertEqual(list(indices)[::-1], list(reversed(indices)))


class TestEdgeListComparisons(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -425,6 +447,14 @@ def test_numpy_conversion():
np.asarray(res, dtype=np.uintp), np.array([[0, 1], [0, 2], [0, 3], [0, 4]])
)

def test_iter(self):
edges = self.dag.edge_list()
self.assertEqual(list(edges), list(iter(edges)))

def test_reversed(self):
edges = self.dag.edge_list()
self.assertEqual(list(edges)[::-1], list(reversed(edges)))


class TestWeightedEdgeListComparisons(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -500,6 +530,14 @@ def test_numpy_conversion(self):
np.asarray(self.dag.weighted_edge_list()), np.array([(0, 1, "Edgy")], dtype=object)
)

def test_iter(self):
edges = self.dag.weighted_edge_list()
self.assertEqual(list(edges), list(iter(edges)))

def test_reversed(self):
edges = self.dag.weighted_edge_list()
self.assertEqual(list(edges)[::-1], list(reversed(edges)))


class TestPathMapping(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -1387,6 +1425,12 @@ def test_numpy_conversion(self):
# this test assumes the array is 1-dimensional which avoids issues with jagged arrays
self.assertTrue(np.asarray(self.chains).shape, (1,))

def test_iter(self):
self.assertEqual(list(self.chains), list(iter(self.chains)))

def test_reversed(self):
self.assertEqual(list(self.chains)[::-1], list(reversed(self.chains)))


class TestProductNodeMap(unittest.TestCase):
def setUp(self):
Expand Down
Loading