diff --git a/docs/generated/sparse.COO.elemwise.rst b/docs/generated/sparse.COO.elemwise.rst deleted file mode 100644 index 41d9e5f6..00000000 --- a/docs/generated/sparse.COO.elemwise.rst +++ /dev/null @@ -1,6 +0,0 @@ -COO\.elemwise -============= - -.. currentmodule:: sparse - -.. automethod:: COO.elemwise \ No newline at end of file diff --git a/docs/generated/sparse.COO.rst b/docs/generated/sparse.COO.rst index 94686cc5..1ca472e9 100644 --- a/docs/generated/sparse.COO.rst +++ b/docs/generated/sparse.COO.rst @@ -31,7 +31,6 @@ COO .. autosummary:: :toctree: - COO.elemwise COO.astype COO.round diff --git a/docs/generated/sparse.elemwise.rst b/docs/generated/sparse.elemwise.rst new file mode 100644 index 00000000..a553ff6d --- /dev/null +++ b/docs/generated/sparse.elemwise.rst @@ -0,0 +1,6 @@ +elemwise +======== + +.. currentmodule:: sparse + +.. autofunction:: elemwise \ No newline at end of file diff --git a/docs/generated/sparse.rst b/docs/generated/sparse.rst index be984601..58e4d2d7 100644 --- a/docs/generated/sparse.rst +++ b/docs/generated/sparse.rst @@ -27,6 +27,8 @@ API dot + elemwise + random stack diff --git a/docs/operations.rst b/docs/operations.rst index 041157f1..d68ef550 100644 --- a/docs/operations.rst +++ b/docs/operations.rst @@ -33,16 +33,20 @@ or numpy functions that are not yet implemented for sparse arrays This page describes those valid operations, and their limitations. -:obj:`COO.elemwise` +:obj:`elemwise` ~~~~~~~~~~~~~~~~~~~ This function allows you to apply any arbitrary unary or binary function where the first object is :obj:`COO`, and the second is a scalar, :obj:`COO`, or -a :doc:`Numpy arrays `. For example, the +a :obj:`scipy.sparse.spmatrix`. For example, the following will add two :obj:`COO` objects: .. code-block:: python - x.elemwise(np.add, y) + sparse.elemwise(np.add, x, y) + + +.. note:: Previously, :obj:`elemwise` was a method of the :obj:`COO` class. Now, + it has been moved to the :obj:`sparse` module. .. _operations-auto-densification: @@ -78,26 +82,8 @@ If densification is needed, it must be explicit. In other words, you must call :obj:`COO.todense` on the :obj:`COO` object. If both operands are :obj:`COO`, both must be densified. - -Operations with :doc:`Numpy arrays ` ------------------------------------------------------------------------ -Certain operations with :obj:`numpy.ndarray` are also supported. For example, -the following are all allowed if :code:`x` is a :obj:`numpy.ndarray` and -:code:`(x == 0).all()` evaluates to :code:`True`: - -.. code-block:: python - - x + y - x - y - -The following is true so long as there are no infinities or NaNs in :code:`x`: - -.. code-block:: python - - x * y - -In general, if operating on the :code:`numpy.ndarray` with a zero would produce -all-zeros then the operation is supported. +.. note:: Previously, operations with Numpy arrays were sometimes supported. Now, + it is necessary to convert Numpy arrays to :obj:`COO` objects. Operations with :obj:`scipy.sparse.spmatrix` diff --git a/sparse/__init__.py b/sparse/__init__.py index e8faae43..a37844ef 100644 --- a/sparse/__init__.py +++ b/sparse/__init__.py @@ -1,4 +1,4 @@ -from .coo import COO, tensordot, concatenate, stack, dot, triu, tril +from .coo import COO, elemwise, tensordot, concatenate, stack, dot, triu, tril from .dok import DOK from .sparse_array import SparseArray from .utils import random diff --git a/sparse/coo.py b/sparse/coo.py index 71dd4502..29cee457 100644 --- a/sparse/coo.py +++ b/sparse/coo.py @@ -1,7 +1,8 @@ from __future__ import absolute_import, division, print_function from collections import Iterable, defaultdict, deque -from functools import reduce, partial +from functools import reduce +from itertools import product import numbers import operator @@ -10,7 +11,7 @@ from numpy.lib.mixins import NDArrayOperatorsMixin from .slicing import normalize_index -from .utils import _zero_of_dtype +from .utils import _zero_of_dtype, isscalar, PositinalArgumentPartial from .sparse_array import SparseArray from .compatibility import int, zip_longest, range, zip @@ -242,7 +243,7 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True, super(COO, self).__init__(shape) if self.shape: - dtype = np.min_scalar_type(max(self.shape)) + dtype = np.min_scalar_type(max(max(self.shape) - 1, 0)) else: dtype = np.uint8 self.coords = self.coords.astype(dtype) @@ -1139,7 +1140,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented if method == '__call__': - return COO._elemwise(ufunc, *inputs, **kwargs) + return elemwise(ufunc, *inputs, **kwargs) elif method == 'reduce': return COO._reduce(ufunc, *inputs, **kwargs) else: @@ -1439,70 +1440,6 @@ def sum_duplicates(self): self.coords = coords self.has_duplicates = False - @staticmethod - def _elemwise(func, *args, **kwargs): - if len(args) == 0: - return func() - - self = args[0] - if np.isscalar(self) or (isinstance(self, np.ndarray) - and self.ndim == 0): - func = partial(func, self) - other = args[1] - return _elemwise_unary(func, other, *args[2:], **kwargs) - - if isinstance(self, scipy.sparse.spmatrix): - self = COO.from_scipy_sparse(self) - - if len(args) == 1: - return _elemwise_unary(func, self, *args[1:], **kwargs) - else: - other = args[1] - if isinstance(other, scipy.sparse.spmatrix): - other = COO.from_scipy_sparse(other) - - if isinstance(other, COO) or isinstance(other, np.ndarray): - return _elemwise_binary(func, self, other, *args[2:], **kwargs) - else: - return _elemwise_unary(func, self, *args[1:], **kwargs) - - def elemwise(self, func, *args, **kwargs): - """ - Apply a function to one or two arguments. - - Parameters - ---------- - func : Callable - The function to apply to one or two arguments. - args : tuple, optional - The extra arguments to pass to the function. If :code:`args[0]` is a COO object, - or a scalar; the function will be treated as a binary - function. Otherwise, it will be treated as a unary function. - kwargs : dict, optional - The kwargs to pass to the function. - - Returns - ------- - COO - The result of applying the function. - - Raises - ------ - ValueError - If the operation would result in a dense matrix. - - See Also - -------- - :obj:`numpy.ufunc` : A similar Numpy construct. Note that any :code:`ufunc` can be used - as the :code:`func` input to this function. - - Notes - ----- - In the future, this function may support functions of more than two arguments. Therefore, - it is best to pass in any additional arguments as keyword arguments. - """ - return COO._elemwise(func, self, *args, **kwargs) - def broadcast_to(self, shape): """ Performs the equivalent of :obj:`numpy.broadcast_to` for :obj:`COO`. Note that @@ -1527,6 +1464,9 @@ def broadcast_to(self, shape): -------- :obj:`numpy.broadcast_to` : NumPy equivalent function """ + if shape == self.shape: + return self + result_shape = _get_broadcast_shape(self.shape, shape, is_result=True) params = _get_broadcast_parameters(self.shape, result_shape) coords, data = _get_expanded_coords_data(self.coords, self.data, params, result_shape) @@ -1550,7 +1490,7 @@ def round(self, decimals=0, out=None): actually supported. """ assert out is None - return self.elemwise(np.round, decimals) + return elemwise(np.round, self, decimals=decimals) def astype(self, dtype, out=None): """ @@ -1569,7 +1509,7 @@ def astype(self, dtype, out=None): actually supported. """ assert out is None - return self.elemwise(np.ndarray.astype, dtype=dtype) + return elemwise(np.ndarray.astype, self, dtype=dtype) def maybe_densify(self, max_size=1000, min_density=0.25): """ @@ -2034,113 +1974,125 @@ def _grouped_reduce(x, groups, method, **kwargs): return result, inv_idx, counts -def _elemwise_binary(func, self, other, *args, **kwargs): - check = kwargs.pop('check', True) - self_zero = _zero_of_dtype(self.dtype) - other_zero = _zero_of_dtype(other.dtype) +def elemwise(func, *args, **kwargs): + """ + Apply a function to any number of arguments. - func_value = func(self_zero, other_zero, *args, **kwargs) - func_zero = _zero_of_dtype(np.dtype(func_value)) - if check and func(self_zero, other_zero, *args, **kwargs) != func_zero: - raise ValueError("Performing this operation would produce " - "a dense result: %s" % str(func)) + Parameters + ---------- + func : Callable + The function to apply. Must support broadcasting. + args : tuple, optional + The arguments to the function. Can be :obj:`scipy.sparse.spmatrix` objects, + :obj:`SparseArray` objects or :obj:`numpy.ndarray` objects. + kwargs : dict, optional + Any additional arguments to pass to the function. - if not isinstance(self, COO): - if not check or np.array_equiv(func(self, other_zero, *args, **kwargs), func_zero): - return _elemwise_binary_self_dense(func, self, other, *args, **kwargs) - else: - raise ValueError("Performing this operation would produce " - "a dense result: %s" % str(func)) + Returns + ------- + COO + The result of applying the function. - if not isinstance(other, COO): - if not check or np.array_equiv(func(self_zero, other, *args, **kwargs), func_zero): - temp_func = _reverse_self_other(func) - return _elemwise_binary_self_dense(temp_func, other, self, *args, **kwargs) - else: + Raises + ------ + ValueError + If the operation would result in a dense matrix, or if the operands + don't have broadcastable shapes. + + See Also + -------- + :obj:`numpy.ufunc` : A similar Numpy construct. Note that any :code:`ufunc` can be used + as the :code:`func` input to this function. + """ + # Because we need to mutate args. + args = list(args) + posargs = [] + pos = [] + for i, arg in enumerate(args): + if isinstance(arg, scipy.sparse.spmatrix): + args[i] = COO.from_scipy_sparse(arg) + elif isscalar(arg) or (isinstance(arg, np.ndarray) + and not arg.shape): + # Faster and more reliable to pass ()-shaped ndarrays as scalars. + if isinstance(arg, np.ndarray): + args[i] = arg[()] + + pos.append(i) + posargs.append(args[i]) + elif isinstance(arg, SparseArray) and not isinstance(arg, COO): + args[i] = COO(arg) + elif not isinstance(arg, COO): raise ValueError("Performing this operation would produce " "a dense result: %s" % str(func)) - self_shape, other_shape = self.shape, other.shape - - result_shape = _get_broadcast_shape(self_shape, other_shape) - self_params = _get_broadcast_parameters(self.shape, result_shape) - other_params = _get_broadcast_parameters(other.shape, result_shape) - combined_params = [p1 and p2 for p1, p2 in zip(self_params, other_params)] - self_reduce_params = combined_params[-self.ndim:] - other_reduce_params = combined_params[-other.ndim:] - - self.sum_duplicates() # TODO: document side-effect or make copy - other.sum_duplicates() # TODO: document side-effect or make copy - - self_coords = self.coords - self_data = self.data - - self_reduced_coords, self_reduced_shape = \ - _get_reduced_coords(self_coords, self_shape, - self_reduce_params) - self_reduced_linear = _linear_loc(self_reduced_coords, self_reduced_shape) - i = np.argsort(self_reduced_linear) - self_reduced_linear = self_reduced_linear[i] - self_coords = self_coords[:, i] - self_data = self_data[i] - - # Store coords - other_coords = other.coords - other_data = other.data - - other_reduced_coords, other_reduced_shape = \ - _get_reduced_coords(other_coords, other_shape, - other_reduce_params) - other_reduced_linear = _linear_loc(other_reduced_coords, other_reduced_shape) - i = np.argsort(other_reduced_linear) - other_reduced_linear = other_reduced_linear[i] - other_coords = other_coords[:, i] - other_data = other_data[i] - - # Find matches between self.coords and other.coords - matched_self, matched_other = _match_arrays(self_reduced_linear, - other_reduced_linear) - - # Start with an empty list. This may reduce computation in many cases. - data_list = [] - coords_list = [] + # Filter out scalars as they are 'baked' into the function. + func = PositinalArgumentPartial(func, pos, posargs) + args = [arg for arg in args if not isscalar(arg)] + + if len(args) == 0: + return func(**kwargs) + + if len(args) == 1: + return _elemwise_unary(func, args[0], **kwargs) + + return _elemwise_n_ary(func, *args, **kwargs) + - # Add the matched part. - matched_coords = _get_matching_coords(self_coords[:, matched_self], - other_coords[:, matched_other], - self_shape, other_shape) +def _elemwise_n_ary(func, *args, **kwargs): + """ + Apply a function to any number of arguments with broadcasting. + + Parameters + ---------- + func : Callable + The function to apply to arguments. Must support broadcasting. + args : list + Input :obj:`COO` or :obj:`numpy.ndarray`s. + kwargs : dict + Additional arguments to pass to the function. + + Returns + ------- + COO + The output array. + + Raises + ------ + ValueError + If the input shapes aren't compatible or the result will be dense. + """ + args = list(args) + + for arg in args: + if isinstance(arg, COO): + arg.sum_duplicates() - data_list.append(func(self_data[matched_self], - other_data[matched_other], - *args, **kwargs)) - coords_list.append(matched_coords) + args_zeros = tuple(_zero_of_dtype(arg.dtype)[()] for arg in args) - self_func = func(self_data, other_zero, *args, **kwargs) - # Add unmatched parts as necessary. - if (self_func != func_zero).any(): - self_unmatched_coords, self_unmatched_func = \ - _get_unmatched_coords_data(self_coords, self_func, self_shape, - result_shape, matched_self, - matched_coords) + func_value = func(*args_zeros, **kwargs) + func_zero = _zero_of_dtype(np.dtype(func_value)) + if func_value != func_zero: + raise ValueError("Performing this operation would produce " + "a dense result: %s" % str(func)) + data_list = [] + coords_list = [] - data_list.extend(self_unmatched_func) - coords_list.extend(self_unmatched_coords) + cache = {} + for mask in product([True, False], repeat=len(args)): + if not any(mask): + continue - other_func = func(self_zero, other_data, *args, **kwargs) + ci, di = _unmatch_coo(func, args, mask, cache, **kwargs) - if (other_func != func_zero).any(): - other_unmatched_coords, other_unmatched_func = \ - _get_unmatched_coords_data(other_coords, other_func, other_shape, - result_shape, matched_other, - matched_coords) + coords_list.extend(ci) + data_list.extend(di) - coords_list.extend(other_unmatched_coords) - data_list.extend(other_unmatched_func) + result_shape = _get_nary_broadcast_shape(*[arg.shape for arg in args]) # Concatenate matches and mismatches - data = np.concatenate(data_list) if len(data_list) else np.empty((0,), dtype=self.dtype) + data = np.concatenate(data_list) if len(data_list) else np.empty((0,), dtype=args[0].dtype) coords = np.concatenate(coords_list, axis=1) if len(coords_list) else \ - np.empty((0, len(result_shape)), dtype=self.coords.dtype) + np.empty((0, len(result_shape)), dtype=np.min_scalar_type(max(result_shape) - 1)) nonzero = data != func_zero data = data[nonzero] @@ -2149,101 +2101,174 @@ def _elemwise_binary(func, self, other, *args, **kwargs): return COO(coords, data, shape=result_shape, has_duplicates=False) -def _elemwise_binary_self_dense(func, self, other, *args, **kwargs): - assert isinstance(self, np.ndarray) - assert isinstance(other, COO) +def _match_coo(*args, **kwargs): + """ + Matches the coordinates for any number of input :obj:`COO` arrays. + Equivalent to "sparse" broadcasting for all arrays. + + Parameters + ---------- + args : Tuple[COO] + The input :obj:`COO` arrays. + return_midx : bool + Whether to return matched indices or matched arrays. Matching + only supported for two arrays. ``False`` by default. + cache : dict + Cache of things already matched. No cache by default. + + Returns + ------- + matched_idx : List[ndarray] + The indices of matched elements in the original arrays. Only returned if + ``return_midx`` is ``True``. + matched_arrays : List[COO] + The expanded, matched :obj:`COO` objects. Only returned if + ``return_midx`` is ``False``. + """ + return_midx = kwargs.pop('return_midx', False) + cache = kwargs.pop('cache', None) - result_shape = _get_broadcast_shape(self.shape, other.shape) + if kwargs: + raise ValueError('Unknown kwargs %s' % kwargs.keys()) - if result_shape != other.shape: - other = other.broadcast_to(result_shape) + if return_midx and (len(args) != 2 or cache is not None): + raise NotImplementedError('Matching indices only supported for two args, and no cache.') - self = np.broadcast_to(self, result_shape) + matched_arrays = [args[0]] + cache_key = [id(args[0])] + for arg2 in args[1:]: + cache_key.append(id(arg2)) + key = tuple(cache_key) + if cache is not None and key in cache: + matched_arrays = cache[key] + continue - self_coords = tuple([other.coords[i, :] for i in range(other.ndim)]) + cargs = [matched_arrays[0], arg2] + current_shape = _get_broadcast_shape(matched_arrays[0].shape, arg2.shape) + params = [_get_broadcast_parameters(arg.shape, current_shape) for arg in cargs] + reduced_params = [all(p) for p in zip(*params)] + reduced_shape = _get_reduced_shape(arg2.shape, + reduced_params[-arg2.ndim:]) - self_data = self[self_coords] + reduced_coords = [_get_reduced_coords(arg.coords, reduced_params[-arg.ndim:]) + for arg in cargs] - func_data = func(self_data, other.data, *args, **kwargs) - mask = func_data != 0 - func_data = func_data[mask] - func_coords = other.coords[:, mask] + linear = [_linear_loc(rc, reduced_shape) for rc in reduced_coords] + sorted_idx = [np.argsort(idx) for idx in linear] + linear = [idx[s] for idx, s in zip(linear, sorted_idx)] + matched_idx = _match_arrays(*linear) - return COO(func_coords, func_data, shape=result_shape, - has_duplicates=other.has_duplicates, - sorted=other.sorted) + if return_midx: + matched_idx = [sidx[midx] for sidx, midx in zip(sorted_idx, matched_idx)] + return matched_idx + coords = [arg.coords[:, s] for arg, s in zip(cargs, sorted_idx)] + mcoords = [c[:, idx] for c, idx in zip(coords, matched_idx)] + mcoords = _get_matching_coords(mcoords, params, current_shape) + mdata = [arg.data[sorted_idx[0]][matched_idx[0]] for arg in matched_arrays] + mdata.append(arg2.data[sorted_idx[1]][matched_idx[1]]) + matched_arrays = [COO(mcoords, md, shape=current_shape) for md in mdata] -def _reverse_self_other(func): - def wrapper(*args, **kwargs): - return func(args[1], args[0], *args[2:], **kwargs) + if cache is not None: + cache[key] = matched_arrays - return wrapper + return matched_arrays -def _get_unmatched_coords_data(coords, data, shape, result_shape, matched_idx, - matched_coords): +def _unmatch_coo(func, args, mask, cache, **kwargs): """ - Get the unmatched coordinates and data - both those that are unmatched with - any point of the other data as well as those which are added because of - broadcasting. + Matches the coordinates for any number of input :obj:`COO` arrays. + + First computes the matches, then filters out the non-matches. Parameters ---------- - coords : np.ndarray - The coordinates to get the unmatched coordinates from. - data : np.ndarray - The data corresponding to these coordinates. - shape : tuple[int] - The shape corresponding to these coordinates. - result_shape : tuple[int] - The result broadcasting shape. - matched_idx : np.ndarray - The indices into the coords array where it matches with the other array. - matched_coords : np.ndarray - The overall coordinates that match from both arrays. + func : Callable + The function to compute matches + args : tuple[COO] + The input :obj:`COO` arrays. + mask : tuple[bool] + Specifies the inputs that are zero and the ones that are + nonzero. + kwargs: dict + Extra keyword arguments to pass to func. Returns ------- - coords_list : list[np.ndarray] - The list of unmatched/broadcasting coordinates. - data_list : list[np.ndarray] - The data corresponding to the coordinates. + matched_idx : list[ndarray] + The indices of matched elements. + matched_coords : list[ndarray] + The matched coordinates. + matched_data : list[ndarray] + The matched data. """ - params = _get_broadcast_parameters(shape, result_shape) - matched = np.zeros(len(data), dtype=np.bool) - matched[matched_idx] = True - unmatched = ~matched - data_zero = _zero_of_dtype(data.dtype) - nonzero = data != data_zero + matched_args = [a for a, m in zip(args, mask) if m] + unmatched_args = [a for a, m in zip(args, mask) if not m] - unmatched &= nonzero - matched &= nonzero + matched_arrays = _match_coo(*matched_args, cache=cache) - coords_list = [] - data_list = [] + pos = tuple(i for i, m in enumerate(mask) if not m) + posargs = [_zero_of_dtype(arg.dtype)[()] for arg, m in zip(args, mask) if not m] + result_shape = _get_nary_broadcast_shape(*[arg.shape for arg in args]) + + partial = PositinalArgumentPartial(func, pos, posargs) + matched_func = partial(*[a.data for a in matched_arrays], **kwargs) + + unmatched_mask = matched_func != _zero_of_dtype(matched_func.dtype) + + if not unmatched_mask.any(): + return [], [] + + func_data = matched_func[unmatched_mask] + func_coords = matched_arrays[0].coords[:, unmatched_mask] - unmatched_coords, unmatched_data = \ - _get_expanded_coords_data(coords[:, unmatched], - data[unmatched], - params, - result_shape) + func_array = COO(func_coords, func_data, shape=matched_arrays[0].shape).broadcast_to(result_shape) - coords_list.append(unmatched_coords) - data_list.append(unmatched_data) + if all(mask): + return [func_array.coords], [func_array.data] - if shape != result_shape: - broadcast_coords, broadcast_data = \ - _get_broadcast_coords_data(coords[:, matched], - matched_coords, - data[matched], - params, - result_shape) + unmatched_mask = np.ones(func_array.nnz, dtype=np.bool) - coords_list.append(broadcast_coords) - data_list.append(broadcast_data) + for arg in unmatched_args: + matched_idx = _match_coo(func_array, arg, return_midx=True)[0] + unmatched_mask[matched_idx] = False - return coords_list, data_list + coords = np.asarray(func_array.coords[:, unmatched_mask], order='C') + data = np.asarray(func_array.data[unmatched_mask], order='C') + + return [coords], [data] + + +def _get_nary_broadcast_shape(*shapes): + """ + Broadcast any number of shapes to a result shape. + + Parameters + ---------- + shapes : tuple[tuple[int]] + The shapes to broadcast. + + Returns + ------- + tuple[int] + The output shape. + + Raises + ------ + ValueError + If the input shapes cannot be broadcast to a single shape. + """ + result_shape = () + + for shape in shapes: + try: + result_shape = _get_broadcast_shape(shape, result_shape) + except ValueError: + shapes_str = ', '.join(str(shape) for shape in shapes) + raise ValueError('operands could not be broadcast together with shapes %s' + % shapes_str) + + return result_shape def _get_broadcast_shape(shape1, shape2, is_result=False): @@ -2302,7 +2327,7 @@ def _get_broadcast_parameters(shape, broadcast_shape): return params -def _get_reduced_coords(coords, shape, params): +def _get_reduced_coords(coords, params): """ Gets only those dimensions of the coordinates that don't need to be broadcast. @@ -2318,10 +2343,31 @@ def _get_reduced_coords(coords, shape, params): reduced_coords : np.ndarray The reduced coordinates. """ + reduced_params = [bool(param) for param in params] + + return coords[reduced_params] + + +def _get_reduced_shape(shape, params): + """ + Gets only those dimensions of the coordinates that don't need to be broadcast. + + Parameters + ---------- + coords : np.ndarray + The coordinates to reduce. + params : list + The params from which to check which dimensions to get. + + Returns + ------- + reduced_coords : np.ndarray + The reduced coordinates. + """ reduced_shape = tuple(l for l, p in zip(shape, params) if p) - return coords[reduced_params], reduced_shape + return reduced_shape def _get_expanded_coords_data(coords, data, params, broadcast_shape): @@ -2412,8 +2458,9 @@ def _cartesian_product(*arrays): def _elemwise_unary(func, self, *args, **kwargs): check = kwargs.pop('check', True) data_zero = _zero_of_dtype(self.dtype) - func_zero = _zero_of_dtype(func(data_zero, *args, **kwargs).dtype) - if check and func(data_zero, *args, **kwargs) != func_zero: + zero_func = func(data_zero, *args, **kwargs) + func_zero = _zero_of_dtype(zero_func.dtype) + if check and zero_func != func_zero: raise ValueError("Performing this operation would produce " "a dense result: %s" % str(func)) @@ -2426,80 +2473,39 @@ def _elemwise_unary(func, self, *args, **kwargs): sorted=self.sorted) -def _get_matching_coords(coords1, coords2, shape1, shape2): +def _get_matching_coords(coords, params, shape): """ - Takes in the matching coordinates in both dimensions (only those dimensions that - don't need to be broadcast in both arrays and returns the coordinates that will - overlap in the output array, i.e., the coordinates for which both broadcast arrays - will be nonzero. + Get the matching coords across a number of broadcast operands. Parameters ---------- - coords1, coords2 : np.ndarray - shape1, shape2 : tuple[int] - + coords : list[numpy.ndarray] + The input coordinates. + params : list[Union[bool, none]] + The broadcast parameters. Returns ------- - matching_coords : np.ndarray - The coordinates of the output array for which both inputs will be nonzero. + numpy.ndarray + The broacasted coordinates """ - result_shape = _get_broadcast_shape(shape1, shape2) - params1 = _get_broadcast_parameters(shape1, result_shape) - params2 = _get_broadcast_parameters(shape2, result_shape) - matching_coords = [] - dim1 = 0 - dim2 = 0 + dims = np.zeros(len(coords), dtype=np.uint8) - for p1, p2 in zip(params1, params2): - if p1: - matching_coords.append(coords1[dim1]) + for p_all in zip(*params): + for i, p in enumerate(p_all): + if p: + matching_coords.append(coords[i][dims[i]]) + break else: - matching_coords.append(coords2[dim2]) - - if p1 is not None: - dim1 += 1 - - if p2 is not None: - dim2 += 1 - - return np.asarray(matching_coords) - - -def _get_broadcast_coords_data(coords, matched_coords, data, params, broadcast_shape): - """ - Get data that matched in the reduced coordinates but still had a partial overlap because of - the broadcast, i.e., it didn't match in one of the other dimensions. - - Parameters - ---------- - coords : np.ndarray - The list of coordinates of the required array. Must be sorted. - matched_coords : np.ndarray - The list of coordinates that match. Must be sorted. - data : np.ndarray - The data corresponding to coords. - params : list - The broadcast parameters. - broadcast_shape : tuple[int] - The shape to get the broadcast coordinates. + matching_coords.append(coords[dims[0]]) - Returns - ------- - broadcast_coords : np.ndarray - The broadcasted coordinates. Is sorted. - broadcasted_data : np.ndarray - The data corresponding to those coordinates. - """ - full_coords, full_data = _get_expanded_coords_data(coords, data, params, broadcast_shape) - linear_full_coords = _linear_loc(full_coords, broadcast_shape) - linear_matched_coords = _linear_loc(matched_coords, broadcast_shape) + for i, p in enumerate(p_all): + if p is not None: + dims[i] += 1 - overlapping_coords, _ = _match_arrays(linear_full_coords, linear_matched_coords) - mask = np.ones(full_coords.shape[1], dtype=np.bool) - mask[overlapping_coords] = False + dtype = np.min_scalar_type(max(shape) - 1) - return full_coords[:, mask], full_data[mask] + return np.asarray(matching_coords, dtype=dtype) def _linear_loc(coords, shape, signed=False): @@ -2511,7 +2517,6 @@ def _linear_loc(coords, shape, signed=False): tmp = np.zeros(coords.shape[1], dtype=dtype) strides = 1 for i, d in enumerate(shape[::-1]): - # out += self.coords[-(i + 1), :].astype(dtype) * strides np.multiply(coords[-(i + 1), :], strides, out=tmp, dtype=dtype) np.add(tmp, out, out=out) strides *= d diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index b7fab974..f8b4b6dc 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -25,7 +25,7 @@ def test_reductions(reduction, axis, keepdims, kwargs, eqkwargs): y = x.todense() xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs) yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs) - assert_eq(xx, yy, **eqkwargs) + assert_eq(xx, yy, check_nnz=False, **eqkwargs) @pytest.mark.parametrize('reduction,kwargs,eqkwargs', [ @@ -42,7 +42,7 @@ def test_ufunc_reductions(reduction, axis, keepdims, kwargs, eqkwargs): y = x.todense() xx = reduction(x, axis=axis, keepdims=keepdims, **kwargs) yy = reduction(y, axis=axis, keepdims=keepdims, **kwargs) - assert_eq(xx, yy, **eqkwargs) + assert_eq(xx, yy, check_nnz=False, **eqkwargs) @pytest.mark.parametrize('axis', [ @@ -213,8 +213,8 @@ def test_elemwise(func): x = s.todense() fs = func(s) - assert isinstance(fs, COO) + assert fs.nnz <= s.nnz assert_eq(func(x), fs) @@ -234,6 +234,242 @@ def test_elemwise_binary(func, shape): assert_eq(func(xs, ys), func(x, y)) +@pytest.mark.parametrize('func', [ + lambda x, y, z: x + y + z, + lambda x, y, z: x * y * z, + lambda x, y, z: x + y * z, + lambda x, y, z: (x + y) * z +]) +@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_elemwise_trinary(func, shape): + xs = sparse.random(shape, density=0.5) + ys = sparse.random(shape, density=0.5) + zs = sparse.random(shape, density=0.5) + + x = xs.todense() + y = ys.todense() + z = zs.todense() + + fs = sparse.elemwise(func, xs, ys, zs) + assert isinstance(fs, COO) + + assert_eq(fs, func(x, y, z)) + + +@pytest.mark.parametrize('func', [operator.add, operator.mul]) +@pytest.mark.parametrize('shape1,shape2', [((2, 3, 4), (3, 4)), + ((3, 4), (2, 3, 4)), + ((3, 1, 4), (3, 2, 4)), + ((1, 3, 4), (3, 4)), + ((3, 4, 1), (3, 4, 2)), + ((1, 5), (5, 1)), + ((3, 1), (3, 4)), + ((3, 1), (1, 4)), + ((1, 4), (3, 4))]) +def test_binary_broadcasting(func, shape1, shape2): + xs = sparse.random(shape1, density=0.5) + x = xs.todense() + + ys = sparse.random(shape2, density=0.5) + y = ys.todense() + + expected = func(x, y) + actual = func(xs, ys) + + assert isinstance(actual, COO) + assert_eq(expected, actual) + + assert np.count_nonzero(expected) == actual.nnz + + +@pytest.mark.parametrize('shape1,shape2', [((3, 4), (2, 3, 4)), + ((3, 1, 4), (3, 2, 4)), + ((3, 4, 1), (3, 4, 2))]) +def test_broadcast_to(shape1, shape2): + a = sparse.random(shape1, density=0.5) + x = a.todense() + + assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2)) + + +@pytest.mark.parametrize('shapes', [ + [ + (2,), + (3, 2), + (4, 3, 2), + ], + [ + (3,), + (2, 3), + (2, 2, 3), + ], + [ + (2,), + (2, 2), + (2, 2, 2), + ], + [ + (4,), + (4, 4), + (4, 4, 4), + ], + [ + (4,), + (4, 4), + (4, 4, 4), + ], + [ + (4,), + (4, 4), + (4, 4, 4), + ], + [ + (1, 1, 2), + (1, 3, 1), + (4, 1, 1) + ], + [ + (2,), + (2, 1), + (2, 1, 1) + ], +]) +@pytest.mark.parametrize('func', [ + lambda x, y, z: (x + y) * z, + lambda x, y, z: x * (y + z), + lambda x, y, z: x * y * z, + lambda x, y, z: x + y + z, + lambda x, y, z: x + y - z, + lambda x, y, z: x - y + z, +]) +def test_trinary_broadcasting(shapes, func): + args = [sparse.random(s, density=0.5) for s in shapes] + dense_args = [arg.todense() for arg in args] + + fs = sparse.elemwise(func, *args) + assert isinstance(fs, COO) + + assert_eq(fs, func(*dense_args)) + + +@pytest.mark.parametrize('shapes, func', [ + ([ + (2,), + (3, 2), + (4, 3, 2), + ], lambda x, y, z: (x + y) * z), + ([ + (3,), + (2, 3), + (2, 2, 3), + ], lambda x, y, z: x * (y + z)), + ([ + (2,), + (2, 2), + (2, 2, 2), + ], lambda x, y, z: x * y * z), + ([ + (4,), + (4, 4), + (4, 4, 4), + ], lambda x, y, z: x + y + z), +]) +@pytest.mark.parametrize('value', [ + np.nan, + np.inf, + -np.inf +]) +@pytest.mark.parametrize('path_fraction', [0.25, 0.5, 0.75, 1.0]) +@pytest.mark.filterwarnings('ignore:invalid value') +def test_trinary_broadcasting_pathological(shapes, func, value, path_fraction): + def value_array(n): + i = int(n * path_fraction) + + ar = np.empty((n,), dtype=np.float_) + ar[:i] = value + ar[i:] = np.random.rand(n - i) + return ar + + args = [sparse.random(s, density=0.5, data_rvs=value_array) for s in shapes] + dense_args = [arg.todense() for arg in args] + + fs = sparse.elemwise(func, *args) + assert isinstance(fs, COO) + + assert_eq(fs, func(*dense_args), equal_nan=True) + + +def test_sparse_broadcasting(monkeypatch): + orig_unmatch_coo = sparse.coo._unmatch_coo + + state = {'num_matches': 0} + + xs = sparse.random((3, 4), density=0.5) + ys = sparse.random((3, 4), density=0.5) + + def mock_unmatch_coo(*args, **kwargs): + result = orig_unmatch_coo(*args, **kwargs) + state['num_matches'] += len(result[0]) + return result + + monkeypatch.setattr(sparse.coo, '_unmatch_coo', mock_unmatch_coo) + + xs * ys + + # Less than in case there's absolutely no overlap in some cases. + assert state['num_matches'] <= 1 + + +def test_dense_broadcasting(monkeypatch): + orig_unmatch_coo = sparse.coo._unmatch_coo + + state = {'num_matches': 0} + + xs = sparse.random((3, 4), density=0.5) + ys = sparse.random((3, 4), density=0.5) + + def mock_unmatch_coo(*args, **kwargs): + result = orig_unmatch_coo(*args, **kwargs) + state['num_matches'] += len(result[0]) + return result + + monkeypatch.setattr(sparse.coo, '_unmatch_coo', mock_unmatch_coo) + + xs + ys + + # Less than in case there's absolutely no overlap in some cases. + assert state['num_matches'] <= 3 + + +@pytest.mark.parametrize('format', ['coo', 'dok']) +def test_sparsearray_elemwise(format): + xs = sparse.random((3, 4), density=0.5, format=format) + ys = sparse.random((3, 4), density=0.5, format=format) + + x = xs.todense() + y = ys.todense() + + fs = sparse.elemwise(operator.add, xs, ys) + assert isinstance(fs, COO) + + assert_eq(fs, x + y) + + +def test_ndarray_densification_fails(): + xs = sparse.random((3, 4), density=0.5) + y = np.random.rand(3, 4) + + with pytest.raises(ValueError): + xs + y + + +def test_elemwise_noargs(): + def func(): + return np.float_(5.0) + + assert sparse.elemwise(func) == func() + + @pytest.mark.parametrize('func', [ operator.pow, operator.truediv, operator.floordiv, operator.ge, operator.le, operator.eq, operator.mod @@ -472,48 +708,6 @@ def test_bitwise_binary_bool(func, shape): assert_eq(func(xs, ys), func(x, y)) -@pytest.mark.parametrize('func', [operator.mul]) -@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_numpy_mixed_binary(func, shape): - xs = sparse.random(shape, density=0.5) - y = np.random.rand(*shape) - - x = xs.todense() - - fs1 = func(xs, y) - - assert isinstance(fs1, COO) - assert fs1.nnz <= xs.nnz - assert_eq(fs1, func(x, y)) - - fs2 = func(y, xs) - - assert isinstance(fs2, COO) - assert fs2.nnz <= xs.nnz - assert_eq(fs2, func(y, x)) - - -@pytest.mark.parametrize('func', [operator.and_]) -@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_numpy_mixed_binary_bitwise(func, shape): - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - y = np.random.randint(100, size=shape) - - x = xs.todense() - - fs1 = func(xs, y) - - assert isinstance(fs1, COO) - assert fs1.nnz <= xs.nnz - assert_eq(fs1, func(x, y)) - - fs2 = func(y, xs) - - assert isinstance(fs2, COO) - assert fs2.nnz <= xs.nnz - assert_eq(fs2, func(y, x)) - - def test_elemwise_binary_empty(): x = COO({}, shape=(10, 10)) y = sparse.random((10, 10), density=0.5) @@ -648,7 +842,7 @@ def test_canonical(): [1, 0, 3], [0, 1, 0], [1, 0, 3]]).T - data = np.arange(5) + data = np.arange(5) + 1 old = COO(coords, data, shape=(2, 2, 5)) x = COO(coords, data, shape=(2, 2, 5)) @@ -761,61 +955,6 @@ def test_addition_not_ok_when_large_and_sparse(): np.exp(x) -@pytest.mark.parametrize('func', [operator.add, operator.mul]) -@pytest.mark.parametrize('shape1,shape2', [((2, 3, 4), (3, 4)), - ((3, 4), (2, 3, 4)), - ((3, 1, 4), (3, 2, 4)), - ((1, 3, 4), (3, 4)), - ((3, 4, 1), (3, 4, 2)), - ((1, 5), (5, 1))]) -def test_broadcasting(func, shape1, shape2): - xs = sparse.random(shape1, density=0.5) - x = xs.todense() - - ys = sparse.random(shape2, density=0.5) - y = ys.todense() - - expected = func(x, y) - actual = func(xs, ys) - - assert_eq(expected, actual) - - assert np.count_nonzero(expected) == actual.nnz - - -@pytest.mark.parametrize('func', [operator.mul]) -@pytest.mark.parametrize('shape1,shape2', [((2, 3, 4), (3, 4)), - ((3, 4), (2, 3, 4)), - ((3, 1, 4), (3, 2, 4)), - ((1, 3, 4), (3, 4)), - ((3, 4, 1), (3, 4, 2)), - ((1, 5), (5, 1))]) -def test_numpy_mixed_broadcasting(func, shape1, shape2): - xs = sparse.random(shape1, density=0.5) - x = xs.todense() - - y = np.random.rand(*shape2) - - expected = func(x, y) - actual = func(xs, y) - - assert isinstance(actual, COO) - - assert_eq(expected, actual) - - assert np.count_nonzero(expected) == actual.nnz - - -@pytest.mark.parametrize('shape1,shape2', [((3, 4), (2, 3, 4)), - ((3, 1, 4), (3, 2, 4)), - ((3, 4, 1), (3, 4, 2))]) -def test_broadcast_to(shape1, shape2): - a = sparse.random(shape1, density=0.5) - x = a.todense() - - assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2)) - - @pytest.mark.parametrize('scalar', [2, 2.5, np.float32(2.0), np.int8(3)]) def test_scalar_multiplication(scalar): a = sparse.random((2, 3, 4), density=0.5) @@ -879,13 +1018,13 @@ def test_scipy_sparse_interface(): x = scipy.sparse.coo_matrix(inp) xx = sparse.COO(inp) - assert_eq(x, xx) - assert_eq(x.T, xx.T) - assert_eq(xx.to_scipy_sparse(), x) - assert_eq(COO.from_scipy_sparse(xx.to_scipy_sparse()), xx) + assert_eq(x, xx, check_nnz=False) + assert_eq(x.T, xx.T, check_nnz=False) + assert_eq(xx.to_scipy_sparse(), x, check_nnz=False) + assert_eq(COO.from_scipy_sparse(xx.to_scipy_sparse()), xx, check_nnz=False) - assert_eq(x, xx) - assert_eq(x.T.dot(x), xx.T.dot(xx)) + assert_eq(x, xx, check_nnz=False) + assert_eq(x.T.dot(x), xx.T.dot(xx), check_nnz=False) assert isinstance(x + xx, COO) assert isinstance(xx + x, COO) diff --git a/sparse/utils.py b/sparse/utils.py index 3fd3df62..04a3b82c 100644 --- a/sparse/utils.py +++ b/sparse/utils.py @@ -1,8 +1,9 @@ import numpy as np from numbers import Integral +from collections import Iterable -def assert_eq(x, y, **kwargs): +def assert_eq(x, y, check_nnz=True, **kwargs): from .coo import COO assert x.shape == y.shape assert x.dtype == y.dtype @@ -16,10 +17,14 @@ def assert_eq(x, y, **kwargs): if hasattr(x, 'todense'): xx = x.todense() + if check_nnz: + assert (xx != 0).sum() == x.nnz else: xx = x if hasattr(y, 'todense'): yy = y.todense() + if check_nnz: + assert (yy != 0).sum() == y.nnz else: yy = y assert np.allclose(xx, yy, **kwargs) @@ -146,3 +151,44 @@ def random( ar = DOK(ar) return ar + + +def isscalar(x): + from .sparse_array import SparseArray + return not isinstance(x, SparseArray) and np.isscalar(x) + + +class PositinalArgumentPartial(object): + def __init__(self, func, pos, posargs): + if not isinstance(pos, Iterable): + pos = (pos,) + posargs = (posargs,) + + n_partial_args = len(pos) + + self.pos = pos + self.posargs = posargs + self.func = func + + self.n = n_partial_args + + self.__doc__ = func.__doc__ + + def __call__(self, *args, **kwargs): + j = 0 + totargs = [] + + for i in range(len(args) + self.n): + if j >= self.n or i != self.pos[j]: + totargs.append(args[i - j]) + else: + totargs.append(self.posargs[j]) + j += 1 + + return self.func(*totargs, **kwargs) + + def __str__(self): + return str(self.func) + + def __repr__(self): + return repr(self.func)