diff --git a/docs/generated/sparse.COO.elemwise.rst b/docs/generated/sparse.COO.elemwise.rst
deleted file mode 100644
index 41d9e5f6..00000000
--- a/docs/generated/sparse.COO.elemwise.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-COO\.elemwise
-=============
-
-.. currentmodule:: sparse
-
-.. automethod:: COO.elemwise
\ No newline at end of file
diff --git a/docs/generated/sparse.COO.rst b/docs/generated/sparse.COO.rst
index 94686cc5..1ca472e9 100644
--- a/docs/generated/sparse.COO.rst
+++ b/docs/generated/sparse.COO.rst
@@ -31,7 +31,6 @@ COO
    .. autosummary::
       :toctree:
 
-      COO.elemwise
       COO.astype
       COO.round
 
diff --git a/docs/generated/sparse.elemwise.rst b/docs/generated/sparse.elemwise.rst
new file mode 100644
index 00000000..a553ff6d
--- /dev/null
+++ b/docs/generated/sparse.elemwise.rst
@@ -0,0 +1,6 @@
+elemwise
+========
+
+.. currentmodule:: sparse
+
+.. autofunction:: elemwise
\ No newline at end of file
diff --git a/docs/generated/sparse.rst b/docs/generated/sparse.rst
index be984601..58e4d2d7 100644
--- a/docs/generated/sparse.rst
+++ b/docs/generated/sparse.rst
@@ -27,6 +27,8 @@ API
 
     dot
 
+    elemwise
+
     random
 
     stack
diff --git a/docs/operations.rst b/docs/operations.rst
index 041157f1..d68ef550 100644
--- a/docs/operations.rst
+++ b/docs/operations.rst
@@ -33,16 +33,20 @@ or numpy functions that are not yet implemented for sparse arrays
 
 This page describes those valid operations, and their limitations.
 
-:obj:`COO.elemwise`
+:obj:`elemwise`
 ~~~~~~~~~~~~~~~~~~~
 This function allows you to apply any arbitrary unary or binary function where
 the first object is :obj:`COO`, and the second is a scalar, :obj:`COO`, or
-a :doc:`Numpy arrays <reference/generated/numpy.ndarray>`. For example, the
+a :obj:`scipy.sparse.spmatrix`. For example, the
 following will add two :obj:`COO` objects:
 
 .. code-block:: python
 
-   x.elemwise(np.add, y)
+   sparse.elemwise(np.add, x, y)
+
+
+.. note:: Previously, :obj:`elemwise` was a method of the :obj:`COO` class. Now,
+   it has been moved to the :obj:`sparse` module.
 
 .. _operations-auto-densification:
 
@@ -78,26 +82,8 @@ If densification is needed, it must be explicit. In other words, you must call
 :obj:`COO.todense` on the :obj:`COO` object. If both operands are :obj:`COO`,
 both must be densified.
 
-
-Operations with :doc:`Numpy arrays <reference/generated/numpy.ndarray>`
------------------------------------------------------------------------
-Certain operations with :obj:`numpy.ndarray` are also supported. For example,
-the following are all allowed if :code:`x` is a :obj:`numpy.ndarray` and
-:code:`(x == 0).all()` evaluates to :code:`True`:
-
-.. code-block:: python
-
-   x + y
-   x - y
-
-The following is true so long as there are no infinities or NaNs in :code:`x`:
-
-.. code-block:: python
-
-   x * y
-
-In general, if operating on the :code:`numpy.ndarray` with a zero would produce
-all-zeros then the operation is supported.
+.. note:: Previously, operations with Numpy arrays were sometimes supported. Now,
+   it is necessary to convert Numpy arrays to :obj:`COO` objects.
 
 
 Operations with :obj:`scipy.sparse.spmatrix`
diff --git a/sparse/__init__.py b/sparse/__init__.py
index e8faae43..a37844ef 100644
--- a/sparse/__init__.py
+++ b/sparse/__init__.py
@@ -1,4 +1,4 @@
-from .coo import COO, tensordot, concatenate, stack, dot, triu, tril
+from .coo import COO, elemwise, tensordot, concatenate, stack, dot, triu, tril
 from .dok import DOK
 from .sparse_array import SparseArray
 from .utils import random
diff --git a/sparse/coo.py b/sparse/coo.py
index 71dd4502..29cee457 100644
--- a/sparse/coo.py
+++ b/sparse/coo.py
@@ -1,7 +1,8 @@
 from __future__ import absolute_import, division, print_function
 
 from collections import Iterable, defaultdict, deque
-from functools import reduce, partial
+from functools import reduce
+from itertools import product
 import numbers
 import operator
 
@@ -10,7 +11,7 @@
 from numpy.lib.mixins import NDArrayOperatorsMixin
 
 from .slicing import normalize_index
-from .utils import _zero_of_dtype
+from .utils import _zero_of_dtype, isscalar, PositinalArgumentPartial
 from .sparse_array import SparseArray
 from .compatibility import int, zip_longest, range, zip
 
@@ -242,7 +243,7 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True,
 
         super(COO, self).__init__(shape)
         if self.shape:
-            dtype = np.min_scalar_type(max(self.shape))
+            dtype = np.min_scalar_type(max(max(self.shape) - 1, 0))
         else:
             dtype = np.uint8
         self.coords = self.coords.astype(dtype)
@@ -1139,7 +1140,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             return NotImplemented
 
         if method == '__call__':
-            return COO._elemwise(ufunc, *inputs, **kwargs)
+            return elemwise(ufunc, *inputs, **kwargs)
         elif method == 'reduce':
             return COO._reduce(ufunc, *inputs, **kwargs)
         else:
@@ -1439,70 +1440,6 @@ def sum_duplicates(self):
         self.coords = coords
         self.has_duplicates = False
 
-    @staticmethod
-    def _elemwise(func, *args, **kwargs):
-        if len(args) == 0:
-            return func()
-
-        self = args[0]
-        if np.isscalar(self) or (isinstance(self, np.ndarray)
-                                 and self.ndim == 0):
-            func = partial(func, self)
-            other = args[1]
-            return _elemwise_unary(func, other, *args[2:], **kwargs)
-
-        if isinstance(self, scipy.sparse.spmatrix):
-            self = COO.from_scipy_sparse(self)
-
-        if len(args) == 1:
-            return _elemwise_unary(func, self, *args[1:], **kwargs)
-        else:
-            other = args[1]
-            if isinstance(other, scipy.sparse.spmatrix):
-                other = COO.from_scipy_sparse(other)
-
-            if isinstance(other, COO) or isinstance(other, np.ndarray):
-                return _elemwise_binary(func, self, other, *args[2:], **kwargs)
-            else:
-                return _elemwise_unary(func, self, *args[1:], **kwargs)
-
-    def elemwise(self, func, *args, **kwargs):
-        """
-        Apply a function to one or two arguments.
-
-        Parameters
-        ----------
-        func : Callable
-            The function to apply to one or two arguments.
-        args : tuple, optional
-            The extra arguments to pass to the function. If :code:`args[0]` is a COO object,
-            or a scalar; the function will be treated as a binary
-            function. Otherwise, it will be treated as a unary function.
-        kwargs : dict, optional
-            The kwargs to pass to the function.
-
-        Returns
-        -------
-        COO
-            The result of applying the function.
-
-        Raises
-        ------
-        ValueError
-            If the operation would result in a dense matrix.
-
-        See Also
-        --------
-        :obj:`numpy.ufunc` : A similar Numpy construct. Note that any :code:`ufunc` can be used
-            as the :code:`func` input to this function.
-
-        Notes
-        -----
-        In the future, this function may support functions of more than two arguments. Therefore,
-        it is best to pass in any additional arguments as keyword arguments.
-        """
-        return COO._elemwise(func, self, *args, **kwargs)
-
     def broadcast_to(self, shape):
         """
         Performs the equivalent of :obj:`numpy.broadcast_to` for :obj:`COO`. Note that
@@ -1527,6 +1464,9 @@ def broadcast_to(self, shape):
         --------
         :obj:`numpy.broadcast_to` : NumPy equivalent function
         """
+        if shape == self.shape:
+            return self
+
         result_shape = _get_broadcast_shape(self.shape, shape, is_result=True)
         params = _get_broadcast_parameters(self.shape, result_shape)
         coords, data = _get_expanded_coords_data(self.coords, self.data, params, result_shape)
@@ -1550,7 +1490,7 @@ def round(self, decimals=0, out=None):
         actually supported.
         """
         assert out is None
-        return self.elemwise(np.round, decimals)
+        return elemwise(np.round, self, decimals=decimals)
 
     def astype(self, dtype, out=None):
         """
@@ -1569,7 +1509,7 @@ def astype(self, dtype, out=None):
         actually supported.
         """
         assert out is None
-        return self.elemwise(np.ndarray.astype, dtype=dtype)
+        return elemwise(np.ndarray.astype, self, dtype=dtype)
 
     def maybe_densify(self, max_size=1000, min_density=0.25):
         """
@@ -2034,113 +1974,125 @@ def _grouped_reduce(x, groups, method, **kwargs):
     return result, inv_idx, counts
 
 
-def _elemwise_binary(func, self, other, *args, **kwargs):
-    check = kwargs.pop('check', True)
-    self_zero = _zero_of_dtype(self.dtype)
-    other_zero = _zero_of_dtype(other.dtype)
+def elemwise(func, *args, **kwargs):
+    """
+    Apply a function to any number of arguments.
 
-    func_value = func(self_zero, other_zero, *args, **kwargs)
-    func_zero = _zero_of_dtype(np.dtype(func_value))
-    if check and func(self_zero, other_zero, *args, **kwargs) != func_zero:
-        raise ValueError("Performing this operation would produce "
-                         "a dense result: %s" % str(func))
+    Parameters
+    ----------
+    func : Callable
+        The function to apply. Must support broadcasting.
+    args : tuple, optional
+        The arguments to the function. Can be :obj:`scipy.sparse.spmatrix` objects,
+        :obj:`SparseArray` objects or :obj:`numpy.ndarray` objects.
+    kwargs : dict, optional
+        Any additional arguments to pass to the function.
 
-    if not isinstance(self, COO):
-        if not check or np.array_equiv(func(self, other_zero, *args, **kwargs), func_zero):
-            return _elemwise_binary_self_dense(func, self, other, *args, **kwargs)
-        else:
-            raise ValueError("Performing this operation would produce "
-                             "a dense result: %s" % str(func))
+    Returns
+    -------
+    COO
+        The result of applying the function.
 
-    if not isinstance(other, COO):
-        if not check or np.array_equiv(func(self_zero, other, *args, **kwargs), func_zero):
-            temp_func = _reverse_self_other(func)
-            return _elemwise_binary_self_dense(temp_func, other, self, *args, **kwargs)
-        else:
+    Raises
+    ------
+    ValueError
+        If the operation would result in a dense matrix, or if the operands
+        don't have broadcastable shapes.
+
+    See Also
+    --------
+    :obj:`numpy.ufunc` : A similar Numpy construct. Note that any :code:`ufunc` can be used
+        as the :code:`func` input to this function.
+    """
+    # Because we need to mutate args.
+    args = list(args)
+    posargs = []
+    pos = []
+    for i, arg in enumerate(args):
+        if isinstance(arg, scipy.sparse.spmatrix):
+            args[i] = COO.from_scipy_sparse(arg)
+        elif isscalar(arg) or (isinstance(arg, np.ndarray)
+                               and not arg.shape):
+            # Faster and more reliable to pass ()-shaped ndarrays as scalars.
+            if isinstance(arg, np.ndarray):
+                args[i] = arg[()]
+
+            pos.append(i)
+            posargs.append(args[i])
+        elif isinstance(arg, SparseArray) and not isinstance(arg, COO):
+            args[i] = COO(arg)
+        elif not isinstance(arg, COO):
             raise ValueError("Performing this operation would produce "
                              "a dense result: %s" % str(func))
 
-    self_shape, other_shape = self.shape, other.shape
-
-    result_shape = _get_broadcast_shape(self_shape, other_shape)
-    self_params = _get_broadcast_parameters(self.shape, result_shape)
-    other_params = _get_broadcast_parameters(other.shape, result_shape)
-    combined_params = [p1 and p2 for p1, p2 in zip(self_params, other_params)]
-    self_reduce_params = combined_params[-self.ndim:]
-    other_reduce_params = combined_params[-other.ndim:]
-
-    self.sum_duplicates()  # TODO: document side-effect or make copy
-    other.sum_duplicates()  # TODO: document side-effect or make copy
-
-    self_coords = self.coords
-    self_data = self.data
-
-    self_reduced_coords, self_reduced_shape = \
-        _get_reduced_coords(self_coords, self_shape,
-                            self_reduce_params)
-    self_reduced_linear = _linear_loc(self_reduced_coords, self_reduced_shape)
-    i = np.argsort(self_reduced_linear)
-    self_reduced_linear = self_reduced_linear[i]
-    self_coords = self_coords[:, i]
-    self_data = self_data[i]
-
-    # Store coords
-    other_coords = other.coords
-    other_data = other.data
-
-    other_reduced_coords, other_reduced_shape = \
-        _get_reduced_coords(other_coords, other_shape,
-                            other_reduce_params)
-    other_reduced_linear = _linear_loc(other_reduced_coords, other_reduced_shape)
-    i = np.argsort(other_reduced_linear)
-    other_reduced_linear = other_reduced_linear[i]
-    other_coords = other_coords[:, i]
-    other_data = other_data[i]
-
-    # Find matches between self.coords and other.coords
-    matched_self, matched_other = _match_arrays(self_reduced_linear,
-                                                other_reduced_linear)
-
-    # Start with an empty list. This may reduce computation in many cases.
-    data_list = []
-    coords_list = []
+    # Filter out scalars as they are 'baked' into the function.
+    func = PositinalArgumentPartial(func, pos, posargs)
+    args = [arg for arg in args if not isscalar(arg)]
+
+    if len(args) == 0:
+        return func(**kwargs)
+
+    if len(args) == 1:
+        return _elemwise_unary(func, args[0], **kwargs)
+
+    return _elemwise_n_ary(func, *args, **kwargs)
+
 
-    # Add the matched part.
-    matched_coords = _get_matching_coords(self_coords[:, matched_self],
-                                          other_coords[:, matched_other],
-                                          self_shape, other_shape)
+def _elemwise_n_ary(func, *args, **kwargs):
+    """
+    Apply a function to any number of arguments with broadcasting.
+
+    Parameters
+    ----------
+    func : Callable
+        The function to apply to arguments. Must support broadcasting.
+    args : list
+        Input :obj:`COO` or :obj:`numpy.ndarray`s.
+    kwargs : dict
+        Additional arguments to pass to the function.
+
+    Returns
+    -------
+    COO
+        The output array.
+
+    Raises
+    ------
+    ValueError
+        If the input shapes aren't compatible or the result will be dense.
+    """
+    args = list(args)
+
+    for arg in args:
+        if isinstance(arg, COO):
+            arg.sum_duplicates()
 
-    data_list.append(func(self_data[matched_self],
-                          other_data[matched_other],
-                          *args, **kwargs))
-    coords_list.append(matched_coords)
+    args_zeros = tuple(_zero_of_dtype(arg.dtype)[()] for arg in args)
 
-    self_func = func(self_data, other_zero, *args, **kwargs)
-    # Add unmatched parts as necessary.
-    if (self_func != func_zero).any():
-        self_unmatched_coords, self_unmatched_func = \
-            _get_unmatched_coords_data(self_coords, self_func, self_shape,
-                                       result_shape, matched_self,
-                                       matched_coords)
+    func_value = func(*args_zeros, **kwargs)
+    func_zero = _zero_of_dtype(np.dtype(func_value))
+    if func_value != func_zero:
+        raise ValueError("Performing this operation would produce "
+                         "a dense result: %s" % str(func))
+    data_list = []
+    coords_list = []
 
-        data_list.extend(self_unmatched_func)
-        coords_list.extend(self_unmatched_coords)
+    cache = {}
+    for mask in product([True, False], repeat=len(args)):
+        if not any(mask):
+            continue
 
-    other_func = func(self_zero, other_data, *args, **kwargs)
+        ci, di = _unmatch_coo(func, args, mask, cache, **kwargs)
 
-    if (other_func != func_zero).any():
-        other_unmatched_coords, other_unmatched_func = \
-            _get_unmatched_coords_data(other_coords, other_func, other_shape,
-                                       result_shape, matched_other,
-                                       matched_coords)
+        coords_list.extend(ci)
+        data_list.extend(di)
 
-        coords_list.extend(other_unmatched_coords)
-        data_list.extend(other_unmatched_func)
+    result_shape = _get_nary_broadcast_shape(*[arg.shape for arg in args])
 
     # Concatenate matches and mismatches
-    data = np.concatenate(data_list) if len(data_list) else np.empty((0,), dtype=self.dtype)
+    data = np.concatenate(data_list) if len(data_list) else np.empty((0,), dtype=args[0].dtype)
     coords = np.concatenate(coords_list, axis=1) if len(coords_list) else \
-        np.empty((0, len(result_shape)), dtype=self.coords.dtype)
+        np.empty((0, len(result_shape)), dtype=np.min_scalar_type(max(result_shape) - 1))
 
     nonzero = data != func_zero
     data = data[nonzero]
@@ -2149,101 +2101,174 @@ def _elemwise_binary(func, self, other, *args, **kwargs):
     return COO(coords, data, shape=result_shape, has_duplicates=False)
 
 
-def _elemwise_binary_self_dense(func, self, other, *args, **kwargs):
-    assert isinstance(self, np.ndarray)
-    assert isinstance(other, COO)
+def _match_coo(*args, **kwargs):
+    """
+    Matches the coordinates for any number of input :obj:`COO` arrays.
+    Equivalent to "sparse" broadcasting for all arrays.
+
+    Parameters
+    ----------
+    args : Tuple[COO]
+        The input :obj:`COO` arrays.
+    return_midx : bool
+        Whether to return matched indices or matched arrays. Matching
+        only supported for two arrays. ``False`` by default.
+    cache : dict
+        Cache of things already matched. No cache by default.
+
+    Returns
+    -------
+    matched_idx : List[ndarray]
+        The indices of matched elements in the original arrays. Only returned if
+        ``return_midx`` is ``True``.
+    matched_arrays : List[COO]
+        The expanded, matched :obj:`COO` objects. Only returned if
+        ``return_midx`` is ``False``.
+    """
+    return_midx = kwargs.pop('return_midx', False)
+    cache = kwargs.pop('cache', None)
 
-    result_shape = _get_broadcast_shape(self.shape, other.shape)
+    if kwargs:
+        raise ValueError('Unknown kwargs %s' % kwargs.keys())
 
-    if result_shape != other.shape:
-        other = other.broadcast_to(result_shape)
+    if return_midx and (len(args) != 2 or cache is not None):
+        raise NotImplementedError('Matching indices only supported for two args, and no cache.')
 
-    self = np.broadcast_to(self, result_shape)
+    matched_arrays = [args[0]]
+    cache_key = [id(args[0])]
+    for arg2 in args[1:]:
+        cache_key.append(id(arg2))
+        key = tuple(cache_key)
+        if cache is not None and key in cache:
+            matched_arrays = cache[key]
+            continue
 
-    self_coords = tuple([other.coords[i, :] for i in range(other.ndim)])
+        cargs = [matched_arrays[0], arg2]
+        current_shape = _get_broadcast_shape(matched_arrays[0].shape, arg2.shape)
+        params = [_get_broadcast_parameters(arg.shape, current_shape) for arg in cargs]
+        reduced_params = [all(p) for p in zip(*params)]
+        reduced_shape = _get_reduced_shape(arg2.shape,
+                                           reduced_params[-arg2.ndim:])
 
-    self_data = self[self_coords]
+        reduced_coords = [_get_reduced_coords(arg.coords, reduced_params[-arg.ndim:])
+                          for arg in cargs]
 
-    func_data = func(self_data, other.data, *args, **kwargs)
-    mask = func_data != 0
-    func_data = func_data[mask]
-    func_coords = other.coords[:, mask]
+        linear = [_linear_loc(rc, reduced_shape) for rc in reduced_coords]
+        sorted_idx = [np.argsort(idx) for idx in linear]
+        linear = [idx[s] for idx, s in zip(linear, sorted_idx)]
+        matched_idx = _match_arrays(*linear)
 
-    return COO(func_coords, func_data, shape=result_shape,
-               has_duplicates=other.has_duplicates,
-               sorted=other.sorted)
+        if return_midx:
+            matched_idx = [sidx[midx] for sidx, midx in zip(sorted_idx, matched_idx)]
+            return matched_idx
 
+        coords = [arg.coords[:, s] for arg, s in zip(cargs, sorted_idx)]
+        mcoords = [c[:, idx] for c, idx in zip(coords, matched_idx)]
+        mcoords = _get_matching_coords(mcoords, params, current_shape)
+        mdata = [arg.data[sorted_idx[0]][matched_idx[0]] for arg in matched_arrays]
+        mdata.append(arg2.data[sorted_idx[1]][matched_idx[1]])
+        matched_arrays = [COO(mcoords, md, shape=current_shape) for md in mdata]
 
-def _reverse_self_other(func):
-    def wrapper(*args, **kwargs):
-        return func(args[1], args[0], *args[2:], **kwargs)
+        if cache is not None:
+            cache[key] = matched_arrays
 
-    return wrapper
+    return matched_arrays
 
 
-def _get_unmatched_coords_data(coords, data, shape, result_shape, matched_idx,
-                               matched_coords):
+def _unmatch_coo(func, args, mask, cache, **kwargs):
     """
-    Get the unmatched coordinates and data - both those that are unmatched with
-    any point of the other data as well as those which are added because of
-    broadcasting.
+    Matches the coordinates for any number of input :obj:`COO` arrays.
+
+    First computes the matches, then filters out the non-matches.
 
     Parameters
     ----------
-    coords : np.ndarray
-        The coordinates to get the unmatched coordinates from.
-    data : np.ndarray
-        The data corresponding to these coordinates.
-    shape : tuple[int]
-        The shape corresponding to these coordinates.
-    result_shape : tuple[int]
-        The result broadcasting shape.
-    matched_idx : np.ndarray
-        The indices into the coords array where it matches with the other array.
-    matched_coords : np.ndarray
-        The overall coordinates that match from both arrays.
+    func : Callable
+        The function to compute matches
+    args : tuple[COO]
+        The input :obj:`COO` arrays.
+    mask : tuple[bool]
+        Specifies the inputs that are zero and the ones that are
+        nonzero.
+    kwargs: dict
+        Extra keyword arguments to pass to func.
 
     Returns
     -------
-    coords_list : list[np.ndarray]
-        The list of unmatched/broadcasting coordinates.
-    data_list : list[np.ndarray]
-        The data corresponding to the coordinates.
+    matched_idx : list[ndarray]
+        The indices of matched elements.
+    matched_coords : list[ndarray]
+        The matched coordinates.
+    matched_data : list[ndarray]
+        The matched data.
     """
-    params = _get_broadcast_parameters(shape, result_shape)
-    matched = np.zeros(len(data), dtype=np.bool)
-    matched[matched_idx] = True
-    unmatched = ~matched
-    data_zero = _zero_of_dtype(data.dtype)
-    nonzero = data != data_zero
+    matched_args = [a for a, m in zip(args, mask) if m]
+    unmatched_args = [a for a, m in zip(args, mask) if not m]
 
-    unmatched &= nonzero
-    matched &= nonzero
+    matched_arrays = _match_coo(*matched_args, cache=cache)
 
-    coords_list = []
-    data_list = []
+    pos = tuple(i for i, m in enumerate(mask) if not m)
+    posargs = [_zero_of_dtype(arg.dtype)[()] for arg, m in zip(args, mask) if not m]
+    result_shape = _get_nary_broadcast_shape(*[arg.shape for arg in args])
+
+    partial = PositinalArgumentPartial(func, pos, posargs)
+    matched_func = partial(*[a.data for a in matched_arrays], **kwargs)
+
+    unmatched_mask = matched_func != _zero_of_dtype(matched_func.dtype)
+
+    if not unmatched_mask.any():
+        return [], []
+
+    func_data = matched_func[unmatched_mask]
+    func_coords = matched_arrays[0].coords[:, unmatched_mask]
 
-    unmatched_coords, unmatched_data = \
-        _get_expanded_coords_data(coords[:, unmatched],
-                                  data[unmatched],
-                                  params,
-                                  result_shape)
+    func_array = COO(func_coords, func_data, shape=matched_arrays[0].shape).broadcast_to(result_shape)
 
-    coords_list.append(unmatched_coords)
-    data_list.append(unmatched_data)
+    if all(mask):
+        return [func_array.coords], [func_array.data]
 
-    if shape != result_shape:
-        broadcast_coords, broadcast_data = \
-            _get_broadcast_coords_data(coords[:, matched],
-                                       matched_coords,
-                                       data[matched],
-                                       params,
-                                       result_shape)
+    unmatched_mask = np.ones(func_array.nnz, dtype=np.bool)
 
-        coords_list.append(broadcast_coords)
-        data_list.append(broadcast_data)
+    for arg in unmatched_args:
+        matched_idx = _match_coo(func_array, arg, return_midx=True)[0]
+        unmatched_mask[matched_idx] = False
 
-    return coords_list, data_list
+    coords = np.asarray(func_array.coords[:, unmatched_mask], order='C')
+    data = np.asarray(func_array.data[unmatched_mask], order='C')
+
+    return [coords], [data]
+
+
+def _get_nary_broadcast_shape(*shapes):
+    """
+    Broadcast any number of shapes to a result shape.
+
+    Parameters
+    ----------
+    shapes : tuple[tuple[int]]
+        The shapes to broadcast.
+
+    Returns
+    -------
+    tuple[int]
+        The output shape.
+
+    Raises
+    ------
+    ValueError
+        If the input shapes cannot be broadcast to a single shape.
+    """
+    result_shape = ()
+
+    for shape in shapes:
+        try:
+            result_shape = _get_broadcast_shape(shape, result_shape)
+        except ValueError:
+            shapes_str = ', '.join(str(shape) for shape in shapes)
+            raise ValueError('operands could not be broadcast together with shapes %s'
+                             % shapes_str)
+
+    return result_shape
 
 
 def _get_broadcast_shape(shape1, shape2, is_result=False):
@@ -2302,7 +2327,7 @@ def _get_broadcast_parameters(shape, broadcast_shape):
     return params
 
 
-def _get_reduced_coords(coords, shape, params):
+def _get_reduced_coords(coords, params):
     """
     Gets only those dimensions of the coordinates that don't need to be broadcast.
 
@@ -2318,10 +2343,31 @@ def _get_reduced_coords(coords, shape, params):
     reduced_coords : np.ndarray
         The reduced coordinates.
     """
+
     reduced_params = [bool(param) for param in params]
+
+    return coords[reduced_params]
+
+
+def _get_reduced_shape(shape, params):
+    """
+    Gets only those dimensions of the coordinates that don't need to be broadcast.
+
+    Parameters
+    ----------
+    coords : np.ndarray
+        The coordinates to reduce.
+    params : list
+        The params from which to check which dimensions to get.
+
+    Returns
+    -------
+    reduced_coords : np.ndarray
+        The reduced coordinates.
+    """
     reduced_shape = tuple(l for l, p in zip(shape, params) if p)
 
-    return coords[reduced_params], reduced_shape
+    return reduced_shape
 
 
 def _get_expanded_coords_data(coords, data, params, broadcast_shape):
@@ -2412,8 +2458,9 @@ def _cartesian_product(*arrays):
 def _elemwise_unary(func, self, *args, **kwargs):
     check = kwargs.pop('check', True)
     data_zero = _zero_of_dtype(self.dtype)
-    func_zero = _zero_of_dtype(func(data_zero, *args, **kwargs).dtype)
-    if check and func(data_zero, *args, **kwargs) != func_zero:
+    zero_func = func(data_zero, *args, **kwargs)
+    func_zero = _zero_of_dtype(zero_func.dtype)
+    if check and zero_func != func_zero:
         raise ValueError("Performing this operation would produce "
                          "a dense result: %s" % str(func))
 
@@ -2426,80 +2473,39 @@ def _elemwise_unary(func, self, *args, **kwargs):
                sorted=self.sorted)
 
 
-def _get_matching_coords(coords1, coords2, shape1, shape2):
+def _get_matching_coords(coords, params, shape):
     """
-    Takes in the matching coordinates in both dimensions (only those dimensions that
-    don't need to be broadcast in both arrays and returns the coordinates that will
-    overlap in the output array, i.e., the coordinates for which both broadcast arrays
-    will be nonzero.
+    Get the matching coords across a number of broadcast operands.
 
     Parameters
     ----------
-    coords1, coords2 : np.ndarray
-    shape1, shape2 : tuple[int]
-
+    coords : list[numpy.ndarray]
+        The input coordinates.
+    params : list[Union[bool, none]]
+        The broadcast parameters.
     Returns
     -------
-    matching_coords : np.ndarray
-        The coordinates of the output array for which both inputs will be nonzero.
+    numpy.ndarray
+        The broacasted coordinates
     """
-    result_shape = _get_broadcast_shape(shape1, shape2)
-    params1 = _get_broadcast_parameters(shape1, result_shape)
-    params2 = _get_broadcast_parameters(shape2, result_shape)
-
     matching_coords = []
-    dim1 = 0
-    dim2 = 0
+    dims = np.zeros(len(coords), dtype=np.uint8)
 
-    for p1, p2 in zip(params1, params2):
-        if p1:
-            matching_coords.append(coords1[dim1])
+    for p_all in zip(*params):
+        for i, p in enumerate(p_all):
+            if p:
+                matching_coords.append(coords[i][dims[i]])
+                break
         else:
-            matching_coords.append(coords2[dim2])
-
-        if p1 is not None:
-            dim1 += 1
-
-        if p2 is not None:
-            dim2 += 1
-
-    return np.asarray(matching_coords)
-
-
-def _get_broadcast_coords_data(coords, matched_coords, data, params, broadcast_shape):
-    """
-    Get data that matched in the reduced coordinates but still had a partial overlap because of
-    the broadcast, i.e., it didn't match in one of the other dimensions.
-
-    Parameters
-    ----------
-    coords : np.ndarray
-        The list of coordinates of the required array. Must be sorted.
-    matched_coords : np.ndarray
-        The list of coordinates that match. Must be sorted.
-    data : np.ndarray
-        The data corresponding to coords.
-    params : list
-        The broadcast parameters.
-    broadcast_shape : tuple[int]
-        The shape to get the broadcast coordinates.
+            matching_coords.append(coords[dims[0]])
 
-    Returns
-    -------
-    broadcast_coords : np.ndarray
-        The broadcasted coordinates. Is sorted.
-    broadcasted_data : np.ndarray
-        The data corresponding to those coordinates.
-    """
-    full_coords, full_data = _get_expanded_coords_data(coords, data, params, broadcast_shape)
-    linear_full_coords = _linear_loc(full_coords, broadcast_shape)
-    linear_matched_coords = _linear_loc(matched_coords, broadcast_shape)
+        for i, p in enumerate(p_all):
+            if p is not None:
+                dims[i] += 1
 
-    overlapping_coords, _ = _match_arrays(linear_full_coords, linear_matched_coords)
-    mask = np.ones(full_coords.shape[1], dtype=np.bool)
-    mask[overlapping_coords] = False
+    dtype = np.min_scalar_type(max(shape) - 1)
 
-    return full_coords[:, mask], full_data[mask]
+    return np.asarray(matching_coords, dtype=dtype)
 
 
 def _linear_loc(coords, shape, signed=False):
@@ -2511,7 +2517,6 @@ def _linear_loc(coords, shape, signed=False):
     tmp = np.zeros(coords.shape[1], dtype=dtype)
     strides = 1
     for i, d in enumerate(shape[::-1]):
-        # out += self.coords[-(i + 1), :].astype(dtype) * strides
         np.multiply(coords[-(i + 1), :], strides, out=tmp, dtype=dtype)
         np.add(tmp, out, out=out)
         strides *= d
diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py
index b7fab974..f8b4b6dc 100644
--- a/sparse/tests/test_coo.py
+++ b/sparse/tests/test_coo.py
@@ -25,7 +25,7 @@ def test_reductions(reduction, axis, keepdims, kwargs, eqkwargs):
     y = x.todense()
     xx = getattr(x, reduction)(axis=axis, keepdims=keepdims, **kwargs)
     yy = getattr(y, reduction)(axis=axis, keepdims=keepdims, **kwargs)
-    assert_eq(xx, yy, **eqkwargs)
+    assert_eq(xx, yy, check_nnz=False, **eqkwargs)
 
 
 @pytest.mark.parametrize('reduction,kwargs,eqkwargs', [
@@ -42,7 +42,7 @@ def test_ufunc_reductions(reduction, axis, keepdims, kwargs, eqkwargs):
     y = x.todense()
     xx = reduction(x, axis=axis, keepdims=keepdims, **kwargs)
     yy = reduction(y, axis=axis, keepdims=keepdims, **kwargs)
-    assert_eq(xx, yy, **eqkwargs)
+    assert_eq(xx, yy, check_nnz=False, **eqkwargs)
 
 
 @pytest.mark.parametrize('axis', [
@@ -213,8 +213,8 @@ def test_elemwise(func):
     x = s.todense()
 
     fs = func(s)
-
     assert isinstance(fs, COO)
+    assert fs.nnz <= s.nnz
 
     assert_eq(func(x), fs)
 
@@ -234,6 +234,242 @@ def test_elemwise_binary(func, shape):
     assert_eq(func(xs, ys), func(x, y))
 
 
+@pytest.mark.parametrize('func', [
+    lambda x, y, z: x + y + z,
+    lambda x, y, z: x * y * z,
+    lambda x, y, z: x + y * z,
+    lambda x, y, z: (x + y) * z
+])
+@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_elemwise_trinary(func, shape):
+    xs = sparse.random(shape, density=0.5)
+    ys = sparse.random(shape, density=0.5)
+    zs = sparse.random(shape, density=0.5)
+
+    x = xs.todense()
+    y = ys.todense()
+    z = zs.todense()
+
+    fs = sparse.elemwise(func, xs, ys, zs)
+    assert isinstance(fs, COO)
+
+    assert_eq(fs, func(x, y, z))
+
+
+@pytest.mark.parametrize('func', [operator.add, operator.mul])
+@pytest.mark.parametrize('shape1,shape2', [((2, 3, 4), (3, 4)),
+                                           ((3, 4), (2, 3, 4)),
+                                           ((3, 1, 4), (3, 2, 4)),
+                                           ((1, 3, 4), (3, 4)),
+                                           ((3, 4, 1), (3, 4, 2)),
+                                           ((1, 5), (5, 1)),
+                                           ((3, 1), (3, 4)),
+                                           ((3, 1), (1, 4)),
+                                           ((1, 4), (3, 4))])
+def test_binary_broadcasting(func, shape1, shape2):
+    xs = sparse.random(shape1, density=0.5)
+    x = xs.todense()
+
+    ys = sparse.random(shape2, density=0.5)
+    y = ys.todense()
+
+    expected = func(x, y)
+    actual = func(xs, ys)
+
+    assert isinstance(actual, COO)
+    assert_eq(expected, actual)
+
+    assert np.count_nonzero(expected) == actual.nnz
+
+
+@pytest.mark.parametrize('shape1,shape2', [((3, 4), (2, 3, 4)),
+                                           ((3, 1, 4), (3, 2, 4)),
+                                           ((3, 4, 1), (3, 4, 2))])
+def test_broadcast_to(shape1, shape2):
+    a = sparse.random(shape1, density=0.5)
+    x = a.todense()
+
+    assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2))
+
+
+@pytest.mark.parametrize('shapes', [
+    [
+        (2,),
+        (3, 2),
+        (4, 3, 2),
+    ],
+    [
+        (3,),
+        (2, 3),
+        (2, 2, 3),
+    ],
+    [
+        (2,),
+        (2, 2),
+        (2, 2, 2),
+    ],
+    [
+        (4,),
+        (4, 4),
+        (4, 4, 4),
+    ],
+    [
+        (4,),
+        (4, 4),
+        (4, 4, 4),
+    ],
+    [
+        (4,),
+        (4, 4),
+        (4, 4, 4),
+    ],
+    [
+        (1, 1, 2),
+        (1, 3, 1),
+        (4, 1, 1)
+    ],
+    [
+        (2,),
+        (2, 1),
+        (2, 1, 1)
+    ],
+])
+@pytest.mark.parametrize('func', [
+    lambda x, y, z: (x + y) * z,
+    lambda x, y, z: x * (y + z),
+    lambda x, y, z: x * y * z,
+    lambda x, y, z: x + y + z,
+    lambda x, y, z: x + y - z,
+    lambda x, y, z: x - y + z,
+])
+def test_trinary_broadcasting(shapes, func):
+    args = [sparse.random(s, density=0.5) for s in shapes]
+    dense_args = [arg.todense() for arg in args]
+
+    fs = sparse.elemwise(func, *args)
+    assert isinstance(fs, COO)
+
+    assert_eq(fs, func(*dense_args))
+
+
+@pytest.mark.parametrize('shapes, func', [
+    ([
+         (2,),
+         (3, 2),
+         (4, 3, 2),
+     ], lambda x, y, z: (x + y) * z),
+    ([
+         (3,),
+         (2, 3),
+         (2, 2, 3),
+     ], lambda x, y, z: x * (y + z)),
+    ([
+         (2,),
+         (2, 2),
+         (2, 2, 2),
+     ], lambda x, y, z: x * y * z),
+    ([
+         (4,),
+         (4, 4),
+         (4, 4, 4),
+     ], lambda x, y, z: x + y + z),
+])
+@pytest.mark.parametrize('value', [
+    np.nan,
+    np.inf,
+    -np.inf
+])
+@pytest.mark.parametrize('path_fraction', [0.25, 0.5, 0.75, 1.0])
+@pytest.mark.filterwarnings('ignore:invalid value')
+def test_trinary_broadcasting_pathological(shapes, func, value, path_fraction):
+    def value_array(n):
+        i = int(n * path_fraction)
+
+        ar = np.empty((n,), dtype=np.float_)
+        ar[:i] = value
+        ar[i:] = np.random.rand(n - i)
+        return ar
+
+    args = [sparse.random(s, density=0.5, data_rvs=value_array) for s in shapes]
+    dense_args = [arg.todense() for arg in args]
+
+    fs = sparse.elemwise(func, *args)
+    assert isinstance(fs, COO)
+
+    assert_eq(fs, func(*dense_args), equal_nan=True)
+
+
+def test_sparse_broadcasting(monkeypatch):
+    orig_unmatch_coo = sparse.coo._unmatch_coo
+
+    state = {'num_matches': 0}
+
+    xs = sparse.random((3, 4), density=0.5)
+    ys = sparse.random((3, 4), density=0.5)
+
+    def mock_unmatch_coo(*args, **kwargs):
+        result = orig_unmatch_coo(*args, **kwargs)
+        state['num_matches'] += len(result[0])
+        return result
+
+    monkeypatch.setattr(sparse.coo, '_unmatch_coo', mock_unmatch_coo)
+
+    xs * ys
+
+    # Less than in case there's absolutely no overlap in some cases.
+    assert state['num_matches'] <= 1
+
+
+def test_dense_broadcasting(monkeypatch):
+    orig_unmatch_coo = sparse.coo._unmatch_coo
+
+    state = {'num_matches': 0}
+
+    xs = sparse.random((3, 4), density=0.5)
+    ys = sparse.random((3, 4), density=0.5)
+
+    def mock_unmatch_coo(*args, **kwargs):
+        result = orig_unmatch_coo(*args, **kwargs)
+        state['num_matches'] += len(result[0])
+        return result
+
+    monkeypatch.setattr(sparse.coo, '_unmatch_coo', mock_unmatch_coo)
+
+    xs + ys
+
+    # Less than in case there's absolutely no overlap in some cases.
+    assert state['num_matches'] <= 3
+
+
+@pytest.mark.parametrize('format', ['coo', 'dok'])
+def test_sparsearray_elemwise(format):
+    xs = sparse.random((3, 4), density=0.5, format=format)
+    ys = sparse.random((3, 4), density=0.5, format=format)
+
+    x = xs.todense()
+    y = ys.todense()
+
+    fs = sparse.elemwise(operator.add, xs, ys)
+    assert isinstance(fs, COO)
+
+    assert_eq(fs, x + y)
+
+
+def test_ndarray_densification_fails():
+    xs = sparse.random((3, 4), density=0.5)
+    y = np.random.rand(3, 4)
+
+    with pytest.raises(ValueError):
+        xs + y
+
+
+def test_elemwise_noargs():
+    def func():
+        return np.float_(5.0)
+
+    assert sparse.elemwise(func) == func()
+
+
 @pytest.mark.parametrize('func', [
     operator.pow, operator.truediv, operator.floordiv,
     operator.ge, operator.le, operator.eq, operator.mod
@@ -472,48 +708,6 @@ def test_bitwise_binary_bool(func, shape):
     assert_eq(func(xs, ys), func(x, y))
 
 
-@pytest.mark.parametrize('func', [operator.mul])
-@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_numpy_mixed_binary(func, shape):
-    xs = sparse.random(shape, density=0.5)
-    y = np.random.rand(*shape)
-
-    x = xs.todense()
-
-    fs1 = func(xs, y)
-
-    assert isinstance(fs1, COO)
-    assert fs1.nnz <= xs.nnz
-    assert_eq(fs1, func(x, y))
-
-    fs2 = func(y, xs)
-
-    assert isinstance(fs2, COO)
-    assert fs2.nnz <= xs.nnz
-    assert_eq(fs2, func(y, x))
-
-
-@pytest.mark.parametrize('func', [operator.and_])
-@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_numpy_mixed_binary_bitwise(func, shape):
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-    y = np.random.randint(100, size=shape)
-
-    x = xs.todense()
-
-    fs1 = func(xs, y)
-
-    assert isinstance(fs1, COO)
-    assert fs1.nnz <= xs.nnz
-    assert_eq(fs1, func(x, y))
-
-    fs2 = func(y, xs)
-
-    assert isinstance(fs2, COO)
-    assert fs2.nnz <= xs.nnz
-    assert_eq(fs2, func(y, x))
-
-
 def test_elemwise_binary_empty():
     x = COO({}, shape=(10, 10))
     y = sparse.random((10, 10), density=0.5)
@@ -648,7 +842,7 @@ def test_canonical():
                        [1, 0, 3],
                        [0, 1, 0],
                        [1, 0, 3]]).T
-    data = np.arange(5)
+    data = np.arange(5) + 1
 
     old = COO(coords, data, shape=(2, 2, 5))
     x = COO(coords, data, shape=(2, 2, 5))
@@ -761,61 +955,6 @@ def test_addition_not_ok_when_large_and_sparse():
         np.exp(x)
 
 
-@pytest.mark.parametrize('func', [operator.add, operator.mul])
-@pytest.mark.parametrize('shape1,shape2', [((2, 3, 4), (3, 4)),
-                                           ((3, 4), (2, 3, 4)),
-                                           ((3, 1, 4), (3, 2, 4)),
-                                           ((1, 3, 4), (3, 4)),
-                                           ((3, 4, 1), (3, 4, 2)),
-                                           ((1, 5), (5, 1))])
-def test_broadcasting(func, shape1, shape2):
-    xs = sparse.random(shape1, density=0.5)
-    x = xs.todense()
-
-    ys = sparse.random(shape2, density=0.5)
-    y = ys.todense()
-
-    expected = func(x, y)
-    actual = func(xs, ys)
-
-    assert_eq(expected, actual)
-
-    assert np.count_nonzero(expected) == actual.nnz
-
-
-@pytest.mark.parametrize('func', [operator.mul])
-@pytest.mark.parametrize('shape1,shape2', [((2, 3, 4), (3, 4)),
-                                           ((3, 4), (2, 3, 4)),
-                                           ((3, 1, 4), (3, 2, 4)),
-                                           ((1, 3, 4), (3, 4)),
-                                           ((3, 4, 1), (3, 4, 2)),
-                                           ((1, 5), (5, 1))])
-def test_numpy_mixed_broadcasting(func, shape1, shape2):
-    xs = sparse.random(shape1, density=0.5)
-    x = xs.todense()
-
-    y = np.random.rand(*shape2)
-
-    expected = func(x, y)
-    actual = func(xs, y)
-
-    assert isinstance(actual, COO)
-
-    assert_eq(expected, actual)
-
-    assert np.count_nonzero(expected) == actual.nnz
-
-
-@pytest.mark.parametrize('shape1,shape2', [((3, 4), (2, 3, 4)),
-                                           ((3, 1, 4), (3, 2, 4)),
-                                           ((3, 4, 1), (3, 4, 2))])
-def test_broadcast_to(shape1, shape2):
-    a = sparse.random(shape1, density=0.5)
-    x = a.todense()
-
-    assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2))
-
-
 @pytest.mark.parametrize('scalar', [2, 2.5, np.float32(2.0), np.int8(3)])
 def test_scalar_multiplication(scalar):
     a = sparse.random((2, 3, 4), density=0.5)
@@ -879,13 +1018,13 @@ def test_scipy_sparse_interface():
     x = scipy.sparse.coo_matrix(inp)
     xx = sparse.COO(inp)
 
-    assert_eq(x, xx)
-    assert_eq(x.T, xx.T)
-    assert_eq(xx.to_scipy_sparse(), x)
-    assert_eq(COO.from_scipy_sparse(xx.to_scipy_sparse()), xx)
+    assert_eq(x, xx, check_nnz=False)
+    assert_eq(x.T, xx.T, check_nnz=False)
+    assert_eq(xx.to_scipy_sparse(), x, check_nnz=False)
+    assert_eq(COO.from_scipy_sparse(xx.to_scipy_sparse()), xx, check_nnz=False)
 
-    assert_eq(x, xx)
-    assert_eq(x.T.dot(x), xx.T.dot(xx))
+    assert_eq(x, xx, check_nnz=False)
+    assert_eq(x.T.dot(x), xx.T.dot(xx), check_nnz=False)
     assert isinstance(x + xx, COO)
     assert isinstance(xx + x, COO)
 
diff --git a/sparse/utils.py b/sparse/utils.py
index 3fd3df62..04a3b82c 100644
--- a/sparse/utils.py
+++ b/sparse/utils.py
@@ -1,8 +1,9 @@
 import numpy as np
 from numbers import Integral
+from collections import Iterable
 
 
-def assert_eq(x, y, **kwargs):
+def assert_eq(x, y, check_nnz=True, **kwargs):
     from .coo import COO
     assert x.shape == y.shape
     assert x.dtype == y.dtype
@@ -16,10 +17,14 @@ def assert_eq(x, y, **kwargs):
 
     if hasattr(x, 'todense'):
         xx = x.todense()
+        if check_nnz:
+            assert (xx != 0).sum() == x.nnz
     else:
         xx = x
     if hasattr(y, 'todense'):
         yy = y.todense()
+        if check_nnz:
+            assert (yy != 0).sum() == y.nnz
     else:
         yy = y
     assert np.allclose(xx, yy, **kwargs)
@@ -146,3 +151,44 @@ def random(
         ar = DOK(ar)
 
     return ar
+
+
+def isscalar(x):
+    from .sparse_array import SparseArray
+    return not isinstance(x, SparseArray) and np.isscalar(x)
+
+
+class PositinalArgumentPartial(object):
+    def __init__(self, func, pos, posargs):
+        if not isinstance(pos, Iterable):
+            pos = (pos,)
+            posargs = (posargs,)
+
+        n_partial_args = len(pos)
+
+        self.pos = pos
+        self.posargs = posargs
+        self.func = func
+
+        self.n = n_partial_args
+
+        self.__doc__ = func.__doc__
+
+    def __call__(self, *args, **kwargs):
+        j = 0
+        totargs = []
+
+        for i in range(len(args) + self.n):
+            if j >= self.n or i != self.pos[j]:
+                totargs.append(args[i - j])
+            else:
+                totargs.append(self.posargs[j])
+                j += 1
+
+        return self.func(*totargs, **kwargs)
+
+    def __str__(self):
+        return str(self.func)
+
+    def __repr__(self):
+        return repr(self.func)