Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: astype fill_value for SparseArray.astype #23547

Merged
merged 11 commits into from
Nov 12, 2018
34 changes: 25 additions & 9 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ def __array__(self, dtype=None, copy=True):
# Can't put pd.NaT in a datetime64[ns]
fill_value = np.datetime64('NaT')
try:
dtype = np.result_type(self.sp_values.dtype, fill_value)
dtype = np.result_type(self.sp_values.dtype, type(fill_value))
Copy link
Contributor Author

@TomAugspurger TomAugspurger Nov 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was having trouble with string fill values.

except TypeError:
dtype = object

Expand Down Expand Up @@ -996,7 +996,7 @@ def _take_with_fill(self, indices, fill_value=None):
if len(self) == 0:
# Empty... Allow taking only if all empty
if (indices == -1).all():
dtype = np.result_type(self.sp_values, fill_value)
dtype = np.result_type(self.sp_values, type(fill_value))
taken = np.empty_like(indices, dtype=dtype)
taken.fill(fill_value)
return taken
Expand All @@ -1009,7 +1009,7 @@ def _take_with_fill(self, indices, fill_value=None):
if self.sp_index.npoints == 0:
# Avoid taking from the empty self.sp_values
taken = np.full(sp_indexer.shape, fill_value=fill_value,
dtype=np.result_type(fill_value))
dtype=np.result_type(type(fill_value)))
else:
taken = self.sp_values.take(sp_indexer)

Expand All @@ -1030,12 +1030,12 @@ def _take_with_fill(self, indices, fill_value=None):
result_type = taken.dtype

if m0.any():
result_type = np.result_type(result_type, self.fill_value)
result_type = np.result_type(result_type, type(self.fill_value))
taken = taken.astype(result_type)
taken[old_fill_indices] = self.fill_value

if m1.any():
result_type = np.result_type(result_type, fill_value)
result_type = np.result_type(result_type, type(fill_value))
taken = taken.astype(result_type)
taken[new_fill_indices] = fill_value

Expand All @@ -1061,7 +1061,7 @@ def _take_without_fill(self, indices):
# edge case in take...
# I think just return
out = np.full(indices.shape, self.fill_value,
dtype=np.result_type(self.fill_value))
dtype=np.result_type(type(self.fill_value)))
arr, sp_index, fill_value = make_sparse(out,
fill_value=self.fill_value)
return type(self)(arr, sparse_index=sp_index,
Expand All @@ -1073,7 +1073,7 @@ def _take_without_fill(self, indices):

if fillable.any():
# TODO: may need to coerce array to fill value
result_type = np.result_type(taken, self.fill_value)
result_type = np.result_type(taken, type(self.fill_value))
taken = taken.astype(result_type)
taken[fillable] = self.fill_value

Expand Down Expand Up @@ -1215,10 +1215,26 @@ def astype(self, dtype=None, copy=True):
dtype = pandas_dtype(dtype)

if not isinstance(dtype, SparseDtype):
dtype = SparseDtype(dtype, fill_value=self.fill_value)
fill_value = astype_nansafe(np.array(self.fill_value),
dtype).item()
dtype = SparseDtype(dtype, fill_value=fill_value)

# Typically we'll just astype the sp_values to dtype.subtype,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of ugly, but it's backwards compatible, consistent with the rest of pandas, and does what we need.

Basically, unless we want to support actual numpy string dtypes (which we probably don't), we need a way of differentiating between array.astype(object) and array.astype(str).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make this a method on the Dtype itself to avoid cluttering this up here? maybe dtype.astype_type
alternatively . we could actually add .astype_nansafe(value, copy=False) as a Dtype method (kind of makes sense actually)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was playing around with that earlier (didn't push it though). I called it SparseDtype.astype. I'll give it another shot and see what it de-duplicates.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, can you clarify what you had in mind for astype_nansafe(value, copy=False)? What would value here be? An array or a scalar?

I'll have a followup PR soon (hopefully today) for ensuring that the dtype of SparseArray.sp_values is consistent with the type of SparseArray.dtype.fill_value. I think my SparseDtype.astype is more useful there. It wouldn't make sense for using here, since we're astyping the actual array of values.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I think adding a method that returns the dtype of the .astype values on the Dtype itself is what I am looking for. The conversion still happens in the Array. Basically the code you added here should be on the Dtype object.

Copy link
Contributor Author

@TomAugspurger TomAugspurger Nov 11, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to add two methods

  1. SparseDtype.astype: convert from a SparseDtype to a new dtype, taking care to astype self.fill_value if needed.
  2. SparseDtype._subtype_with_str to hold the logic for determining what the "real" subtype is, if we actually want str.

SparseDtype.astype seems reasonably useful to users, so I made it public.

# but SparseDtype follows the pandas convention of storing strings
# as object dtype. So SparseDtype(str) immediately becomes
# SparseDtype(object), and at this point we don't know whether object
# means string or something else. We *cannot* just pass object to
# astype_nansafe below, since that won't convert to string. So
# we rely on the assumption that "string fill_value" means strings
# which is close enough to being true.
if (is_object_dtype(dtype.subtype) and
isinstance(dtype.fill_value, compat.text_type)):
subtype = str
else:
subtype = dtype.subtype

sp_values = astype_nansafe(self.sp_values,
dtype.subtype,
subtype,
copy=copy)
if sp_values is self.sp_values and copy:
sp_values = sp_values.copy()
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,32 @@ def test_astype_all(self, any_real_dtype):
tm.assert_numpy_array_equal(np.asarray(res.values),
vals.astype(typ))

@pytest.mark.parametrize('array, dtype, expected', [
(SparseArray([0, 1]), 'float',
SparseArray([0., 1.], dtype=SparseDtype(float, 0.0))),
(SparseArray([0, 1]), bool, SparseArray([False, True])),
(SparseArray([0, 1], fill_value=1), bool,
SparseArray([False, True], dtype=SparseDtype(bool, True))),
pytest.param(
SparseArray([0, 1]), 'datetime64[ns]',
SparseArray(np.array([0, 1], dtype='datetime64[ns]'),
dtype=SparseDtype('datetime64[ns]',
pd.Timestamp('1970'))),
marks=[pytest.mark.xfail(reason="NumPy-7619", strict=True)],
),
(SparseArray([0, 1, 10]), str,
SparseArray(['0', '1', '10'], dtype=SparseDtype(str, '0'))),
(SparseArray(['10', '20']), float, SparseArray([10.0, 20.0])),
])
def test_astype_more(self, array, dtype, expected):
result = array.astype(dtype)
tm.assert_sp_array_equal(result, expected)

def test_astype_nan_raises(self):
arr = SparseArray([1.0, np.nan])
with tm.assert_raises_regex(ValueError, 'Cannot convert non-finite'):
arr.astype(int)

def test_set_fill_value(self):
arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
arr.fill_value = 2
Expand Down