Merge branch 'master' into master

jowens · Aug 21, 2017 · a1dbdf2 · a1dbdf2
2 parents 9333952 + d0d28fe
commit a1dbdf2
Show file tree

Hide file tree

Showing 18 changed files with 480 additions and 322 deletions.
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
@@ -117,8 +117,10 @@
     // with results. If the commit is `null`, regression detection is
     // skipped for the matching benchmark.
     //
-    // "regressions_first_commits": {
-    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
-    //    "another_benchmark": null,   // Skip regression detection altogether
-    // }
+    "regressions_first_commits": {
+        "*": "v0.20.0"
+    },
+    "regression_thresholds": {
+        "*": 0.05
+    }
 }
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -129,6 +129,9 @@ Other Enhancements
 - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
 - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
 - :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as None instead of raising `ValueError` (:issue:`17276`).
+- :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`)
+
+
 
 .. _whatsnew_0210.api_breaking:
 
@@ -386,6 +389,9 @@ Numeric
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.isin` when called with a categorical (:issue`16639`)
+- Bug in the categorical constructor with empty values and categories causing
+  the ``.categories`` to be an empty ``Float64Index`` rather than an empty
+  ``Index`` with object dtype (:issue:`17248`)
 
 
 Other

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
                 # On list with NaNs, int values will be converted to float. Use
                 # "object" dtype to prevent this. In the end objects will be
                 # casted to int/... in the category assignment step.
-                dtype = 'object' if isna(values).any() else None
+                if len(values) == 0 or isna(values).any():
+                    dtype = 'object'
+                else:
+                    dtype = None
                 values = _sanitize_array(values, None, dtype=dtype)
 
         if categories is None:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"):
         # fallback to regular tuples
         return zip(*arrays)
 
-    if compat.PY3:  # pragma: no cover
-        items = iteritems
+    items = iteritems
 
     def __len__(self):
         """Returns length of info axis, but here we use the index """

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2063,18 +2063,77 @@ def __delitem__(self, key):
 
     def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
         """
-        Analogous to ndarray.take
+        Return the elements in the given *positional* indices along an axis.
+
+        This means that we are not indexing according to actual values in
+        the index attribute of the object. We are indexing according to the
+        actual position of the element in the object.
 
         Parameters
         ----------
-        indices : list / array of ints
+        indices : array-like
+            An array of ints indicating which positions to take.
         axis : int, default 0
-        convert : translate neg to pos indices (default)
-        is_copy : mark the returned frame as a copy
+            The axis on which to select elements. "0" means that we are
+            selecting rows, "1" means that we are selecting columns, etc.
+        convert : bool, default True
+            Whether to convert negative indices to positive ones, just as with
+            indexing into Python lists. For example, if `-1` was passed in,
+            this index would be converted ``n - 1``.
+        is_copy : bool, default True
+            Whether to return a copy of the original object or not.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame([('falcon', 'bird',    389.0),
+                               ('parrot', 'bird',     24.0),
+                               ('lion',   'mammal',   80.5),
+                               ('monkey', 'mammal', np.nan)],
+                              columns=('name', 'class', 'max_speed'),
+                              index=[0, 2, 3, 1])
+        >>> df
+             name   class  max_speed
+        0  falcon    bird      389.0
+        2  parrot    bird       24.0
+        3    lion  mammal       80.5
+        1  monkey  mammal        NaN
+
+        Take elements at positions 0 and 3 along the axis 0 (default).
+
+        Note how the actual indices selected (0 and 1) do not correspond to
+        our selected indices 0 and 3. That's because we are selecting the 0th
+        and 3rd rows, not rows whose indices equal 0 and 3.
+
+        >>> df.take([0, 3])
+        0  falcon    bird      389.0
+        1  monkey  mammal        NaN
+
+        Take elements at indices 1 and 2 along the axis 1 (column selection).
+
+        >>> df.take([1, 2], axis=1)
+            class  max_speed
+        0    bird      389.0
+        2    bird       24.0
+        3  mammal       80.5
+        1  mammal        NaN
+
+        We may take elements using negative integers for positive indices,
+        starting from the end of the object, just like with Python lists.
+
+        >>> df.take([-1, -2])
+             name   class  max_speed
+        1  monkey  mammal        NaN
+        3    lion  mammal       80.5
 
         Returns
         -------
         taken : type of caller
+            An array-like containing the elements taken from the object.
+
+        See Also
+        --------
+        numpy.ndarray.take
+        numpy.take
         """
         nv.validate_take(tuple(), kwargs)
         self._consolidate_inplace()
@@ -2978,14 +3037,36 @@ def filter(self, items=None, like=None, regex=None, axis=None):
 
     def head(self, n=5):
         """
-        Returns first n rows
+        Return the first n rows.
+
+        Parameters
+        ----------
+        n : int, default 5
+            Number of rows to select.
+
+        Returns
+        -------
+        obj_head : type of caller
+            The first n rows of the caller object.
         """
+
         return self.iloc[:n]
 
     def tail(self, n=5):
         """
-        Returns last n rows
+        Return the last n rows.
+
+        Parameters
+        ----------
+        n : int, default 5
+            Number of rows to select.
+
+        Returns
+        -------
+        obj_tail : type of caller
+            The last n rows of the caller object.
         """
+
         if n == 0:
             return self.iloc[0:0]
         return self.iloc[-n:]

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx):
 
 
 def maybe_convert_indices(indices, n):
-    """ if we have negative indicies, translate to postive here
-    if have indicies that are out-of-bounds, raise an IndexError
     """
+    Attempt to convert indices into valid, positive indices.
+
+    If we have negative indices, translate to positive here.
+    If we have indices that are out-of-bounds, raise an IndexError.
+
+    Parameters
+    ----------
+    indices : array-like
+        The array of indices that we are to convert.
+    n : int
+        The number of elements in the array that we are indexing.
+
+    Returns
+    -------
+    valid_indices : array-like
+        An array-like of positive indices that correspond to the ones
+        that were passed in initially to this function.
+
+    Raises
+    ------
+    IndexError : one of the converted indices either exceeded the number
+        of elements (specified by `n`) OR was still negative.
+    """
+
     if isinstance(indices, list):
         indices = np.array(indices)
         if len(indices) == 0:

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1110,8 +1110,7 @@ def iteritems(self):
         """
         return zip(iter(self.index), iter(self))
 
-    if compat.PY3:  # pragma: no cover
-        items = iteritems
+    items = iteritems
 
     # ----------------------------------------------------------------------
     # Misc public methods

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds):
     'widths': None,
 }
 
-_c_unsupported = set(['skipfooter'])
-_python_unsupported = set([
+_c_unsupported = {'skipfooter'}
+_python_unsupported = {
     'low_memory',
     'buffer_lines',
     'float_precision',
-])
-_deprecated_args = set([
+}
+_deprecated_args = {
     'as_recarray',
     'buffer_lines',
     'compact_ints',
     'use_unsigned',
-])
+}
 
 
 def _make_parser_function(name, sep=','):

diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -171,7 +171,16 @@ def test_nonzero(self):
     def test_iteritems(self):
         df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
         for k, v in compat.iteritems(df):
-            assert type(v) == self.klass._constructor_sliced
+            assert isinstance(v, self.klass._constructor_sliced)
+
+    def test_items(self):
+        # issue #17213, #13918
+        cols = ['a', 'b', 'c']
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
+        for c, (k, v) in zip(cols, df.items()):
+            assert c == k
+            assert isinstance(v, Series)
+            assert (df[k] == v).all()
 
     def test_iter(self):
         assert tm.equalContents(list(self.frame), self.frame.columns)

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
             arrays = [[]] * N
             names = list('ABC')[:N]
             result = MultiIndex.from_arrays(arrays=arrays, names=names)
-            expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
+            expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
                                   names=names)
             tm.assert_index_equal(result, expected)
 
@@ -829,7 +829,7 @@ def test_from_product_empty(self):
 
         # 1 level
         result = MultiIndex.from_product([[]], names=['A'])
-        expected = pd.Float64Index([], name='A')
+        expected = pd.Index([], name='A')
         tm.assert_index_equal(result, expected)
 
         # 2 levels
@@ -838,7 +838,7 @@ def test_from_product_empty(self):
         names = ['A', 'B']
         for first, second in zip(l1, l2):
             result = MultiIndex.from_product([first, second], names=names)
-            expected = MultiIndex(levels=[np.array(first), np.array(second)],
+            expected = MultiIndex(levels=[first, second],
                                   labels=[[], []], names=names)
             tm.assert_index_equal(result, expected)
 
@@ -847,8 +847,7 @@ def test_from_product_empty(self):
         for N in range(4):
             lvl2 = lrange(N)
             result = MultiIndex.from_product([[], lvl2, []], names=names)
-            expected = MultiIndex(levels=[np.array(A)
-                                          for A in [[], lvl2, []]],
+            expected = MultiIndex(levels=[[], lvl2, []],
                                   labels=[[], [], []], names=names)
             tm.assert_index_equal(result, expected)
 

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
         tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
 
         s1 = pd.Series([], dtype='category')
-        s2 = pd.Series([])
+        s2 = pd.Series([], dtype='object')
 
         # different dtype => not-category
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)

diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py
@@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
         exp = Categorical([])
         tm.assert_categorical_equal(res, exp)
 
-        res = union_categoricals([pd.Categorical([]),
-                                  pd.Categorical([1.0])])
-        exp = Categorical([1.0])
+        res = union_categoricals([Categorical([]),
+                                  Categorical(['1'])])
+        exp = Categorical(['1'])
         tm.assert_categorical_equal(res, exp)
 
-        # to make dtype equal
-        nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
-        res = union_categoricals([nanc,
-                                  pd.Categorical([])])
-        tm.assert_categorical_equal(res, nanc)
-
     def test_union_categorical_same_category(self):
         # check fastpath
         c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])

diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -301,6 +301,16 @@ def test_iteritems(self):
         # assert is lazy (genrators don't define reverse, lists do)
         assert not hasattr(self.series.iteritems(), 'reverse')
 
+    def test_items(self):
+        for idx, val in self.series.items():
+            assert val == self.series[idx]
+
+        for idx, val in self.ts.items():
+            assert val == self.ts[idx]
+
+        # assert is lazy (genrators don't define reverse, lists do)
+        assert not hasattr(self.series.items(), 'reverse')
+
     def test_raise_on_info(self):
         s = Series(np.random.randn(10))
         with pytest.raises(AttributeError):

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -112,6 +112,16 @@ def test_setitem_listlike(self):
         result = c.codes[np.array([100000]).astype(np.int64)]
         tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
 
+    def test_constructor_empty(self):
+        # GH 17248
+        c = Categorical([])
+        expected = Index([])
+        tm.assert_index_equal(c.categories, expected)
+
+        c = Categorical([], categories=[1, 2, 3])
+        expected = pd.Int64Index([1, 2, 3])
+        tm.assert_index_equal(c.categories, expected)
+
     def test_constructor_unsortable(self):
 
         # it works!