Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mgasvoda authored Aug 21, 2017
2 parents 9333952 + d0d28fe commit a1dbdf2
Show file tree
Hide file tree
Showing 18 changed files with 480 additions and 322 deletions.
10 changes: 6 additions & 4 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,10 @@
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// }
"regressions_first_commits": {
"*": "v0.20.0"
},
"regression_thresholds": {
"*": 0.05
}
}
6 changes: 6 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ Other Enhancements
- `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
- :func:`pd.read_sas()` now recognizes many more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as None instead of raising `ValueError` (:issue:`17276`).
- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and are lazy in all cases (:issue:`13918`, :issue:`17213`)



.. _whatsnew_0210.api_breaking:

Expand Down Expand Up @@ -386,6 +389,9 @@ Numeric
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
- Bug in the categorical constructor with empty values and categories causing
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
``Index`` with object dtype (:issue:`17248`)


Other
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
# On list with NaNs, int values will be converted to float. Use
# "object" dtype to prevent this. In the end objects will be
cast to int/... in the category assignment step.
dtype = 'object' if isna(values).any() else None
if len(values) == 0 or isna(values).any():
dtype = 'object'
else:
dtype = None
values = _sanitize_array(values, None, dtype=dtype)

if categories is None:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"):
# fallback to regular tuples
return zip(*arrays)

if compat.PY3: # pragma: no cover
items = iteritems
items = iteritems

def __len__(self):
"""Returns length of info axis, but here we use the index """
Expand Down
93 changes: 87 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2063,18 +2063,77 @@ def __delitem__(self, key):

def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
"""
Analogous to ndarray.take
Return the elements in the given *positional* indices along an axis.
This means that we are not indexing according to actual values in
the index attribute of the object. We are indexing according to the
actual position of the element in the object.
Parameters
----------
indices : list / array of ints
indices : array-like
An array of ints indicating which positions to take.
axis : int, default 0
convert : translate neg to pos indices (default)
is_copy : mark the returned frame as a copy
The axis on which to select elements. "0" means that we are
selecting rows, "1" means that we are selecting columns, etc.
convert : bool, default True
Whether to convert negative indices to positive ones, just as with
indexing into Python lists. For example, if `-1` was passed in,
this index would be converted to ``n - 1``.
is_copy : bool, default True
Whether to return a copy of the original object or not.
Examples
--------
>>> df = pd.DataFrame([('falcon', 'bird', 389.0),
('parrot', 'bird', 24.0),
('lion', 'mammal', 80.5),
('monkey', 'mammal', np.nan)],
columns=('name', 'class', 'max_speed'),
index=[0, 2, 3, 1])
>>> df
name class max_speed
0 falcon bird 389.0
2 parrot bird 24.0
3 lion mammal 80.5
1 monkey mammal NaN
Take elements at positions 0 and 3 along the axis 0 (default).
Note how the index labels of the rows returned (0 and 1) do not
correspond to the positions we requested (0 and 3). That's because we
are selecting the 0th and 3rd rows, not the rows whose index labels
equal 0 and 3.
>>> df.take([0, 3])
name class max_speed
0 falcon bird 389.0
1 monkey mammal NaN
Take elements at indices 1 and 2 along the axis 1 (column selection).
>>> df.take([1, 2], axis=1)
class max_speed
0 bird 389.0
2 bird 24.0
3 mammal 80.5
1 mammal NaN
We may take elements using negative integers for positive indices,
starting from the end of the object, just like with Python lists.
>>> df.take([-1, -2])
name class max_speed
1 monkey mammal NaN
3 lion mammal 80.5
Returns
-------
taken : type of caller
An array-like containing the elements taken from the object.
See Also
--------
numpy.ndarray.take
numpy.take
"""
nv.validate_take(tuple(), kwargs)
self._consolidate_inplace()
Expand Down Expand Up @@ -2978,14 +3037,36 @@ def filter(self, items=None, like=None, regex=None, axis=None):

def head(self, n=5):
"""
Returns first n rows
Return the first n rows.
Parameters
----------
n : int, default 5
Number of rows to select.
Returns
-------
obj_head : type of caller
The first n rows of the caller object.
"""

return self.iloc[:n]

def tail(self, n=5):
"""
Returns last n rows
Return the last n rows.
Parameters
----------
n : int, default 5
Number of rows to select.
Returns
-------
obj_tail : type of caller
The last n rows of the caller object.
"""

if n == 0:
return self.iloc[0:0]
return self.iloc[-n:]
Expand Down
26 changes: 24 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx):


def maybe_convert_indices(indices, n):
""" if we have negative indicies, translate to postive here
if have indicies that are out-of-bounds, raise an IndexError
"""
Attempt to convert indices into valid, positive indices.
If we have negative indices, translate to positive here.
If we have indices that are out-of-bounds, raise an IndexError.
Parameters
----------
indices : array-like
The array of indices that we are to convert.
n : int
The number of elements in the array that we are indexing.
Returns
-------
valid_indices : array-like
An array-like of positive indices that correspond to the ones
that were passed in initially to this function.
Raises
------
IndexError : one of the converted indices either exceeded the number
of elements (specified by `n`) OR was still negative.
"""

if isinstance(indices, list):
indices = np.array(indices)
if len(indices) == 0:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,8 +1110,7 @@ def iteritems(self):
"""
return zip(iter(self.index), iter(self))

if compat.PY3: # pragma: no cover
items = iteritems
items = iteritems

# ----------------------------------------------------------------------
# Misc public methods
Expand Down
10 changes: 5 additions & 5 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds):
'widths': None,
}

_c_unsupported = set(['skipfooter'])
_python_unsupported = set([
_c_unsupported = {'skipfooter'}
_python_unsupported = {
'low_memory',
'buffer_lines',
'float_precision',
])
_deprecated_args = set([
}
_deprecated_args = {
'as_recarray',
'buffer_lines',
'compact_ints',
'use_unsigned',
])
}


def _make_parser_function(name, sep=','):
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,16 @@ def test_nonzero(self):
def test_iteritems(self):
df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
for k, v in compat.iteritems(df):
assert type(v) == self.klass._constructor_sliced
assert isinstance(v, self.klass._constructor_sliced)

def test_items(self):
# issue #17213, #13918
cols = ['a', 'b', 'c']
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
for c, (k, v) in zip(cols, df.items()):
assert c == k
assert isinstance(v, Series)
assert (df[k] == v).all()

def test_iter(self):
assert tm.equalContents(list(self.frame), self.frame.columns)
Expand Down
9 changes: 4 additions & 5 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
arrays = [[]] * N
names = list('ABC')[:N]
result = MultiIndex.from_arrays(arrays=arrays, names=names)
expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
names=names)
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -829,7 +829,7 @@ def test_from_product_empty(self):

# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Float64Index([], name='A')
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)

# 2 levels
Expand All @@ -838,7 +838,7 @@ def test_from_product_empty(self):
names = ['A', 'B']
for first, second in zip(l1, l2):
result = MultiIndex.from_product([first, second], names=names)
expected = MultiIndex(levels=[np.array(first), np.array(second)],
expected = MultiIndex(levels=[first, second],
labels=[[], []], names=names)
tm.assert_index_equal(result, expected)

Expand All @@ -847,8 +847,7 @@ def test_from_product_empty(self):
for N in range(4):
lvl2 = lrange(N)
result = MultiIndex.from_product([[], lvl2, []], names=names)
expected = MultiIndex(levels=[np.array(A)
for A in [[], lvl2, []]],
expected = MultiIndex(levels=[[], lvl2, []],
labels=[[], [], []], names=names)
tm.assert_index_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

s1 = pd.Series([], dtype='category')
s2 = pd.Series([])
s2 = pd.Series([], dtype='object')

# different dtype => not-category
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
Expand Down
12 changes: 3 additions & 9 deletions pandas/tests/reshape/test_union_categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
exp = Categorical([])
tm.assert_categorical_equal(res, exp)

res = union_categoricals([pd.Categorical([]),
pd.Categorical([1.0])])
exp = Categorical([1.0])
res = union_categoricals([Categorical([]),
Categorical(['1'])])
exp = Categorical(['1'])
tm.assert_categorical_equal(res, exp)

# to make dtype equal
nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
res = union_categoricals([nanc,
pd.Categorical([])])
tm.assert_categorical_equal(res, nanc)

def test_union_categorical_same_category(self):
# check fastpath
c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,16 @@ def test_iteritems(self):
# assert is lazy (generators don't define reverse, lists do)
assert not hasattr(self.series.iteritems(), 'reverse')

def test_items(self):
for idx, val in self.series.items():
assert val == self.series[idx]

for idx, val in self.ts.items():
assert val == self.ts[idx]

# assert is lazy (generators don't define reverse, lists do)
assert not hasattr(self.series.items(), 'reverse')

def test_raise_on_info(self):
s = Series(np.random.randn(10))
with pytest.raises(AttributeError):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ def test_setitem_listlike(self):
result = c.codes[np.array([100000]).astype(np.int64)]
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))

def test_constructor_empty(self):
# GH 17248
c = Categorical([])
expected = Index([])
tm.assert_index_equal(c.categories, expected)

c = Categorical([], categories=[1, 2, 3])
expected = pd.Int64Index([1, 2, 3])
tm.assert_index_equal(c.categories, expected)

def test_constructor_unsortable(self):

# it works!
Expand Down
Loading

0 comments on commit a1dbdf2

Please sign in to comment.