From 7818486859d1aba53ce359b93cfc772e688958e5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sat, 19 Aug 2017 06:27:05 -0500
Subject: [PATCH 1/9] BUG: Have object dtype for empty Categorical.categories
 (#17249)

* BUG: Have object dtype for empty Categorical ctor

Previously we had a `Float64Index`, which is inconsistent with, e.g.,
the regular Index constructor.

* TST: Update tests in multi for new return

Previously these worked around the return type by wrapping list-likes
in `np.array` and relying on that to cast to float. These workarounds
are no longer necessary.

* TST: Update union_categorical tests

This relied on `NaN` being a float and empty being a float. Not a
necessary test anymore.

* TST: set object dtype
---
 doc/source/whatsnew/v0.21.0.txt                 |  3 +++
 pandas/core/categorical.py                      |  5 ++++-
 pandas/tests/indexes/test_multi.py              |  9 ++++-----
 pandas/tests/reshape/test_concat.py             |  2 +-
 pandas/tests/reshape/test_union_categoricals.py | 12 +++---------
 pandas/tests/test_categorical.py                | 10 ++++++++++
 6 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 4f55c6388c728..6008ea5d4cbcd 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -385,6 +385,9 @@ Numeric
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.isin` when called with a categorical (:issue`16639`)
+- Bug in the categorical constructor with empty values and categories causing
+  the ``.categories`` to be an empty ``Float64Index`` rather than an empty
+  ``Index`` with object dtype (:issue:`17248`)
 
 
 Other
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 230361931125e..1c2a29333001c 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
             # On list with NaNs, int values will be converted to float. Use
             # "object" dtype to prevent this. In the end objects will be
             # casted to int/... in the category assignment step.
- dtype = 'object' if isna(values).any() else None + if len(values) == 0 or isna(values).any(): + dtype = 'object' + else: + dtype = None values = _sanitize_array(values, None, dtype=dtype) if categories is None: diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index da1b309f5a621..c66775f4690cc 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -776,7 +776,7 @@ def test_from_arrays_empty(self): arrays = [[]] * N names = list('ABC')[:N] result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N, + expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, names=names) tm.assert_index_equal(result, expected) @@ -829,7 +829,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) - expected = pd.Float64Index([], name='A') + expected = pd.Index([], name='A') tm.assert_index_equal(result, expected) # 2 levels @@ -838,7 +838,7 @@ def test_from_product_empty(self): names = ['A', 'B'] for first, second in zip(l1, l2): result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[np.array(first), np.array(second)], + expected = MultiIndex(levels=[first, second], labels=[[], []], names=names) tm.assert_index_equal(result, expected) @@ -847,8 +847,7 @@ def test_from_product_empty(self): for N in range(4): lvl2 = lrange(N) result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[np.array(A) - for A in [[], lvl2, []]], + expected = MultiIndex(levels=[[], lvl2, []], labels=[[], [], []], names=names) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 46fea86c45925..52cd18126859a 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -680,7 +680,7 @@ def test_concat_categorical_empty(self): tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) s1 = pd.Series([], dtype='category') - s2 = pd.Series([]) + s2 = pd.Series([], dtype='object') # different dtype => not-category tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index fe8d54005ba9b..eb80fb54b4016 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -107,17 +107,11 @@ def test_union_categoricals_empty(self): exp = Categorical([]) tm.assert_categorical_equal(res, exp) - res = union_categoricals([pd.Categorical([]), - pd.Categorical([1.0])]) - exp = Categorical([1.0]) + res = union_categoricals([Categorical([]), + Categorical(['1'])]) + exp = Categorical(['1']) tm.assert_categorical_equal(res, exp) - # to make dtype equal - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) - res = union_categoricals([nanc, - pd.Categorical([])]) - tm.assert_categorical_equal(res, nanc) - def test_union_categorical_same_category(self): # check fastpath c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index a0b585a16ad9a..7bbe220378993 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -112,6 +112,16 @@ def test_setitem_listlike(self): result = c.codes[np.array([100000]).astype(np.int64)] tm.assert_numpy_array_equal(result, np.array([5], dtype='int8')) + def 
test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = pd.Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + def test_constructor_unsortable(self): # it works! From 4e9c0d1f2156c656df5da4ac3f00190f0da5828b Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 19 Aug 2017 10:51:05 -0600 Subject: [PATCH 2/9] CLN: replace %s syntax with .format in pandas.tseries (#17290) --- pandas/tseries/frequencies.py | 38 +++++----- pandas/tseries/holiday.py | 14 ++-- pandas/tseries/offsets.py | 137 +++++++++++++++++++--------------- 3 files changed, 105 insertions(+), 84 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index aa33a3849acb3..7f34bcaf52926 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -409,16 +409,17 @@ def _get_freq_str(base, mult=1): need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS'] for __prefix in need_suffix: for _m in tslib._MONTHS: - _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ - _offset_to_period_map[__prefix] + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) + _offset_to_period_map[_alias] = _offset_to_period_map[__prefix] for __prefix in ['A', 'Q']: for _m in tslib._MONTHS: - _alias = '%s-%s' % (__prefix, _m) + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) _offset_to_period_map[_alias] = _alias _days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] for _d in _days: - _offset_to_period_map['W-%s' % _d] = 'W-%s' % _d + _alias = 'W-{day}'.format(day=_d) + _offset_to_period_map[_alias] = _alias def get_period_alias(offset_str): @@ -587,7 +588,7 @@ def _base_and_stride(freqstr): groups = opattern.match(freqstr) if not groups: - raise ValueError("Could not evaluate %s" % freqstr) + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) stride = groups.group(1) @@ -775,8 +776,8 @@ def infer_freq(index, warn=True): if not (is_datetime64_dtype(values) or is_timedelta64_dtype(values) or values.dtype == object): - raise TypeError("cannot infer freq from a non-convertible " - "dtype on a Series of {0}".format(index.dtype)) + raise TypeError("cannot infer freq from a non-convertible dtype " + "on a Series of {dtype}".format(dtype=index.dtype)) index = values if is_period_arraylike(index): @@ -789,7 +790,7 @@ def infer_freq(index, warn=True): if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " - "type {0}".format(type(index))) + "type {type}".format(type=type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): @@ -956,15 +957,17 @@ def _infer_daily_rule(self): if annual_rule: nyears = self.ydiffs[0] month = _month_aliases[self.rep_stamp.month] - return _maybe_add_count('%s-%s' % (annual_rule, month), nyears) + alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month) + return _maybe_add_count(alias, nyears) quarterly_rule = self._get_quarterly_rule() if quarterly_rule: nquarters = self.mdiffs[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _month_aliases[mod_dict[self.rep_stamp.month % 3]] - return _maybe_add_count('%s-%s' % (quarterly_rule, month), - nquarters) + alias = '{prefix}-{month}'.format(prefix=quarterly_rule, + month=month) + return _maybe_add_count(alias, nquarters) monthly_rule = self._get_monthly_rule() if 
monthly_rule: @@ -974,8 +977,8 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + day = _weekday_rule_aliases[self.rep_stamp.weekday()] + return _maybe_add_count('W-{day}'.format(day=day), days / 7) else: return _maybe_add_count('D', days) @@ -1048,7 +1051,7 @@ def _get_wom_rule(self): week = week_of_months[0] + 1 wd = _weekday_rule_aliases[weekdays[0]] - return 'WOM-%d%s' % (week, wd) + return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) class _TimedeltaFrequencyInferer(_FrequencyInferer): @@ -1058,15 +1061,16 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + wd = _weekday_rule_aliases[self.rep_stamp.weekday()] + alias = 'W-{weekday}'.format(weekday=wd) + return _maybe_add_count(alias, days / 7) else: return _maybe_add_count('D', days) def _maybe_add_count(base, count): if count != 1: - return '%d%s' % (count, base) + return '{count}{base}'.format(count=int(count), base=base) else: return base diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 9acb52ebe0e9f..d8bfa3013f8f7 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -174,16 +174,16 @@ class from pandas.tseries.offsets def __repr__(self): info = '' if self.year is not None: - info += 'year=%s, ' % self.year - info += 'month=%s, day=%s, ' % (self.month, self.day) + info += 'year={year}, '.format(year=self.year) + info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day) if self.offset is not None: - info += 'offset=%s' % self.offset + info += 'offset={offset}'.format(offset=self.offset) if self.observance is not None: - info += 'observance=%s' % self.observance + info += 'observance={obs}'.format(obs=self.observance) - repr = 'Holiday: %s (%s)' % (self.name, info) + repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info) return repr def dates(self, start_date, end_date, return_name=False): @@ -374,8 +374,8 @@ def holidays(self, start=None, end=None, return_name=False): DatetimeIndex of holidays """ if self.rules is None: - raise Exception('Holiday Calendar %s does not have any ' - 'rules specified' % self.name) + raise Exception('Holiday Calendar {name} does not have any ' + 'rules specified'.format(name=self.name)) if start is None: start = AbstractHolidayCalendar.start_date diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 56ef703e67ca0..29cdda5548896 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -261,10 +261,10 @@ def apply_index(self, i): """ if not type(self) is DateOffset: - raise NotImplementedError("DateOffset subclass %s " + raise NotImplementedError("DateOffset subclass {name} " "does not have a vectorized " - "implementation" - % (self.__class__.__name__,)) + "implementation".format( + name=self.__class__.__name__)) relativedelta_fast = set(['years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds', 'microseconds']) @@ -295,10 +295,10 @@ def apply_index(self, i): return i + (self._offset * self.n) else: # relativedelta with other keywords + kwd = set(self.kwds) - relativedelta_fast raise NotImplementedError("DateOffset with relativedelta " - "keyword(s) %s not able to be " - "applied vectorized" % - (set(self.kwds) - relativedelta_fast),) + "keyword(s) {kwd} not able to be " + "applied 
vectorized".format(kwd=kwd)) def isAnchored(self): return (self.n == 1) @@ -339,19 +339,20 @@ def __repr__(self): if attr not in exclude: attrs.append('='.join((attr, repr(getattr(self, attr))))) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + attrs_str = '' if attrs: - out += ': ' + ', '.join(attrs) - out += '>' + attrs_str = ': ' + ', '.join(attrs) + + repr_content = ''.join([n_str, className, plural, attrs_str]) + out = '<{content}>'.format(content=repr_content) return out @property @@ -501,7 +502,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -509,7 +510,7 @@ def freqstr(self): @property def nanos(self): - raise ValueError("{0} is a non-fixed frequency".format(self)) + raise ValueError("{name} is a non-fixed frequency".format(name=self)) class SingleConstructorOffset(DateOffset): @@ -518,7 +519,7 @@ class SingleConstructorOffset(DateOffset): def _from_name(cls, suffix=None): # default _from_name calls cls with no args if suffix: - raise ValueError("Bad freq suffix %s" % suffix) + raise ValueError("Bad freq suffix {suffix}".format(suffix=suffix)) return cls() @@ -531,21 +532,21 @@ class BusinessMixin(object): def __repr__(self): className = getattr(self, '_outputName', self.__class__.__name__) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' + repr_content = ''.join([n_str, className, plural, self._repr_attrs()]) + out = '<{content}>'.format(content=repr_content) return out def _repr_attrs(self): if self.offset: - attrs = ['offset=%s' % repr(self.offset)] + attrs = ['offset={offset!r}'.format(offset=self.offset)] else: attrs = None out = '' @@ -601,7 +602,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -1109,7 +1110,8 @@ def name(self): if self.isAnchored: return self.rule_code else: - return "%s-%s" % (self.rule_code, _int_to_month[self.n]) + return "{code}-{month}".format(code=self.rule_code, + month=_int_to_month[self.n]) class MonthEnd(MonthOffset): @@ -1176,9 +1178,9 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): else: self.day_of_month = int(day_of_month) if not self._min_day_of_month <= self.day_of_month <= 27: - raise ValueError('day_of_month must be ' - '{}<=day_of_month<=27, got {}'.format( - self._min_day_of_month, self.day_of_month)) + msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' + raise ValueError(msg.format(min=self._min_day_of_month, + day=self.day_of_month)) self.n = int(n) self.normalize = normalize self.kwds = kwds @@ -1190,7 +1192,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - suffix = '-{}'.format(self.day_of_month) + suffix = '-{day_of_month}'.format(day_of_month=self.day_of_month) return self._prefix + suffix @apply_wraps @@ -1576,8 +1578,8 @@ def __init__(self, n=1, normalize=False, **kwds): if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) 
self._inc = timedelta(weeks=1) self.kwds = kwds @@ -1630,7 +1632,7 @@ def onOffset(self, dt): def rule_code(self): suffix = '' if self.weekday is not None: - suffix = '-%s' % (_int_to_weekday[self.weekday]) + suffix = '-{weekday}'.format(weekday=_int_to_weekday[self.weekday]) return self._prefix + suffix @classmethod @@ -1696,11 +1698,11 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) if self.week < 0 or self.week > 3: - raise ValueError('Week must be 0<=day<=3, got %d' % - self.week) + raise ValueError('Week must be 0<=week<=3, got {week}' + .format(week=self.week)) self.kwds = kwds @@ -1746,15 +1748,18 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%d%s' % (self._prefix, self.week + 1, - _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{week}{weekday}'.format(prefix=self._prefix, + week=self.week + 1, + weekday=weekday) _prefix = 'WOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) week = int(suffix[0]) - 1 @@ -1789,8 +1794,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) self.kwds = kwds @@ -1829,14 +1834,17 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{weekday}'.format(prefix=self._prefix, + weekday=weekday) _prefix = 'LWOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... 
weekday = _weekday_to_int[suffix] return cls(weekday=weekday) @@ -1876,7 +1884,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.startingMonth]) + month = _int_to_month[self.startingMonth] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BQuarterEnd(QuarterOffset): @@ -2045,8 +2054,7 @@ def apply(self, other): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 - # freq_month = self.startingMonth - freqstr = 'Q-%s' % (_int_to_month[freq_month],) + freqstr = 'Q-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) @@ -2071,7 +2079,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.month]) + month = _int_to_month[self.month] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BYearEnd(YearOffset): @@ -2246,7 +2255,7 @@ def _rollf(date): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.month == 1 else self.month - 1 - freqstr = 'A-%s' % (_int_to_month[freq_month],) + freqstr = 'A-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) def onOffset(self, dt): @@ -2312,7 +2321,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.variation not in ["nearest", "last"]: - raise ValueError('%s is not a valid variation' % self.variation) + raise ValueError('{variation} is not a valid variation' + .format(variation=self.variation)) if self.variation == "nearest": weekday_offset = weekday(self.weekday) @@ -2438,8 +2448,9 @@ def _get_year_end_last(self, dt): @property def rule_code(self): + prefix = self._get_prefix() suffix = self.get_rule_code_suffix() - return "%s-%s" % (self._get_prefix(), suffix) + return "{prefix}-{suffix}".format(prefix=prefix, suffix=suffix) def _get_prefix(self): return self._prefix @@ -2451,9 +2462,11 @@ def _get_suffix_prefix(self): return self._suffix_prefix_last def get_rule_code_suffix(self): - return '%s-%s-%s' % (self._get_suffix_prefix(), - _int_to_month[self.startingMonth], - _int_to_weekday[self.weekday]) + prefix = self._get_suffix_prefix() + month = _int_to_month[self.startingMonth] + weekday = _int_to_weekday[self.weekday] + return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month, + weekday=weekday) @classmethod def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): @@ -2463,7 +2476,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): variation = "last" else: raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + "Unable to parse varion_code: {code}".format(code=varion_code)) startingMonth = _month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] @@ -2628,8 +2641,9 @@ def onOffset(self, dt): @property def rule_code(self): suffix = self._offset.get_rule_code_suffix() - return "%s-%s" % (self._prefix, - "%s-%d" % (suffix, self.qtr_with_extra_week)) + qtr = self.qtr_with_extra_week + return "{prefix}-{suffix}-{qtr}".format(prefix=self._prefix, + suffix=suffix, qtr=qtr) @classmethod def _from_name(cls, *args): @@ -2712,8 +2726,8 @@ def __add__(self, other): except ApplyTypeError: return NotImplemented except OverflowError: - raise OverflowError("the add operation between {} and {} " - "will overflow".format(self, other)) + raise OverflowError("the add operation between {self} and {other} " + 
"will overflow".format(self=self, other=other)) def __eq__(self, other): if isinstance(other, compat.string_types): @@ -2771,7 +2785,8 @@ def apply(self, other): elif isinstance(other, type(self)): return type(self)(self.n + other.n) - raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) + raise ApplyTypeError('Unhandled type: {type_str}' + .format(type_str=type(other).__name__)) _prefix = 'undefined' @@ -2921,7 +2936,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: - raise ValueError('Offset %s did not increment date' % offset) + raise ValueError('Offset {offset} did not increment date' + .format(offset=offset)) cur = next_date else: while cur >= end: @@ -2930,7 +2946,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: - raise ValueError('Offset %s did not decrement date' % offset) + raise ValueError('Offset {offset} did not decrement date' + .format(offset=offset)) cur = next_date From ab32c0a3e2033456ede23dbfeffc6adc8c4ea190 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 19 Aug 2017 17:55:34 -0400 Subject: [PATCH 3/9] TST: parameterize consistency tests for rolling/expanding windows (#17292) --- pandas/tests/test_window.py | 403 ++++++++++++++++++------------------ 1 file changed, 203 insertions(+), 200 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 21a9b05d48126..1cc0ad8bb4041 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2009,6 +2009,15 @@ def no_nans(x): _consistency_data = _create_consistency_data() +def _rolling_consistency_cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + class TestMomentsConsistency(Base): base_functions = [ (lambda v: Series(v).count(), None, 'count'), @@ -2177,7 +2186,11 @@ def _non_null_values(x): (mean_x * mean_y)) @pytest.mark.slow - def test_ewm_consistency(self): + @pytest.mark.parametrize( + 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], + [True, False], + [False, True])) + def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): if not len(s.columns): @@ -2231,52 +2244,51 @@ def _ewma(s, com, min_periods, adjust, ignore_na): return result com = 3. 
- for min_periods, adjust, ignore_na in product([0, 1, 2, 3, 4], - [True, False], - [False, True]): - # test consistency between different ewm* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).mean(), - mock_mean=lambda x: _ewma(x, com=com, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na), - corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).corr(y), - var_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).var(bias=False)), - std_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .std(bias=False)), - cov_unbiased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=False)), - var_biased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .var(bias=True)), - std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=True)), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, - ignore_na=ignore_na))) + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).mean(), + mock_mean=lambda x: _ewma(x, com=com, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).var(bias=False)), + std_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .std(bias=False)), + cov_unbiased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=False)), + var_biased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .var(bias=True)), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=True)), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors(x, com=com, adjust=adjust, + ignore_na=ignore_na))) @pytest.mark.slow - def test_expanding_consistency(self): + @pytest.mark.parametrize( + 'min_periods', [0, 1, 2, 3, 4]) + def test_expanding_consistency(self, min_periods): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2285,72 +2297,72 @@ def test_expanding_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - for min_periods in [0, 1, 2, 3, 4]: - - # test consistency between different expanding_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.expanding( - min_periods=min_periods).mean(), - mock_mean=lambda x: x.expanding( - 
min_periods=min_periods).sum() / x.expanding().count(), - corr=lambda x, y: x.expanding( - min_periods=min_periods).corr(y), - var_unbiased=lambda x: x.expanding( - min_periods=min_periods).var(), - std_unbiased=lambda x: x.expanding( - min_periods=min_periods).std(), - cov_unbiased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y), - var_biased=lambda x: x.expanding( - min_periods=min_periods).var(ddof=0), - std_biased=lambda x: x.expanding( - min_periods=min_periods).std(ddof=0), - cov_biased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y, ddof=0), - var_debiasing_factors=lambda x: ( - x.expanding().count() / - (x.expanding().count() - 1.) - .replace(0., np.nan))) - - # test consistency between expanding_xyz() and either (a) - # expanding_apply of Series.xyz(), or (b) expanding_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - expanding_f = getattr( - x.expanding(min_periods=min_periods), name) - - if (require_min_periods and - (min_periods is not None) and - (min_periods < require_min_periods)): - continue - - if name == 'count': - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=0).apply(func=f) + # test consistency between different expanding_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding( + min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding( + min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding( + min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding( + min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding( + min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding( + min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding( + min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() / + (x.expanding().count() - 1.) 
+ .replace(0., np.nan))) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr( + x.expanding(min_periods=min_periods), name) + + if (require_min_periods and + (min_periods is not None) and + (min_periods < require_min_periods)): + continue + + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=0).apply(func=f) + else: + if name in ['cov', 'corr']: + expanding_f_result = expanding_f( + pairwise=False) else: - if name in ['cov', 'corr']: - expanding_f_result = expanding_f( - pairwise=False) - else: - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=min_periods).apply(func=f) - - if not tm._incompat_bottleneck_version(name): - assert_equal(expanding_f_result, - expanding_apply_f_result) + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=min_periods).apply(func=f) + + if not tm._incompat_bottleneck_version(name): + assert_equal(expanding_f_result, + expanding_apply_f_result) @pytest.mark.slow - def test_rolling_consistency(self): + @pytest.mark.parametrize( + 'window,min_periods,center', list(_rolling_consistency_cases())) + def test_rolling_consistency(self, window, min_periods, center): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2359,100 +2371,91 @@ def test_rolling_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - def cases(): - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: - yield window, min_periods, center - - for window, min_periods, center in cases(): - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: ( - x.rolling(window=window, center=center) - .count()), - mean=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).mean()), - mock_mean=lambda x: ( - x.rolling(window=window, - min_periods=min_periods, - center=center).sum() - .divide(x.rolling(window=window, - min_periods=min_periods, - center=center).count())), - corr=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).corr(y)), - - var_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var()), - - std_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std()), - - cov_unbiased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y)), - - var_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var(ddof=0)), - - std_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std(ddof=0)), - - cov_biased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y, ddof=0)), - var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center).count() - .divide((x.rolling(window=window, center=center) - .count() - 1.) 
- .replace(0., np.nan)))) - - # test consistency between rolling_xyz() and either (a) - # rolling_apply of Series.xyz(), or (b) rolling_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr( - x.rolling(window=window, center=center, - min_periods=min_periods), name) - - if require_min_periods and ( - min_periods is not None) and ( - min_periods < require_min_periods): - continue + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling(window=window, center=center) + .count()), + mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).mean()), + mock_mean=lambda x: ( + x.rolling(window=window, + min_periods=min_periods, + center=center).sum() + .divide(x.rolling(window=window, + min_periods=min_periods, + center=center).count())), + corr=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).corr(y)), - if name == 'count': - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=0, - center=center).apply(func=f) + var_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var()), + + std_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std()), + + cov_unbiased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y)), + + var_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var(ddof=0)), + + std_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std(ddof=0)), + + cov_biased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y, ddof=0)), + var_debiasing_factors=lambda x: ( + x.rolling(window=window, center=center).count() + .divide((x.rolling(window=window, center=center) + .count() - 1.) 
+ .replace(0., np.nan)))) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling(window=window, center=center, + min_periods=min_periods), name) + + if require_min_periods and ( + min_periods is not None) and ( + min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=0, + center=center).apply(func=f) + else: + if name in ['cov', 'corr']: + rolling_f_result = rolling_f( + pairwise=False) else: - if name in ['cov', 'corr']: - rolling_f_result = rolling_f( - pairwise=False) - else: - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=min_periods, - center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, - rolling_apply_f_result) + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, + center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, + rolling_apply_f_result) # binary moments def test_rolling_cov(self): From 3b02e73b856a6f8d53382bf3908f04447bf90e03 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 19 Aug 2017 17:59:19 -0400 Subject: [PATCH 4/9] FIX: define `DataFrame.items` for all versions of python (#17214) --- doc/source/whatsnew/v0.21.0.txt | 4 ++++ pandas/core/frame.py | 3 +-- pandas/core/series.py | 3 +-- pandas/tests/frame/test_api.py | 11 ++++++++++- pandas/tests/series/test_api.py | 10 ++++++++++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6008ea5d4cbcd..c5fe89282bf52 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -128,6 +128,10 @@ Other Enhancements - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). +- :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) + + + .. 
_whatsnew_0210.api_breaking:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 467ef52de234e..b5b3df64d24c0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"):
         # fallback to regular tuples
         return zip(*arrays)
 
-    if compat.PY3:  # pragma: no cover
-        items = iteritems
+    items = iteritems
 
     def __len__(self):
         """Returns length of info axis, but here we use the index """
diff --git a/pandas/core/series.py b/pandas/core/series.py
index c8282450b77a9..75dc3d6403650 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1110,8 +1110,7 @@ def iteritems(self):
         """
         return zip(iter(self.index), iter(self))
 
-    if compat.PY3:  # pragma: no cover
-        items = iteritems
+    items = iteritems
 
     # ----------------------------------------------------------------------
     # Misc public methods
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 53a1b9525a0dd..a62fcb506a34b 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -171,7 +171,16 @@ def test_nonzero(self):
     def test_iteritems(self):
         df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
         for k, v in compat.iteritems(df):
-            assert type(v) == self.klass._constructor_sliced
+            assert isinstance(v, self.klass._constructor_sliced)
+
+    def test_items(self):
+        # issue #17213, #13918
+        cols = ['a', 'b', 'c']
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
+        for c, (k, v) in zip(cols, df.items()):
+            assert c == k
+            assert isinstance(v, Series)
+            assert (df[k] == v).all()
 
     def test_iter(self):
         assert tm.equalContents(list(self.frame), self.frame.columns)
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index 8e22dd38030ee..b7fbe803f8d3b 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -301,6 +301,16 @@ def test_iteritems(self):
         # assert is lazy (genrators don't define reverse, lists do)
         assert not hasattr(self.series.iteritems(), 'reverse')
 
+    def test_items(self):
+        for idx, val in self.series.items():
+            assert val == self.series[idx]
+
+        for idx, val in self.ts.items():
+            assert val == self.ts[idx]
+
+        # assert is lazy (generators don't define reverse, lists do)
+        assert not hasattr(self.series.items(), 'reverse')
+
     def test_raise_on_info(self):
         s = Series(np.random.randn(10))
         with pytest.raises(AttributeError):

From 58d872903449b8a29237288ade6227cdb280fe18 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sun, 20 Aug 2017 16:25:43 -0500
Subject: [PATCH 5/9] PERF: Update ASV publish config (#17293)

Stricter cutoffs for considering regressions

[ci skip]
---
 asv_bench/asv.conf.json | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index 59c05400d06b0..ced4f2b12445f 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -117,8 +117,10 @@
     // with results. If the commit is `null`, regression detection is
     // skipped for the matching benchmark.
// - // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit - // "another_benchmark": null, // Skip regression detection altogether - // } + "regressions_first_commits": { + "*": "v0.20.0" + }, + "regression_thresholds": { + "*": 0.05 + } } From e14431f897c7c0afd76d627ba933c07c277f8deb Mon Sep 17 00:00:00 2001 From: Yosuke Nakabayashi Date: Mon, 21 Aug 2017 09:50:44 +0200 Subject: [PATCH 6/9] DOC: Expand docstrings for head / tail methods (#16941) --- pandas/core/generic.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5a7f37bba91aa..d9d75c870b20c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2978,14 +2978,36 @@ def filter(self, items=None, like=None, regex=None, axis=None): def head(self, n=5): """ - Returns first n rows + Return the first n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_head : type of caller + The first n rows of the caller object. """ + return self.iloc[:n] def tail(self, n=5): """ - Returns last n rows + Return the last n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_tail : type of caller + The last n rows of the caller object. """ + if n == 0: return self.iloc[0:0] return self.iloc[-n:] From 8354a1dfa9073eab1b120d39be31103fc29394bb Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 00:56:39 -0700 Subject: [PATCH 7/9] MAINT: Use set literal for unsupported + depr args Initializes unsupported and deprecated argument sets with set literals instead of the set constructor in pandas/io/parsers.py, as the former is slightly faster than the latter. --- pandas/io/parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 05a04f268f72b..a9821be3fa5e2 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds): 'widths': None, } -_c_unsupported = set(['skipfooter']) -_python_unsupported = set([ +_c_unsupported = {'skipfooter'} +_python_unsupported = { 'low_memory', 'buffer_lines', 'float_precision', -]) -_deprecated_args = set([ +} +_deprecated_args = { 'as_recarray', 'buffer_lines', 'compact_ints', 'use_unsigned', -]) +} def _make_parser_function(name, sep=','): From 91245a758ee32658c66bdecd9556f7054cd99901 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 01:14:50 -0700 Subject: [PATCH 8/9] DOC: Add proper docstring to maybe_convert_indices Patches several spelling errors and expands current doc to a proper doc-string. --- pandas/core/indexing.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 109183827de4e..929c2346ba5b0 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx): def maybe_convert_indices(indices, n): - """ if we have negative indicies, translate to postive here - if have indicies that are out-of-bounds, raise an IndexError """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + The array of indices that we are to convert. 
+    n : int
+        The number of elements in the array that we are indexing.
+
+    Returns
+    -------
+    valid_indices : array-like
+        An array-like of positive indices that correspond to the ones
+        that were passed in initially to this function.
+
+    Raises
+    ------
+    IndexError : one of the converted indices either exceeded the number
+        of elements (specified by `n`) OR was still negative.
+    """
+
     if isinstance(indices, list):
         indices = np.array(indices)
         if len(indices) == 0:

From d0d28fec180ee61de17921fe5068ecde95adae8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?agust=C3=ADn=20m=C3=A9ndez?=
Date: Mon, 21 Aug 2017 10:27:24 +0200
Subject: [PATCH 9/9] DOC: Improving docstring of take method (#16948)

---
 pandas/core/generic.py | 67 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 4 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d9d75c870b20c..c83b1073afc8e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2063,18 +2063,77 @@ def __delitem__(self, key):
 
     def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
         """
-        Analogous to ndarray.take
+        Return the elements in the given *positional* indices along an axis.
+
+        This means that we are not indexing according to actual values in
+        the index attribute of the object. We are indexing according to the
+        actual position of the element in the object.
 
         Parameters
         ----------
-        indices : list / array of ints
+        indices : array-like
+            An array of ints indicating which positions to take.
         axis : int, default 0
-        convert : translate neg to pos indices (default)
-        is_copy : mark the returned frame as a copy
+            The axis on which to select elements. "0" means that we are
+            selecting rows, "1" means that we are selecting columns, etc.
+        convert : bool, default True
+            Whether to convert negative indices to positive ones, just as with
+            indexing into Python lists. For example, if `-1` was passed in,
+            this index would be converted to ``n - 1``.
+        is_copy : bool, default True
+            Whether to return a copy of the original object or not.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
+        ...                    ('parrot', 'bird', 24.0),
+        ...                    ('lion', 'mammal', 80.5),
+        ...                    ('monkey', 'mammal', np.nan)],
+        ...                   columns=('name', 'class', 'max_speed'),
+        ...                   index=[0, 2, 3, 1])
+        >>> df
+             name   class  max_speed
+        0  falcon    bird      389.0
+        2  parrot    bird       24.0
+        3    lion  mammal       80.5
+        1  monkey  mammal        NaN
+
+        Take elements at positions 0 and 3 along the axis 0 (default).
+
+        Note how the actual indices selected (0 and 1) do not correspond to
+        our selected indices 0 and 3. That's because we are selecting the 0th
+        and 3rd rows, not rows whose indices equal 0 and 3.
+
+        >>> df.take([0, 3])
+             name   class  max_speed
+        0  falcon    bird      389.0
+        1  monkey  mammal        NaN
+
+        Take elements at indices 1 and 2 along the axis 1 (column selection).
+
+        >>> df.take([1, 2], axis=1)
+            class  max_speed
+        0    bird      389.0
+        2    bird       24.0
+        3  mammal       80.5
+        1  mammal        NaN
+
+        We may take elements using negative integers for positive indices,
+        starting from the end of the object, just like with Python lists.
+
+        >>> df.take([-1, -2])
+             name   class  max_speed
+        1  monkey  mammal        NaN
+        3    lion  mammal       80.5
 
         Returns
         -------
         taken : type of caller
+            An array-like containing the elements taken from the object.
+
+        See Also
+        --------
+        numpy.ndarray.take
+        numpy.take
         """
         nv.validate_take(tuple(), kwargs)
         self._consolidate_inplace()
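
Reviewer note: the user-visible changes in this series can be smoke-tested
with a short script. This is a sketch, not part of any patch; it assumes a
0.21.0 development build with all nine patches applied, and it exercises only
behaviour documented above (the empty-`Categorical` categories dtype from
patch 1, the `items` methods from patch 4, and positional `take` from
patch 9):

    import numpy as np
    import pandas as pd

    # Patch 1 (GH 17248): an empty Categorical now has object-dtype
    # categories, consistent with the regular Index constructor.
    c = pd.Categorical([])
    assert c.categories.dtype == object

    # Explicitly passed categories still keep their own dtype.
    c = pd.Categorical([], categories=[1, 2, 3])
    assert str(c.categories.dtype) == 'int64'

    # Patch 4 (GH 13918, GH 17213): items exists on both Python 2 and 3
    # and is lazy -- an iterator has no list methods such as reverse.
    s = pd.Series([1, 2, 3])
    assert not hasattr(s.items(), 'reverse')

    # Patch 9: take selects by position, not by index label, so -1 is
    # the last row regardless of the index values.
    df = pd.DataFrame({'name': ['falcon', 'monkey']}, index=[7, 0])
    assert list(df.take([-1])['name']) == ['monkey']

    print('smoke test passed')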