From 3be2de63e4c6cfbd04671f86d07869dfc984e9ed Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 10 Jul 2017 03:12:50 -0700 Subject: [PATCH 01/54] MAINT: Drop the get_offset_name method (#16863) Deprecated since 0.18.0 xref gh-11834 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/tseries/frequencies.py | 14 -------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d5cc3d6ddca8e..43bfebd0c2e59 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -126,6 +126,7 @@ Removal of prior version deprecations/changes - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) - :func:`eval` and :method:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) .. _whatsnew_0210.performance: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 8640f106a048a..c5f6c00a4005a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -637,20 +637,6 @@ def get_offset(name): getOffset = get_offset -def get_offset_name(offset): - """ - Return rule name associated with a DateOffset object - - Examples - -------- - get_offset_name(BMonthEnd(1)) --> 'EOM' - """ - - msg = "get_offset_name(offset) is deprecated. Use offset.freqstr instead" - warnings.warn(msg, FutureWarning, stacklevel=2) - return offset.freqstr - - def get_standard_freq(freq): """ Return the standardized frequency string From a5477b760d939a1f62ab5d38c75bf9d802a2bcf3 Mon Sep 17 00:00:00 2001 From: Adrian Liaw Date: Mon, 10 Jul 2017 18:13:49 +0800 Subject: [PATCH 02/54] DOC: Fix missing parentheses in documentation (#16862) --- doc/source/groupby.rst | 2 +- doc/source/io.rst | 4 ++-- doc/source/whatsnew/v0.13.0.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 61f43146aba85..937d682d238b3 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -933,7 +933,7 @@ The dimension of the returned result can also change: d = pd.DataFrame({"a":["x", "y"], "b":[1,2]}) def identity(df): - print df + print(df) return df d.groupby("a").apply(identity) diff --git a/doc/source/io.rst b/doc/source/io.rst index e1e82f686f182..9bf84e5419ffa 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3194,7 +3194,7 @@ You can pass ``iterator=True`` to iterate over the unpacked results .. ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): - print o + print(o) You can pass ``append=True`` to the writer to append to an existing pack @@ -3912,7 +3912,7 @@ chunks. evens = [2,4,6,8,10] coordinates = store.select_as_coordinates('dfeq','number=evens') for c in chunks(coordinates, 2): - print store.select('dfeq',where=c) + print(store.select('dfeq',where=c)) Advanced Queries ++++++++++++++++ diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.txt index 3347b05a5df37..f440be1ddd56e 100644 --- a/doc/source/whatsnew/v0.13.0.txt +++ b/doc/source/whatsnew/v0.13.0.txt @@ -790,7 +790,7 @@ Experimental .. ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): - print o + print(o) .. 
ipython:: python :suppress: From a43c1576ce3d94bc82f7cdd63531280ced5a9fa0 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Mon, 10 Jul 2017 12:15:08 +0200 Subject: [PATCH 03/54] BUG: rolling.quantile does not return an interpolated result (#16247) --- asv_bench/benchmarks/rolling.py | 185 ++++++++++++++++++++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 5 +- pandas/_libs/window.pyx | 15 ++- pandas/core/window.py | 11 +- pandas/tests/test_window.py | 41 ++++++- 5 files changed, 249 insertions(+), 8 deletions(-) create mode 100644 asv_bench/benchmarks/rolling.py diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py new file mode 100644 index 0000000000000..9da9d0b855323 --- /dev/null +++ b/asv_bench/benchmarks/rolling.py @@ -0,0 +1,185 @@ +from .pandas_vb_common import * +import pandas as pd +import numpy as np + + +class DataframeRolling(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.Ns = 10000 + self.df = pd.DataFrame({'a': np.random.random(self.N)}) + self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)}) + self.wins = 10 + self.winl = 1000 + + def time_rolling_quantile_0(self): + (self.df.rolling(self.wins).quantile(0.0)) + + def time_rolling_quantile_1(self): + (self.df.rolling(self.wins).quantile(1.0)) + + def time_rolling_quantile_median(self): + (self.df.rolling(self.wins).quantile(0.5)) + + def time_rolling_median(self): + (self.df.rolling(self.wins).median()) + + def time_rolling_median(self): + (self.df.rolling(self.wins).mean()) + + def time_rolling_max(self): + (self.df.rolling(self.wins).max()) + + def time_rolling_min(self): + (self.df.rolling(self.wins).min()) + + def time_rolling_std(self): + (self.df.rolling(self.wins).std()) + + def time_rolling_count(self): + (self.df.rolling(self.wins).count()) + + def time_rolling_skew(self): + (self.df.rolling(self.wins).skew()) + + def time_rolling_kurt(self): + (self.df.rolling(self.wins).kurt()) + + def time_rolling_sum(self): + (self.df.rolling(self.wins).sum()) + + def time_rolling_corr(self): + (self.dfs.rolling(self.wins).corr()) + + def time_rolling_cov(self): + (self.dfs.rolling(self.wins).cov()) + + def time_rolling_quantile_0_l(self): + (self.df.rolling(self.winl).quantile(0.0)) + + def time_rolling_quantile_1_l(self): + (self.df.rolling(self.winl).quantile(1.0)) + + def time_rolling_quantile_median_l(self): + (self.df.rolling(self.winl).quantile(0.5)) + + def time_rolling_median_l(self): + (self.df.rolling(self.winl).median()) + + def time_rolling_median_l(self): + (self.df.rolling(self.winl).mean()) + + def time_rolling_max_l(self): + (self.df.rolling(self.winl).max()) + + def time_rolling_min_l(self): + (self.df.rolling(self.winl).min()) + + def time_rolling_std_l(self): + (self.df.rolling(self.wins).std()) + + def time_rolling_count_l(self): + (self.df.rolling(self.wins).count()) + + def time_rolling_skew_l(self): + (self.df.rolling(self.wins).skew()) + + def time_rolling_kurt_l(self): + (self.df.rolling(self.wins).kurt()) + + def time_rolling_sum_l(self): + (self.df.rolling(self.wins).sum()) + + +class SeriesRolling(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.Ns = 10000 + self.df = pd.DataFrame({'a': np.random.random(self.N)}) + self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)}) + self.sr = self.df.a + self.srs = self.dfs.a + self.wins = 10 + self.winl = 1000 + + def time_rolling_quantile_0(self): + (self.sr.rolling(self.wins).quantile(0.0)) + + def time_rolling_quantile_1(self): + (self.sr.rolling(self.wins).quantile(1.0)) + 
+ def time_rolling_quantile_median(self): + (self.sr.rolling(self.wins).quantile(0.5)) + + def time_rolling_median(self): + (self.sr.rolling(self.wins).median()) + + def time_rolling_median(self): + (self.sr.rolling(self.wins).mean()) + + def time_rolling_max(self): + (self.sr.rolling(self.wins).max()) + + def time_rolling_min(self): + (self.sr.rolling(self.wins).min()) + + def time_rolling_std(self): + (self.sr.rolling(self.wins).std()) + + def time_rolling_count(self): + (self.sr.rolling(self.wins).count()) + + def time_rolling_skew(self): + (self.sr.rolling(self.wins).skew()) + + def time_rolling_kurt(self): + (self.sr.rolling(self.wins).kurt()) + + def time_rolling_sum(self): + (self.sr.rolling(self.wins).sum()) + + def time_rolling_corr(self): + (self.srs.rolling(self.wins).corr()) + + def time_rolling_cov(self): + (self.srs.rolling(self.wins).cov()) + + def time_rolling_quantile_0_l(self): + (self.sr.rolling(self.winl).quantile(0.0)) + + def time_rolling_quantile_1_l(self): + (self.sr.rolling(self.winl).quantile(1.0)) + + def time_rolling_quantile_median_l(self): + (self.sr.rolling(self.winl).quantile(0.5)) + + def time_rolling_median_l(self): + (self.sr.rolling(self.winl).median()) + + def time_rolling_median_l(self): + (self.sr.rolling(self.winl).mean()) + + def time_rolling_max_l(self): + (self.sr.rolling(self.winl).max()) + + def time_rolling_min_l(self): + (self.sr.rolling(self.winl).min()) + + def time_rolling_std_l(self): + (self.sr.rolling(self.wins).std()) + + def time_rolling_count_l(self): + (self.sr.rolling(self.wins).count()) + + def time_rolling_skew_l(self): + (self.sr.rolling(self.wins).skew()) + + def time_rolling_kurt_l(self): + (self.sr.rolling(self.wins).kurt()) + + def time_rolling_sum_l(self): + (self.sr.rolling(self.wins).sum()) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 43bfebd0c2e59..1edbf1638d233 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -168,9 +168,11 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) + Sparse ^^^^^^ @@ -191,6 +193,7 @@ Categorical ^^^^^^^^^^^ + Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 3bb8abe26c781..2450eea5500cd 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1348,8 +1348,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, bint is_variable ndarray[int64_t] start, end ndarray[double_t] output + double vlow, vhigh - if quantile < 0.0 or quantile > 1.0: + if quantile <= 0.0 or quantile >= 1.0: raise ValueError("quantile value {0} not in [0, 1]".format(quantile)) # we use the Fixed/Variable Indexer here as the @@ -1391,7 +1392,17 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, if nobs >= minp: idx = int(quantile * (nobs - 1)) - output[i] = skiplist.get(idx) + + # Single value in skip list + if nobs == 1: + 
output[i] = skiplist.get(0)
+
+            # Interpolated quantile
+            else:
+                vlow = skiplist.get(idx)
+                vhigh = skiplist.get(idx + 1)
+                output[i] = (vlow + (vhigh - vlow) *
+                             (quantile * (nobs - 1) - idx))
         else:
             output[i] = NaN

diff --git a/pandas/core/window.py b/pandas/core/window.py
index 02b508bb94e4c..57611794c375f 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -975,8 +975,15 @@ def quantile(self, quantile, **kwargs):

         def f(arg, *args, **kwargs):
             minp = _use_window(self.min_periods, window)
-            return _window.roll_quantile(arg, window, minp, indexi,
-                                         self.closed, quantile)
+            if quantile == 1.0:
+                return _window.roll_max(arg, window, minp, indexi,
+                                        self.closed)
+            elif quantile == 0.0:
+                return _window.roll_min(arg, window, minp, indexi,
+                                        self.closed)
+            else:
+                return _window.roll_quantile(arg, window, minp, indexi,
+                                             self.closed, quantile)

         return self._apply(f, 'quantile', quantile=quantile, **kwargs)

diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 9c3765ffdb716..3ba5d2065cddf 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -1122,8 +1122,19 @@ def test_rolling_quantile(self):

         def scoreatpercentile(a, per):
             values = np.sort(a, axis=0)
-            idx = per / 1. * (values.shape[0] - 1)
-            return values[int(idx)]
+            idx = int(per / 1. * (values.shape[0] - 1))
+
+            if idx == values.shape[0] - 1:
+                retval = values[-1]
+
+            else:
+                qlow = float(idx) / float(values.shape[0] - 1)
+                qhig = float(idx + 1) / float(values.shape[0] - 1)
+                vlow = values[idx]
+                vhig = values[idx + 1]
+                retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
+
+            return retval

         for q in qs:

@@ -1138,6 +1149,30 @@ def alt(x):

             self._check_moment_func(f, alt, name='quantile', quantile=q)

+    def test_rolling_quantile_np_percentile(self):
+        # #9413: Tests that rolling window's quantile default behavior
+        # is analogous to Numpy's percentile
+        row = 10
+        col = 5
+        idx = pd.date_range(20100101, periods=row, freq='B')
+        df = pd.DataFrame(np.random.rand(row * col).reshape((row, -1)),
+                          index=idx)
+
+        df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0)
+        np_percentile = np.percentile(df, [25, 50, 75], axis=0)
+
+        tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))
+
+    def test_rolling_quantile_series(self):
+        # #16211: Tests that rolling window's quantile default behavior
+        # is analogous to pd.Series' quantile
+        arr = np.arange(100)
+        s = pd.Series(arr)
+        q1 = s.quantile(0.1)
+        q2 = s.rolling(100).quantile(0.1).iloc[-1]
+
+        tm.assert_almost_equal(q1, q2)
+
     def test_rolling_quantile_param(self):
         ser = Series([0.0, .1, .5, .9, 1.0])

@@ -3558,7 +3593,7 @@ def test_ragged_quantile(self):

         result = df.rolling(window='2s', min_periods=1).quantile(0.5)
         expected = df.copy()
-        expected['B'] = [0.0, 1, 1.0, 3.0, 3.0]
+        expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
         tm.assert_frame_equal(result, expected)

     def test_ragged_std(self):

From 3e20eab7ad5639810b4824790cd559367b326b0b Mon Sep 17 00:00:00 2001
From: Keiron Pizzey
Date: Mon, 10 Jul 2017 11:36:01 +0100
Subject: [PATCH 04/54] ENH - Modify DataFrame.select_dtypes to accept scalar values (#16860)

---
 doc/source/basics.rst             |   4 -
 doc/source/style.ipynb            |   2 +-
 doc/source/whatsnew/v0.21.0.txt   |   1 +
 pandas/core/frame.py              |  26 +++---
 pandas/tests/frame/test_dtypes.py | 130 +++++++++++++++++++++++++-----
 5 files changed, 130 insertions(+), 33 deletions(-)

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 134cc5106015b..d8b1602fb104d 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -2229,7 +2229,3 @@ 
All numpy dtypes are subclasses of ``numpy.generic``:

 Pandas also defines the types ``category``, and ``datetime64[ns, tz]``, which are not integrated into the normal
 numpy hierarchy and won't show up with the above function.
-
-.. note::
-
-    The ``include`` and ``exclude`` parameters must be non-string sequences.
diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb
index 4eeda491426b1..c250787785e14 100644
--- a/doc/source/style.ipynb
+++ b/doc/source/style.ipynb
@@ -935,7 +935,7 @@
    "\n",
    "*Experimental: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*\n",
    "\n",
-   "Some support is available for exporting styled `DataFrames` to Excel worksheets using the `OpenPyXL` engine. CSS2.2 properties handled include:\n",
+   "Some support is available for exporting styled `DataFrames` to Excel worksheets using the `OpenPyXL` engine. CSS2.2 properties handled include:\n",
    "\n",
    "- `background-color`\n",
    "- `border-style`, `border-width`, `border-color` and their {`top`, `right`, `bottom`, `left` variants}\n",
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1edbf1638d233..8c71681582063 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -39,6 +39,7 @@ Other Enhancements
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
 - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
 - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
+- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)

 .. _whatsnew_0210.api_breaking:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 80cdebc24c39d..6559fc4c24ce2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2285,9 +2285,9 @@ def select_dtypes(self, include=None, exclude=None):

         Parameters
         ----------
-        include, exclude : list-like
-            A list of dtypes or strings to be included/excluded. You must pass
-            in a non-empty sequence for at least one of these.
+        include, exclude : scalar or list-like
+            A selection of dtypes or strings to be included/excluded. At least
+            one of these parameters must be supplied.

         Raises
         ------
         ValueError
             * If both of ``include`` and ``exclude`` are empty
             * If ``include`` and ``exclude`` have overlapping elements
             * If any kind of string dtype is passed in. 
- TypeError - * If either of ``include`` or ``exclude`` is not a sequence Returns ------- @@ -2331,6 +2329,14 @@ def select_dtypes(self, include=None, exclude=None): 3 0.0764 False 2 4 -0.9703 True 1 5 -1.2094 False 2 + >>> df.select_dtypes(include='bool') + c + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False >>> df.select_dtypes(include=['float64']) c 0 1 @@ -2348,10 +2354,12 @@ def select_dtypes(self, include=None, exclude=None): 4 True 5 False """ - include, exclude = include or (), exclude or () - if not (is_list_like(include) and is_list_like(exclude)): - raise TypeError('include and exclude must both be non-string' - ' sequences') + + if not is_list_like(include): + include = (include,) if include is not None else () + if not is_list_like(exclude): + exclude = (exclude,) if exclude is not None else () + selection = tuple(map(frozenset, (include, exclude))) if not any(selection): diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 335b76ff2aade..065580d56a683 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -104,7 +104,7 @@ def test_dtypes_are_correct_after_column_slice(self): ('b', np.float_), ('c', np.float_)]))) - def test_select_dtypes_include(self): + def test_select_dtypes_include_using_list_like(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), @@ -145,14 +145,10 @@ def test_select_dtypes_include(self): ei = df[['h', 'i']] assert_frame_equal(ri, ei) - ri = df.select_dtypes(include=['timedelta']) - ei = df[['k']] - assert_frame_equal(ri, ei) - pytest.raises(NotImplementedError, lambda: df.select_dtypes(include=['period'])) - def test_select_dtypes_exclude(self): + def test_select_dtypes_exclude_using_list_like(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), @@ -162,7 +158,7 @@ def test_select_dtypes_exclude(self): ee = df[['a', 'e']] assert_frame_equal(re, ee) - def test_select_dtypes_exclude_include(self): + def test_select_dtypes_exclude_include_using_list_like(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), @@ -181,6 +177,114 @@ def test_select_dtypes_exclude_include(self): e = df[['b', 'e']] assert_frame_equal(r, e) + def test_select_dtypes_include_using_scalars(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(include=np.number) + ei = df[['b', 'c', 'd', 'k']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include='datetime') + ei = df[['g']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include='datetime64') + ei = df[['g']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include='category') + ei = df[['f']] + assert_frame_equal(ri, ei) + + pytest.raises(NotImplementedError, + lambda: df.select_dtypes(include='period')) + + def test_select_dtypes_exclude_using_scalars(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': 
pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(exclude=np.number) + ei = df[['a', 'e', 'f', 'g', 'h', 'i', 'j']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(exclude='category') + ei = df[['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i', 'j', 'k']] + assert_frame_equal(ri, ei) + + pytest.raises(NotImplementedError, + lambda: df.select_dtypes(exclude='period')) + + def test_select_dtypes_include_exclude_using_scalars(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(include=np.number, exclude='floating') + ei = df[['b', 'c', 'k']] + assert_frame_equal(ri, ei) + + def test_select_dtypes_include_exclude_mixed_scalars_lists(self): + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + + ri = df.select_dtypes(include=np.number, + exclude=['floating', 'timedelta']) + ei = df[['b', 'c']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, 'category'], + exclude='floating') + ei = df[['b', 'c', 'f', 'k']] + assert_frame_equal(ri, ei) + def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), @@ -205,18 +309,6 @@ def test_select_dtypes_empty(self): 'must be nonempty'): df.select_dtypes() - def test_select_dtypes_raises_on_string(self): - df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) - with tm.assert_raises_regex(TypeError, 'include and exclude ' - '.+ non-'): - df.select_dtypes(include='object') - with tm.assert_raises_regex(TypeError, 'include and exclude ' - '.+ non-'): - df.select_dtypes(exclude='object') - with tm.assert_raises_regex(TypeError, 'include and exclude ' - '.+ non-'): - df.select_dtypes(include=int, exclude='object') - def test_select_dtypes_bad_datetime64(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), From f4b12d8488434d5f9a45fba1cbe7ad5a77c776ff Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 10 Jul 2017 06:36:32 -0400 Subject: [PATCH 05/54] COMPAT: moar 32-bit compat for testing of indexers (#16869) xref #16826 --- pandas/tests/indexes/test_category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 9dc2cfdecb98f..14f344acbefb2 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -393,7 +393,7 @@ def test_reindex_dtype(self): res, indexer = c.reindex(['a', 'c']) 
tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
         tm.assert_numpy_array_equal(indexer,
-                                    np.array([0, 3, 2], dtype=np.int64))
+                                    np.array([0, 3, 2], dtype=np.intp))

         c = CategoricalIndex(['a', 'b', 'c', 'a'])
         res, indexer = c.reindex(Categorical(['a', 'c']))

From 114feb9290c684b5e5b3a2456307f9116372e89f Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 11 Jul 2017 03:01:12 -0700
Subject: [PATCH 06/54] Confirm that select was *not* clearer in 0.12 (#16878)

---
 pandas/core/generic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7d1a8adf381fe..5722539b87aec 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2098,7 +2098,6 @@ def xs(self, key, axis=0, level=None, drop_level=True):

     _xs = xs

-    # TODO: Check if this was clearer in 0.12
     def select(self, crit, axis=0):
         """
         Return data corresponding to axis labels matching criteria

From 6a85e88bee498e7e218f0eeb766f15b9d78e9eaa Mon Sep 17 00:00:00 2001
From: topper-123
Date: Tue, 11 Jul 2017 11:08:57 +0100
Subject: [PATCH 07/54] Added tests for _get_dtype (#16845)

---
 pandas/tests/dtypes/test_common.py | 39 ++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index ba510e68f9a21..c32e8590c5675 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -524,3 +524,42 @@ def test_is_complex_dtype():

     assert com.is_complex_dtype(np.complex)
     assert com.is_complex_dtype(np.array([1 + 1j, 5]))
+
+
+@pytest.mark.parametrize('input_param,result', [
+    (int, np.dtype(int)),
+    ('int32', np.dtype('int32')),
+    (float, np.dtype(float)),
+    ('float64', np.dtype('float64')),
+    (np.dtype('float64'), np.dtype('float64')),
+    pytest.mark.xfail((str, np.dtype('
[...]

Date: Tue, 11 Jul 2017 12:40:50 +0200
Subject: [PATCH 08/54] BUG: Series.isin fails on categoricals (#16858)

---
 doc/source/whatsnew/v0.21.0.txt |  2 +-
 pandas/core/algorithms.py       |  4 ++--
 pandas/tests/test_algos.py      | 10 ++++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 8c71681582063..015fdf1f45f47 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -192,7 +192,7 @@ Numeric
 Categorical
 ^^^^^^^^^^^

-
+- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)

 Other

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index d74c5e66ea1a9..b490bf787a037 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -38,7 +38,6 @@
 # --------------- #
 # dtype access #
 # --------------- #
-
 def _ensure_data(values, dtype=None):
     """
     routine to ensure that our data is of the correct
@@ -113,7 +112,8 @@ def _ensure_data(values, dtype=None):

         return values.asi8, dtype, 'int64'

-    elif is_categorical_dtype(values) or is_categorical_dtype(dtype):
+    elif (is_categorical_dtype(values) and
+          (is_categorical_dtype(dtype) or dtype is None)):
         values = getattr(values, 'values', values)
         values = values.codes
         dtype = 'category'
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 063dcea5c76d6..9504d2a9426f0 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -586,6 +586,16 @@ def test_large(self):
         expected[1] = True
         tm.assert_numpy_array_equal(result, expected)

+    def test_categorical_from_codes(self):
+        # GH 16639
+        vals = np.array([0, 1, 2, 0])
+        cats = ['a', 'b', 'c']
+        Sd = 
pd.Series(pd.Categorical(1).from_codes(vals, cats)) + St = pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats)) + expected = np.array([True, True, False, True]) + result = algos.isin(Sd, St) + tm.assert_numpy_array_equal(expected, result) + class TestValueCounts(object): From 55af1ab626baf62dbbc00c2521c20be29b819a06 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 11 Jul 2017 12:39:39 -0400 Subject: [PATCH 09/54] COMPAT with dateutil 2.6.1, fixed ambiguous tz dst behavior (#16880) --- ci/requirements-3.5.run | 1 - ci/requirements-3.5.sh | 4 ++++ ci/requirements-3.6_NUMPY_DEV.run | 1 - pandas/tests/tseries/test_offsets.py | 5 ++++- pandas/tests/tseries/test_timezones.py | 21 +++++++++++++++++---- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index 43e6814ed6c8e..52828b5220997 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -1,4 +1,3 @@ -python-dateutil pytz numpy=1.11.3 openpyxl diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index d0f0b81802dc6..917439a8765a2 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -5,3 +5,7 @@ source activate pandas echo "install 35" conda install -n pandas -c conda-forge feather-format + +# pip install python-dateutil to get latest +conda remove -n pandas python-dateutil --force +pip install python-dateutil diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run index 0aa987baefb1d..af44f198c687e 100644 --- a/ci/requirements-3.6_NUMPY_DEV.run +++ b/ci/requirements-3.6_NUMPY_DEV.run @@ -1,2 +1 @@ -python-dateutil pytz diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 47b15a2b66fc4..e03b3e0a85e5e 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -4844,7 +4844,7 @@ def test_fallback_plural(self): hrs_pre = utc_offsets['utc_offset_daylight'] hrs_post = utc_offsets['utc_offset_standard'] - if dateutil.__version__ != LooseVersion('2.6.0'): + if dateutil.__version__ < LooseVersion('2.6.0'): # buggy ambiguous behavior in 2.6.0 # GH 14621 # https://github.com/dateutil/dateutil/issues/321 @@ -4852,6 +4852,9 @@ def test_fallback_plural(self): n=3, tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), expected_utc_offset=hrs_post) + elif dateutil.__version__ > LooseVersion('2.6.0'): + # fixed, but skip the test + continue def test_springforward_plural(self): # test moving from standard to daylight savings diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index de6978d52968b..c034a9c60ef1b 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -552,8 +552,16 @@ def f(): tz=tz, ambiguous='infer') assert times[0] == Timestamp('2013-10-26 23:00', tz=tz, freq="H") - if dateutil.__version__ != LooseVersion('2.6.0'): - # see gh-14621 + if str(tz).startswith('dateutil'): + if dateutil.__version__ < LooseVersion('2.6.0'): + # see gh-14621 + assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', + tz=tz, freq="H") + elif dateutil.__version__ > LooseVersion('2.6.0'): + # fixed ambiguous behavior + assert times[-1] == Timestamp('2013-10-27 01:00:00+0100', + tz=tz, freq="H") + else: assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', tz=tz, freq="H") @@ -1233,13 +1241,18 @@ def test_ambiguous_compat(self): assert result_pytz.value == result_dateutil.value assert result_pytz.value == 1382835600000000000 - # dateutil 2.6 buggy 
w.r.t. ambiguous=0 - if dateutil.__version__ != LooseVersion('2.6.0'): + if dateutil.__version__ < LooseVersion('2.6.0'): + # dateutil 2.6 buggy w.r.t. ambiguous=0 # see gh-14621 # see https://github.com/dateutil/dateutil/issues/321 assert (result_pytz.to_pydatetime().tzname() == result_dateutil.to_pydatetime().tzname()) assert str(result_pytz) == str(result_dateutil) + elif dateutil.__version__ > LooseVersion('2.6.0'): + # fixed ambiguous behavior + assert result_pytz.to_pydatetime().tzname() == 'GMT' + assert result_dateutil.to_pydatetime().tzname() == 'BST' + assert str(result_pytz) != str(result_dateutil) # 1 hour difference result_pytz = (Timestamp('2013-10-27 01:00:00') From a9421af1aac906cc38d025ed5db4a2b55cb8b9bc Mon Sep 17 00:00:00 2001 From: Jean Helie Date: Tue, 11 Jul 2017 17:40:20 +0100 Subject: [PATCH 10/54] fix wrongly named method (#16881) --- asv_bench/benchmarks/rolling.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 9da9d0b855323..899349cd21f84 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -26,7 +26,7 @@ def time_rolling_quantile_median(self): def time_rolling_median(self): (self.df.rolling(self.wins).median()) - def time_rolling_median(self): + def time_rolling_mean(self): (self.df.rolling(self.wins).mean()) def time_rolling_max(self): @@ -68,7 +68,7 @@ def time_rolling_quantile_median_l(self): def time_rolling_median_l(self): (self.df.rolling(self.winl).median()) - def time_rolling_median_l(self): + def time_rolling_mean_l(self): (self.df.rolling(self.winl).mean()) def time_rolling_max_l(self): @@ -118,7 +118,7 @@ def time_rolling_quantile_median(self): def time_rolling_median(self): (self.sr.rolling(self.wins).median()) - def time_rolling_median(self): + def time_rolling_mean(self): (self.sr.rolling(self.wins).mean()) def time_rolling_max(self): @@ -160,7 +160,7 @@ def time_rolling_quantile_median_l(self): def time_rolling_median_l(self): (self.sr.rolling(self.winl).median()) - def time_rolling_median_l(self): + def time_rolling_mean_l(self): (self.sr.rolling(self.winl).mean()) def time_rolling_max_l(self): From 9d13227345882daaa90f03078c09a9b44a18ce72 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 12 Jul 2017 10:51:07 -0500 Subject: [PATCH 11/54] TST/PKG: Removed pandas.util.testing.slow definition (#16852) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/tests/computation/test_eval.py | 16 +-- pandas/tests/frame/test_repr_info.py | 5 +- pandas/tests/frame/test_to_csv.py | 8 +- pandas/tests/indexing/test_indexing_slow.py | 7 +- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/test_excel.py | 2 +- pandas/tests/io/test_html.py | 36 ++--- pandas/tests/plotting/test_boxplot_method.py | 23 ++-- pandas/tests/plotting/test_datetimelike.py | 102 +++++++------- pandas/tests/plotting/test_deprecated.py | 10 +- pandas/tests/plotting/test_frame.py | 137 +++++++++---------- pandas/tests/plotting/test_hist_method.py | 35 +++-- pandas/tests/plotting/test_misc.py | 17 ++- pandas/tests/plotting/test_series.py | 59 ++++---- pandas/tests/series/test_indexing.py | 5 +- pandas/tests/test_expressions.py | 10 +- pandas/tests/test_window.py | 6 +- pandas/util/testing.py | 7 - 19 files changed, 239 insertions(+), 249 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 015fdf1f45f47..a5ee0e0ce2653 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -101,6 
+101,7 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) +- Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) .. _whatsnew_0210.api: diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 589f612802fb9..7fc091ebb1892 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -30,7 +30,7 @@ import pandas.util.testing as tm from pandas.util.testing import (assert_frame_equal, randbool, assert_numpy_array_equal, assert_series_equal, - assert_produces_warning, slow) + assert_produces_warning) from pandas.compat import PY3, reduce _series_frame_incompatible = _bool_ops_syms @@ -144,7 +144,7 @@ def teardown_method(self, method): del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses del self.pandas_rhses, self.pandas_lhses, self.current_engines - @slow + @pytest.mark.slow def test_complex_cmp_ops(self): cmp_ops = ('!=', '==', '<=', '>=', '<', '>') cmp2_ops = ('>', '<') @@ -161,7 +161,7 @@ def test_simple_cmp_ops(self): for lhs, rhs, cmp_op in product(bool_lhses, bool_rhses, self.cmp_ops): self.check_simple_cmp_op(lhs, cmp_op, rhs) - @slow + @pytest.mark.slow def test_binary_arith_ops(self): for lhs, op, rhs in product(self.lhses, self.arith_ops, self.rhses): self.check_binary_arith_op(lhs, op, rhs) @@ -181,17 +181,17 @@ def test_pow(self): for lhs, rhs in product(self.lhses, self.rhses): self.check_pow(lhs, '**', rhs) - @slow + @pytest.mark.slow def test_single_invert_op(self): for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses): self.check_single_invert_op(lhs, op, rhs) - @slow + @pytest.mark.slow def test_compound_invert_op(self): for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses): self.check_compound_invert_op(lhs, op, rhs) - @slow + @pytest.mark.slow def test_chained_cmp_op(self): mids = self.lhses cmp_ops = '<', '>' @@ -870,7 +870,7 @@ def test_frame_comparison(self, engine, parser): res = pd.eval('df < df3', engine=engine, parser=parser) assert_frame_equal(res, df < df3) - @slow + @pytest.mark.slow def test_medium_complex_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -974,7 +974,7 @@ def test_series_frame_commutativity(self, engine, parser): if engine == 'numexpr': assert_frame_equal(a, b) - @slow + @pytest.mark.slow def test_complex_series_frame_alignment(self, engine, parser): import random args = product(self.lhs_index_types, self.index_types, diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index cc37f8cc3cb02..c317ad542659a 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -8,6 +8,7 @@ from numpy import nan import numpy as np +import pytest from pandas import (DataFrame, compat, option_context) from pandas.compat import 
StringIO, lrange, u @@ -40,7 +41,7 @@ def test_repr_mixed(self): foo = repr(self.mixed_frame) # noqa self.mixed_frame.info(verbose=False, buf=buf) - @tm.slow + @pytest.mark.slow def test_repr_mixed_big(self): # big mixed biggie = DataFrame({'A': np.random.randn(200), @@ -87,7 +88,7 @@ def test_repr_dimensions(self): with option_context('display.show_dimensions', 'truncate'): assert "2 rows x 2 columns" not in repr(df) - @tm.slow + @pytest.mark.slow def test_repr_big(self): # big one biggie = DataFrame(np.zeros((200, 4)), columns=lrange(4), diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 69bd2b008416f..6a4b1686a31e2 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -17,7 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, - ensure_clean, slow, + ensure_clean, makeCustomDataframe as mkdf) import pandas.util.testing as tm @@ -205,7 +205,7 @@ def _check_df(df, cols=None): cols = ['b', 'a'] _check_df(df, cols) - @slow + @pytest.mark.slow def test_to_csv_dtnat(self): # GH3437 from pandas import NaT @@ -236,7 +236,7 @@ def make_dtnat_arr(n, nnat=None): assert_frame_equal(df, recons, check_names=False, check_less_precise=True) - @slow + @pytest.mark.slow def test_to_csv_moar(self): def _do_test(df, r_dtype=None, c_dtype=None, @@ -728,7 +728,7 @@ def test_to_csv_chunking(self): rs = read_csv(filename, index_col=0) assert_frame_equal(rs, aa) - @slow + @pytest.mark.slow def test_to_csv_wide_frame_formatting(self): # Issue #8621 df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 08d390a6a213e..1b3fb18d9ff1d 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -6,11 +6,12 @@ import pandas as pd from pandas.core.api import Series, DataFrame, MultiIndex import pandas.util.testing as tm +import pytest class TestIndexingSlow(object): - @tm.slow + @pytest.mark.slow def test_multiindex_get_loc(self): # GH7724, GH2646 with warnings.catch_warnings(record=True): @@ -80,7 +81,7 @@ def loop(mi, df, keys): assert not mi.index.lexsort_depth < i loop(mi, df, keys) - @tm.slow + @pytest.mark.slow def test_large_dataframe_indexing(self): # GH10692 result = DataFrame({'x': range(10 ** 6)}, dtype='int64') @@ -88,7 +89,7 @@ def test_large_dataframe_indexing(self): expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64') tm.assert_frame_equal(result, expected) - @tm.slow + @pytest.mark.slow def test_large_mi_dataframe_indexing(self): # GH10645 result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 4b4f44b44c163..584a6561b505b 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -664,7 +664,7 @@ def test_url(self): tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing - @tm.slow + @pytest.mark.slow def test_file(self): dirpath = tm.get_data_path() localtable = os.path.join(dirpath, 'salaries.csv') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index abe3757ec64f3..856e8d6466526 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -614,7 +614,7 @@ def test_read_from_s3_url(self): local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) - @tm.slow + @pytest.mark.slow def 
test_read_from_file_url(self): # FILE diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 1e1d653cf94d1..4ef265dcd5113 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -130,7 +130,7 @@ def test_spam_url(self): assert_framelist_equal(df1, df2) - @tm.slow + @pytest.mark.slow def test_banklist(self): df1 = self.read_html(self.banklist_data, '.*Florida.*', attrs={'id': 'table'}) @@ -292,7 +292,7 @@ def test_invalid_url(self): except ValueError as e: assert str(e) == 'No tables found' - @tm.slow + @pytest.mark.slow def test_file_url(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), 'First', @@ -301,7 +301,7 @@ def test_file_url(self): for df in dfs: assert isinstance(df, DataFrame) - @tm.slow + @pytest.mark.slow def test_invalid_table_attrs(self): url = self.banklist_data with tm.assert_raises_regex(ValueError, 'No tables found'): @@ -312,39 +312,39 @@ def _bank_data(self, *args, **kwargs): return self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'}, *args, **kwargs) - @tm.slow + @pytest.mark.slow def test_multiindex_header(self): df = self._bank_data(header=[0, 1])[0] assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_index(self): df = self._bank_data(index_col=[0, 1])[0] assert isinstance(df.index, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_index(self): df = self._bank_data(header=[0, 1], index_col=[0, 1])[0] assert isinstance(df.columns, MultiIndex) assert isinstance(df.index, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_skiprows_tuples(self): df = self._bank_data(header=[0, 1], skiprows=1, tupleize_cols=True)[0] assert isinstance(df.columns, Index) - @tm.slow + @pytest.mark.slow def test_multiindex_header_skiprows(self): df = self._bank_data(header=[0, 1], skiprows=1)[0] assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_index_skiprows(self): df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0] assert isinstance(df.index, MultiIndex) assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), @@ -372,7 +372,7 @@ def test_python_docs_table(self): zz = [df.iloc[0, 0][0:4] for df in dfs] assert sorted(zz) == sorted(['Repo', 'What']) - @tm.slow + @pytest.mark.slow def test_thousands_macau_stats(self): all_non_nan_table_index = -2 macau_data = os.path.join(DATA_PATH, 'macau.html') @@ -382,7 +382,7 @@ def test_thousands_macau_stats(self): assert not any(s.isnull().any() for _, s in df.iteritems()) - @tm.slow + @pytest.mark.slow def test_thousands_macau_index_col(self): all_non_nan_table_index = -2 macau_data = os.path.join(DATA_PATH, 'macau.html') @@ -523,7 +523,7 @@ def test_nyse_wsj_commas_table(self): assert df.shape[0] == nrows tm.assert_index_equal(df.columns, columns) - @tm.slow + @pytest.mark.slow def test_banklist_header(self): from pandas.io.html import _remove_whitespace @@ -562,7 +562,7 @@ def try_remove_ws(x): coerce=True) tm.assert_frame_equal(converted, gtnew) - @tm.slow + @pytest.mark.slow def test_gold_canyon(self): gc = 'Gold Canyon' with open(self.banklist_data, 'r') as f: @@ -855,7 +855,7 @@ def test_works_on_valid_markup(self): assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) - @tm.slow + @pytest.mark.slow def test_fallback_success(self): _skip_if_none_of(('bs4', 'html5lib')) banklist_data = 
os.path.join(DATA_PATH, 'banklist.html') @@ -898,7 +898,7 @@ def get_elements_from_file(url, element='table'): return soup.find_all(element) -@tm.slow +@pytest.mark.slow def test_bs4_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") with warnings.catch_warnings(): @@ -913,13 +913,13 @@ def get_lxml_elements(url, element): return doc.xpath('.//{0}'.format(element)) -@tm.slow +@pytest.mark.slow def test_lxml_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") assert get_lxml_elements(filepath, 'table') -@tm.slow +@pytest.mark.slow def test_lxml_finds_tbody(): filepath = os.path.join(DATA_PATH, "spam.html") assert get_lxml_elements(filepath, 'tbody') diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index ce8fb7a57c912..8fe119d28644c 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -8,7 +8,6 @@ from pandas import Series, DataFrame, MultiIndex from pandas.compat import range, lzip import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy import random @@ -35,7 +34,7 @@ def _skip_if_mpl_14_or_dev_boxplot(): class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_boxplot_legacy(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -93,13 +92,13 @@ def test_boxplot_legacy(self): lines = list(itertools.chain.from_iterable(d.values())) assert len(ax.get_lines()) == len(lines) - @slow + @pytest.mark.slow def test_boxplot_return_type_none(self): # GH 12216; return_type=None & by=None -> axes result = self.hist_df.boxplot() assert isinstance(result, self.plt.Axes) - @slow + @pytest.mark.slow def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 import matplotlib as mpl # noqa @@ -125,7 +124,7 @@ def test_boxplot_return_type_legacy(self): result = df.boxplot(return_type='both') self._check_box_return_type(result, 'both') - @slow + @pytest.mark.slow def test_boxplot_axis_limits(self): def _check_ax_limits(col, ax): @@ -153,14 +152,14 @@ def _check_ax_limits(col, ax): assert age_ax._sharey == height_ax assert dummy_ax._sharey is None - @slow + @pytest.mark.slow def test_boxplot_empty_column(self): _skip_if_mpl_14_or_dev_boxplot() df = DataFrame(np.random.randn(20, 4)) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type='axes') - @slow + @pytest.mark.slow def test_figsize(self): df = DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E']) @@ -176,7 +175,7 @@ def test_fontsize(self): class TestDataFrameGroupByPlots(TestPlotBase): - @slow + @pytest.mark.slow def test_boxplot_legacy(self): grouped = self.hist_df.groupby(by='gender') with tm.assert_produces_warning(UserWarning): @@ -206,7 +205,7 @@ def test_boxplot_legacy(self): return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) @@ -230,7 +229,7 @@ def test_grouped_plot_fignums(self): res = df.groupby('gender').hist() tm.close() - @slow + @pytest.mark.slow def test_grouped_box_return_type(self): df = self.hist_df @@ -267,7 +266,7 @@ def test_grouped_box_return_type(self): returned = df2.boxplot(by='category', return_type=t) self._check_box_return_type(returned, t, expected_keys=columns2) - @slow + @pytest.mark.slow def test_grouped_box_layout(self): df = self.hist_df @@ -341,7 +340,7 @@ def 
test_grouped_box_layout(self): return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) - @slow + @pytest.mark.slow def test_grouped_box_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 0cff365be3ec8..e9c7d806fd65d 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -14,7 +14,7 @@ from pandas.core.indexes.period import period_range, Period, PeriodIndex from pandas.core.resample import DatetimeIndex -from pandas.util.testing import assert_series_equal, ensure_clean, slow +from pandas.util.testing import assert_series_equal, ensure_clean import pandas.util.testing as tm from pandas.tests.plotting.common import (TestPlotBase, @@ -45,7 +45,7 @@ def setup_method(self, method): def teardown_method(self, method): tm.close() - @slow + @pytest.mark.slow def test_ts_plot_with_tz(self): # GH2877 index = date_range('1/1/2011', periods=2, freq='H', @@ -61,7 +61,7 @@ def test_fontsize_set_correctly(self): for label in (ax.get_xticklabels() + ax.get_yticklabels()): assert label.get_fontsize() == 2 - @slow + @pytest.mark.slow def test_frame_inferred(self): # inferred freq idx = date_range('1/1/1987', freq='MS', periods=100) @@ -99,7 +99,7 @@ def test_nonnumeric_exclude(self): pytest.raises(TypeError, df['A'].plot) - @slow + @pytest.mark.slow def test_tsplot(self): from pandas.tseries.plotting import tsplot @@ -133,7 +133,7 @@ def test_both_style_and_color(self): s = ts.reset_index(drop=True) pytest.raises(ValueError, s.plot, style='b-', color='#000099') - @slow + @pytest.mark.slow def test_high_freq(self): freaks = ['ms', 'us'] for freq in freaks: @@ -151,7 +151,7 @@ def test_get_datevalue(self): assert (get_datevalue('1/1/1987', 'D') == Period('1987-1-1', 'D').ordinal) - @slow + @pytest.mark.slow def test_ts_plot_format_coord(self): def check_format_of_first_point(ax, expected_string): first_line = ax.get_lines()[0] @@ -185,28 +185,28 @@ def check_format_of_first_point(ax, expected_string): tsplot(daily, self.plt.Axes.plot, ax=ax) check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') - @slow + @pytest.mark.slow def test_line_plot_period_series(self): for s in self.period_ser: _check_plot_works(s.plot, s.index.freq) - @slow + @pytest.mark.slow def test_line_plot_datetime_series(self): for s in self.datetime_ser: _check_plot_works(s.plot, s.index.freq.rule_code) - @slow + @pytest.mark.slow def test_line_plot_period_frame(self): for df in self.period_df: _check_plot_works(df.plot, df.index.freq) - @slow + @pytest.mark.slow def test_line_plot_datetime_frame(self): for df in self.datetime_df: freq = df.index.to_period(df.index.freq.rule_code).freq _check_plot_works(df.plot, freq) - @slow + @pytest.mark.slow def test_line_plot_inferred_freq(self): for ser in self.datetime_ser: ser = Series(ser.values, Index(np.asarray(ser.index))) @@ -223,7 +223,7 @@ def test_fake_inferred_business(self): ts.plot(ax=ax) assert not hasattr(ax, 'freq') - @slow + @pytest.mark.slow def test_plot_offset_freq(self): ser = tm.makeTimeSeries() _check_plot_works(ser.plot) @@ -232,14 +232,14 @@ def test_plot_offset_freq(self): ser = Series(np.random.randn(len(dr)), dr) _check_plot_works(ser.plot) - @slow + @pytest.mark.slow def test_plot_multiple_inferred_freq(self): dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime( 2000, 1, 11)]) ser = Series(np.random.randn(len(dr)), dr) _check_plot_works(ser.plot) - @slow + 
@pytest.mark.slow def test_uhf(self): import pandas.plotting._converter as conv idx = date_range('2012-6-22 21:59:51.960928', freq='L', periods=500) @@ -257,7 +257,7 @@ def test_uhf(self): if len(rs): assert xp == rs - @slow + @pytest.mark.slow def test_irreg_hf(self): idx = date_range('2012-6-22 21:59:51', freq='S', periods=100) df = DataFrame(np.random.randn(len(idx), 2), idx) @@ -297,7 +297,7 @@ def test_business_freq(self): idx = ax.get_lines()[0].get_xdata() assert PeriodIndex(data=idx).freqstr == 'B' - @slow + @pytest.mark.slow def test_business_freq_convert(self): n = tm.N tm.N = 300 @@ -327,7 +327,7 @@ def test_dataframe(self): idx = ax.get_lines()[0].get_xdata() tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) - @slow + @pytest.mark.slow def test_axis_limits(self): def _test(ax): @@ -384,7 +384,7 @@ def test_get_finder(self): assert conv.get_finder('A') == conv._annual_finder assert conv.get_finder('W') == conv._daily_finder - @slow + @pytest.mark.slow def test_finder_daily(self): xp = Period('1999-1-1', freq='B').ordinal day_lst = [10, 40, 252, 400, 950, 2750, 10000] @@ -402,7 +402,7 @@ def test_finder_daily(self): assert xp == rs self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_quarterly(self): xp = Period('1988Q1').ordinal yrs = [3.5, 11] @@ -420,7 +420,7 @@ def test_finder_quarterly(self): assert xp == rs self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_monthly(self): xp = Period('Jan 1988').ordinal yrs = [1.15, 2.5, 4, 11] @@ -448,7 +448,7 @@ def test_finder_monthly_long(self): xp = Period('1989Q1', 'M').ordinal assert rs == xp - @slow + @pytest.mark.slow def test_finder_annual(self): xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): @@ -461,7 +461,7 @@ def test_finder_annual(self): assert rs == Period(xp[i], freq='A').ordinal self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range('1/1/1999', freq='Min', periods=nminutes) @@ -484,7 +484,7 @@ def test_finder_hourly(self): xp = Period('1/1/1999', freq='H').ordinal assert rs == xp - @slow + @pytest.mark.slow def test_gaps(self): ts = tm.makeTimeSeries() ts[5:25] = np.nan @@ -529,7 +529,7 @@ def test_gaps(self): mask = data.mask assert mask[2:5, 1].all() - @slow + @pytest.mark.slow def test_gap_upsample(self): low = tm.makeTimeSeries() low[5:25] = np.nan @@ -551,7 +551,7 @@ def test_gap_upsample(self): mask = data.mask assert mask[5:25, 1].all() - @slow + @pytest.mark.slow def test_secondary_y(self): ser = Series(np.random.randn(10)) ser2 = Series(np.random.randn(10)) @@ -581,7 +581,7 @@ def test_secondary_y(self): assert hasattr(ax2, 'left_ax') assert not hasattr(ax2, 'right_ax') - @slow + @pytest.mark.slow def test_secondary_y_ts(self): idx = date_range('1/1/2000', periods=10) ser = Series(np.random.randn(10), idx) @@ -608,7 +608,7 @@ def test_secondary_y_ts(self): ax2 = ser.plot(secondary_y=True) assert ax.get_yaxis().get_visible() - @slow + @pytest.mark.slow def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -621,7 +621,7 @@ def test_secondary_kde(self): axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def test_secondary_bar(self): ser = Series(np.random.randn(10)) fig, ax = self.plt.subplots() @@ -629,7 +629,7 @@ def test_secondary_bar(self): axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 
'right' - @slow + @pytest.mark.slow def test_secondary_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(secondary_y=['a', 'c'], subplots=True) @@ -638,7 +638,7 @@ def test_secondary_frame(self): self.default_tick_position) assert axes[2].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def test_secondary_bar_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(kind='bar', secondary_y=['a', 'c'], subplots=True) @@ -666,7 +666,7 @@ def test_mixed_freq_regular_first(self): assert left == pidx[0].ordinal assert right == pidx[-1].ordinal - @slow + @pytest.mark.slow def test_mixed_freq_irregular_first(self): s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] @@ -697,7 +697,7 @@ def test_mixed_freq_regular_first_df(self): assert left == pidx[0].ordinal assert right == pidx[-1].ordinal - @slow + @pytest.mark.slow def test_mixed_freq_irregular_first_df(self): # GH 9852 s1 = tm.makeTimeSeries().to_frame() @@ -723,7 +723,7 @@ def test_mixed_freq_hf_first(self): for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'D' - @slow + @pytest.mark.slow def test_mixed_freq_alignment(self): ts_ind = date_range('2012-01-01 13:00', '2012-01-02', freq='H') ts_data = np.random.randn(12) @@ -737,7 +737,7 @@ def test_mixed_freq_alignment(self): assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] - @slow + @pytest.mark.slow def test_mixed_freq_lf_first(self): idxh = date_range('1/1/1999', periods=365, freq='D') @@ -819,7 +819,7 @@ def test_nat_handling(self): assert s.index.min() <= Series(xdata).min() assert Series(xdata).max() <= s.index.max() - @slow + @pytest.mark.slow def test_to_weekly_resampling(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -840,7 +840,7 @@ def test_to_weekly_resampling(self): for l in lines: assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq - @slow + @pytest.mark.slow def test_from_weekly_resampling(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -876,7 +876,7 @@ def test_from_weekly_resampling(self): else: tm.assert_numpy_array_equal(xdata, expected_h) - @slow + @pytest.mark.slow def test_from_resampling_area_line_mixed(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -950,7 +950,7 @@ def test_from_resampling_area_line_mixed(self): tm.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y) - @slow + @pytest.mark.slow def test_mixed_freq_second_millisecond(self): # GH 7772, GH 7760 idxh = date_range('2014-07-01 09:00', freq='S', periods=50) @@ -974,7 +974,7 @@ def test_mixed_freq_second_millisecond(self): for l in ax.get_lines(): assert PeriodIndex(data=l.get_xdata()).freq == 'L' - @slow + @pytest.mark.slow def test_irreg_dtypes(self): # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] @@ -988,7 +988,7 @@ def test_irreg_dtypes(self): _, ax = self.plt.subplots() _check_plot_works(df.plot, ax=ax) - @slow + @pytest.mark.slow def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() @@ -1024,7 +1024,7 @@ def test_time(self): rs = time(h, m, s).strftime('%H:%M:%S') assert xp == rs - @slow + @pytest.mark.slow def test_time_musec(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() @@ -1051,7 +1051,7 @@ def test_time_musec(self): rs = time(h, m, 
s).strftime('%H:%M:%S.%f') assert xp == rs - @slow + @pytest.mark.slow def test_secondary_upsample(self): idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -1067,7 +1067,7 @@ def test_secondary_upsample(self): for l in ax.left_ax.get_lines(): assert PeriodIndex(l.get_xdata()).freq == 'D' - @slow + @pytest.mark.slow def test_secondary_legend(self): fig = self.plt.figure() ax = fig.add_subplot(211) @@ -1169,7 +1169,7 @@ def test_format_date_axis(self): if len(l.get_text()) > 0: assert l.get_rotation() == 30 - @slow + @pytest.mark.slow def test_ax_plot(self): x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') y = lrange(len(x)) @@ -1177,7 +1177,7 @@ def test_ax_plot(self): lines = ax.plot(x, y, label='Y') tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) - @slow + @pytest.mark.slow def test_mpl_nopandas(self): dates = [date(2008, 12, 31), date(2009, 1, 31)] values1 = np.arange(10.0, 11.0, 0.5) @@ -1196,7 +1196,7 @@ def test_mpl_nopandas(self): exp = np.array([x.toordinal() for x in dates], dtype=np.float64) tm.assert_numpy_array_equal(line2.get_xydata()[:, 0], exp) - @slow + @pytest.mark.slow def test_irregular_ts_shared_ax_xlim(self): # GH 2960 ts = tm.makeTimeSeries()[:20] @@ -1212,7 +1212,7 @@ def test_irregular_ts_shared_ax_xlim(self): assert left == ts_irregular.index.min().toordinal() assert right == ts_irregular.index.max().toordinal() - @slow + @pytest.mark.slow def test_secondary_y_non_ts_xlim(self): # GH 3490 - non-timeseries with secondary y index_1 = [1, 2, 3, 4] @@ -1229,7 +1229,7 @@ def test_secondary_y_non_ts_xlim(self): assert left_before == left_after assert right_before < right_after - @slow + @pytest.mark.slow def test_secondary_y_regular_ts_xlim(self): # GH 3490 - regular-timeseries with secondary y index_1 = date_range(start='2000-01-01', periods=4, freq='D') @@ -1246,7 +1246,7 @@ def test_secondary_y_regular_ts_xlim(self): assert left_before == left_after assert right_before < right_after - @slow + @pytest.mark.slow def test_secondary_y_mixed_freq_ts_xlim(self): # GH 3490 - mixed frequency timeseries with secondary y rng = date_range('2000-01-01', periods=10000, freq='min') @@ -1262,7 +1262,7 @@ def test_secondary_y_mixed_freq_ts_xlim(self): assert left_before == left_after assert right_before == right_after - @slow + @pytest.mark.slow def test_secondary_y_irregular_ts_xlim(self): # GH 3490 - irregular-timeseries with secondary y ts = tm.makeTimeSeries()[:20] @@ -1361,7 +1361,7 @@ def test_hist(self): _, ax = self.plt.subplots() ax.hist([x, x], weights=[w1, w2]) - @slow + @pytest.mark.slow def test_overlapping_datetime(self): # GB 6608 s1 = Series([1, 2, 3], index=[datetime(1995, 12, 31), diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index ca03bcb060e25..970de6ff881ab 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.util.testing import slow +import pytest from numpy.random import randn @@ -23,7 +23,7 @@ class TestDeprecatedNameSpace(TestPlotBase): - @slow + @pytest.mark.slow def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() @@ -35,7 +35,7 @@ def test_scatter_plot_legacy(self): with tm.assert_produces_warning(FutureWarning): pd.scatter_matrix(df) - @slow + @pytest.mark.slow def test_boxplot_deprecated(self): df = pd.DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -46,13 +46,13 @@ def 
test_boxplot_deprecated(self): plotting.boxplot(df, column=['one', 'two'], by='indic') - @slow + @pytest.mark.slow def test_radviz_deprecated(self): df = self.iris with tm.assert_produces_warning(FutureWarning): plotting.radviz(frame=df, class_column='Name') - @slow + @pytest.mark.slow def test_plot_params(self): with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 352c03582db93..7878740f64e55 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -15,7 +15,6 @@ from pandas.compat import range, lrange, lmap, lzip, u, zip, PY3 from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy.random import rand, randn @@ -41,7 +40,7 @@ def setup_method(self, method): "C": np.arange(20) + np.random.uniform( size=20)}) - @slow + @pytest.mark.slow def test_plot(self): df = self.tdf _check_plot_works(df.plot, grid=False) @@ -188,13 +187,13 @@ def test_nonnumeric_exclude(self): ax = df.plot() assert len(ax.get_lines()) == 1 # B was plotted - @slow + @pytest.mark.slow def test_implicit_label(self): df = DataFrame(randn(10, 3), columns=['a', 'b', 'c']) ax = df.plot(x='a', y='b') self._check_text_labels(ax.xaxis.get_label(), 'a') - @slow + @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 df = DataFrame(randn(2, 2), columns=['a', 'b']) @@ -202,7 +201,7 @@ def test_donot_overwrite_index_name(self): df.plot(y='b', label='LABEL') assert df.index.name == 'NAME' - @slow + @pytest.mark.slow def test_plot_xy(self): # columns.inferred_type == 'string' df = self.tdf @@ -228,7 +227,7 @@ def test_plot_xy(self): # columns.inferred_type == 'mixed' # TODO add MultiIndex test - @slow + @pytest.mark.slow def test_logscales(self): df = DataFrame({'a': np.arange(100)}, index=np.arange(100)) ax = df.plot(logy=True) @@ -240,7 +239,7 @@ def test_logscales(self): ax = df.plot(loglog=True) self._check_ax_scales(ax, xaxis='log', yaxis='log') - @slow + @pytest.mark.slow def test_xcompat(self): import pandas as pd @@ -305,7 +304,7 @@ def test_unsorted_index(self): rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name='y') tm.assert_series_equal(rs, df.y) - @slow + @pytest.mark.slow def test_subplots(self): df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) @@ -345,7 +344,7 @@ def test_subplots(self): for ax in axes: assert ax.get_legend() is None - @slow + @pytest.mark.slow def test_subplots_timeseries(self): idx = date_range(start='2014-07-01', freq='M', periods=10) df = DataFrame(np.random.rand(10, 3), index=idx) @@ -381,7 +380,7 @@ def test_subplots_timeseries(self): self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - @slow + @pytest.mark.slow def test_subplots_layout(self): # GH 6667 df = DataFrame(np.random.rand(10, 3), @@ -427,7 +426,7 @@ def test_subplots_layout(self): self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) assert axes.shape == (3, 3) - @slow + @pytest.mark.slow def test_subplots_warnings(self): # GH 9464 warnings.simplefilter('error') @@ -442,7 +441,7 @@ def test_subplots_warnings(self): self.fail(w) warnings.simplefilter('default') - @slow + @pytest.mark.slow def test_subplots_multiple_axes(self): # GH 5353, 6970, GH 7069 fig, axes = self.plt.subplots(2, 3) @@ -543,7 +542,7 @@ def test_subplots_sharex_axes_existing_axes(self): for ax in axes.ravel(): self._check_visible(ax.get_yticklabels(), visible=True) - @slow + 
@pytest.mark.slow def test_subplots_dup_columns(self): # GH 10962 df = DataFrame(np.random.rand(5, 5), columns=list('aaaaa')) @@ -697,7 +696,7 @@ def test_area_lim(self): ymin, ymax = ax.get_ylim() assert ymax == 0 - @slow + @pytest.mark.slow def test_bar_colors(self): import matplotlib.pyplot as plt default_colors = self._maybe_unpack_cycler(plt.rcParams) @@ -733,7 +732,7 @@ def test_bar_colors(self): self._check_colors(ax.patches[::5], facecolors=['green'] * 5) tm.close() - @slow + @pytest.mark.slow def test_bar_linewidth(self): df = DataFrame(randn(5, 5)) @@ -754,7 +753,7 @@ def test_bar_linewidth(self): for r in ax.patches: assert r.get_linewidth() == 2 - @slow + @pytest.mark.slow def test_bar_barwidth(self): df = DataFrame(randn(5, 5)) @@ -792,7 +791,7 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @slow + @pytest.mark.slow def test_bar_barwidth_position(self): df = DataFrame(randn(5, 5)) self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, @@ -808,7 +807,7 @@ def test_bar_barwidth_position(self): self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2) - @slow + @pytest.mark.slow def test_bar_barwidth_position_int(self): # GH 12979 df = DataFrame(randn(5, 5)) @@ -828,7 +827,7 @@ def test_bar_barwidth_position_int(self): self._check_bar_alignment(df, kind='bar', subplots=True, width=1) self._check_bar_alignment(df, kind='barh', subplots=True, width=1) - @slow + @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) @@ -857,7 +856,7 @@ def test_bar_bottom_left(self): result = [p.get_x() for p in ax.patches] assert result == [1] * 5 - @slow + @pytest.mark.slow def test_bar_nan(self): df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20], @@ -875,7 +874,7 @@ def test_bar_nan(self): expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] assert result == expected - @slow + @pytest.mark.slow def test_bar_categorical(self): # GH 13019 df1 = pd.DataFrame(np.random.randn(6, 5), @@ -901,7 +900,7 @@ def test_bar_categorical(self): assert ax.patches[0].get_x() == -0.25 assert ax.patches[-1].get_x() == 4.75 - @slow + @pytest.mark.slow def test_plot_scatter(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -919,7 +918,7 @@ def test_plot_scatter(self): axes = df.plot(x='x', y='y', kind='scatter', subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_plot_scatter_with_categorical_data(self): # GH 16199 df = pd.DataFrame({'x': [1, 2, 3, 4], @@ -937,7 +936,7 @@ def test_plot_scatter_with_categorical_data(self): df.plot(x='y', y='y', kind='scatter') ve.match('requires x column to be numeric') - @slow + @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1007,7 +1006,7 @@ def test_scatter_colors(self): tm.assert_numpy_array_equal(ax.collections[0].get_facecolor()[0], np.array([1, 1, 1, 1], dtype=np.float64)) - @slow + @pytest.mark.slow def test_plot_bar(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1098,7 +1097,7 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, return axes - @slow + @pytest.mark.slow def test_bar_stacked_center(self): # GH2157 df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) @@ -1107,7 +1106,7 @@ def test_bar_stacked_center(self): self._check_bar_alignment(df, kind='barh', stacked=True) self._check_bar_alignment(df, 
kind='barh', stacked=True, width=0.9) - @slow + @pytest.mark.slow def test_bar_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', stacked=False) @@ -1115,7 +1114,7 @@ def test_bar_center(self): self._check_bar_alignment(df, kind='barh', stacked=False) self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9) - @slow + @pytest.mark.slow def test_bar_subplots_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', subplots=True) @@ -1123,7 +1122,7 @@ def test_bar_subplots_center(self): self._check_bar_alignment(df, kind='barh', subplots=True) self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9) - @slow + @pytest.mark.slow def test_bar_align_single_column(self): df = DataFrame(randn(5)) self._check_bar_alignment(df, kind='bar', stacked=False) @@ -1133,7 +1132,7 @@ def test_bar_align_single_column(self): self._check_bar_alignment(df, kind='bar', subplots=True) self._check_bar_alignment(df, kind='barh', subplots=True) - @slow + @pytest.mark.slow def test_bar_edge(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) @@ -1158,7 +1157,7 @@ def test_bar_edge(self): self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, align='edge') - @slow + @pytest.mark.slow def test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 # regressions in 1.2.1 @@ -1172,7 +1171,7 @@ def test_bar_log_no_subplots(self): ax = df.plot.bar(grid=True, log=True) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_bar_log_subplots(self): expected = np.array([1., 10., 100., 1000.]) if not self.mpl_le_1_2_1: @@ -1184,7 +1183,7 @@ def test_bar_log_subplots(self): tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_boxplot(self): df = self.hist_df series = df['height'] @@ -1222,7 +1221,7 @@ def test_boxplot(self): tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - @slow + @pytest.mark.slow def test_boxplot_vertical(self): df = self.hist_df numeric_cols = df._get_numeric_data().columns @@ -1250,7 +1249,7 @@ def test_boxplot_vertical(self): tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - @slow + @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1270,7 +1269,7 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type='both') self._check_box_return_type(result, 'both') - @slow + @pytest.mark.slow def test_boxplot_subplots_return_type(self): df = self.hist_df @@ -1287,7 +1286,7 @@ def test_boxplot_subplots_return_type(self): expected_keys=['height', 'weight', 'category'], check_ax_title=False) - @slow + @pytest.mark.slow def test_kde_df(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1308,7 +1307,7 @@ def test_kde_df(self): axes = df.plot(kind='kde', logy=True, subplots=True) self._check_ax_scales(axes, yaxis='log') - @slow + @pytest.mark.slow def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1316,7 +1315,7 @@ def test_kde_missing_vals(self): df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind='kde') - @slow + @pytest.mark.slow def test_hist_df(self): 
from matplotlib.patches import Rectangle if self.mpl_le_1_2_1: @@ -1376,7 +1375,7 @@ def _check_box_coord(self, patches, expected_y=None, expected_h=None, tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False) - @slow + @pytest.mark.slow def test_hist_df_coord(self): normal_df = DataFrame({'A': np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), @@ -1467,12 +1466,12 @@ def test_hist_df_coord(self): expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([6, 7, 8, 9, 10])) - @slow + @pytest.mark.slow def test_plot_int_columns(self): df = DataFrame(randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) - @slow + @pytest.mark.slow def test_df_legend_labels(self): kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist'] df = DataFrame(rand(3, 3), columns=['a', 'b', 'c']) @@ -1565,7 +1564,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, 'new') - @slow + @pytest.mark.slow def test_no_legend(self): kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist'] df = DataFrame(rand(3, 3), columns=['a', 'b', 'c']) @@ -1577,7 +1576,7 @@ def test_no_legend(self): ax = df.plot(kind=kind, legend=False) self._check_legend_labels(ax, visible=False) - @slow + @pytest.mark.slow def test_style_by_column(self): import matplotlib.pyplot as plt fig = plt.gcf() @@ -1593,7 +1592,7 @@ def test_style_by_column(self): for i, l in enumerate(ax.get_lines()[:len(markers)]): assert l.get_marker() == markers[i] - @slow + @pytest.mark.slow def test_line_label_none(self): s = Series([1, 2]) ax = s.plot() @@ -1602,7 +1601,7 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == 'None' - @slow + @pytest.mark.slow @tm.capture_stdout def test_line_colors(self): from matplotlib import cm @@ -1654,13 +1653,13 @@ def test_line_colors(self): # Forced show plot _check_plot_works(df.plot, color=custom_colors) - @slow + @pytest.mark.slow def test_dont_modify_colors(self): colors = ['r', 'g', 'b'] pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 - @slow + @pytest.mark.slow def test_line_colors_and_styles_subplots(self): # GH 9894 from matplotlib import cm @@ -1738,7 +1737,7 @@ def test_line_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - @slow + @pytest.mark.slow def test_area_colors(self): from matplotlib import cm from matplotlib.collections import PolyCollection @@ -1798,7 +1797,7 @@ def test_area_colors(self): for h in handles: assert h.get_alpha() == 0.5 - @slow + @pytest.mark.slow def test_hist_colors(self): default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -1832,7 +1831,7 @@ def test_hist_colors(self): self._check_colors(ax.patches[::10], facecolors=['green'] * 5) tm.close() - @slow + @pytest.mark.slow def test_kde_colors(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1855,7 +1854,7 @@ def test_kde_colors(self): rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df))) self._check_colors(ax.get_lines(), linecolors=rgba_colors) - @slow + @pytest.mark.slow def test_kde_colors_and_styles_subplots(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -1914,7 +1913,7 @@ def test_kde_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - @slow + @pytest.mark.slow def test_boxplot_colors(self): def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', fliers_c=None): @@ -2025,7 +2024,7 @@ def 
test_all_invalid_plot_data(self): with pytest.raises(TypeError): df.plot(kind=kind) - @slow + @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) @@ -2050,7 +2049,7 @@ def test_invalid_kind(self): with pytest.raises(ValueError): df.plot(kind='aasdf') - @slow + @pytest.mark.slow def test_hexbin_basic(self): df = self.hexbin_df @@ -2066,7 +2065,7 @@ def test_hexbin_basic(self): # return value is single axes self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_hexbin_with_c(self): df = self.hexbin_df @@ -2076,7 +2075,7 @@ def test_hexbin_with_c(self): ax = df.plot.hexbin(x='A', y='B', C='C', reduce_C_function=np.std) assert len(ax.collections) == 1 - @slow + @pytest.mark.slow def test_hexbin_cmap(self): df = self.hexbin_df @@ -2088,14 +2087,14 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x='A', y='B', colormap=cm) assert ax.collections[0].cmap.name == cm - @slow + @pytest.mark.slow def test_no_color_bar(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', colorbar=None) assert ax.collections[0].colorbar is None - @slow + @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -2105,7 +2104,7 @@ def test_allow_cmap(self): with pytest.raises(TypeError): df.plot.hexbin(x='A', y='B', cmap='YlGn', colormap='BuGn') - @slow + @pytest.mark.slow def test_pie_df(self): df = DataFrame(np.random.rand(5, 3), columns=['X', 'Y', 'Z'], index=['a', 'b', 'c', 'd', 'e']) @@ -2159,7 +2158,7 @@ def test_pie_df_nan(self): assert ([x.get_text() for x in ax.get_legend().get_texts()] == base_expected[:i] + base_expected[i + 1:]) - @slow + @pytest.mark.slow def test_errorbar_plot(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} df = DataFrame(d) @@ -2227,7 +2226,7 @@ def test_errorbar_plot(self): with pytest.raises((ValueError, TypeError)): df.plot(yerr=df_err) - @slow + @pytest.mark.slow def test_errorbar_with_integer_column_names(self): # test with integer column names df = DataFrame(np.random.randn(10, 2)) @@ -2237,7 +2236,7 @@ def test_errorbar_with_integer_column_names(self): ax = _check_plot_works(df.plot, y=0, yerr=1) self._check_has_errorbars(ax, xerr=0, yerr=1) - @slow + @pytest.mark.slow def test_errorbar_with_partial_columns(self): df = DataFrame(np.random.randn(10, 3)) df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) @@ -2260,7 +2259,7 @@ def test_errorbar_with_partial_columns(self): ax = _check_plot_works(df.plot, yerr=err) self._check_has_errorbars(ax, xerr=0, yerr=1) - @slow + @pytest.mark.slow def test_errorbar_timeseries(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} @@ -2370,7 +2369,7 @@ def _check_errorbar_color(containers, expected, has_err='has_xerr'): self._check_has_errorbars(ax, xerr=0, yerr=1) _check_errorbar_color(ax.containers, 'green', has_err='has_yerr') - @slow + @pytest.mark.slow def test_sharex_and_ax(self): # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, # the axis in fig.get_axis() are sorted differently than pandas @@ -2422,7 +2421,7 @@ def _check(axes): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_sharey_and_ax(self): # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, # the axis in fig.get_axis() are sorted differently than pandas @@ -2505,7 +2504,7 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @slow + @pytest.mark.slow def 
test_df_subplots_patterns_minorticks(self): # GH 10657 import matplotlib.pyplot as plt @@ -2550,7 +2549,7 @@ def test_df_subplots_patterns_minorticks(self): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 import matplotlib.pyplot as plt @@ -2673,7 +2672,7 @@ def _get_boxed_grid(): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 17a75e5cb287c..5f7b2dd2d6ca9 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -6,7 +6,6 @@ from pandas import Series, DataFrame import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy.random import randn @@ -28,7 +27,7 @@ def setup_method(self, method): self.ts = tm.makeTimeSeries() self.ts.name = 'ts' - @slow + @pytest.mark.slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) @@ -52,13 +51,13 @@ def test_hist_legacy(self): with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) - @slow + @pytest.mark.slow def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 - @slow + @pytest.mark.slow def test_hist_layout(self): df = self.hist_df with pytest.raises(ValueError): @@ -67,7 +66,7 @@ def test_hist_layout(self): with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) - @slow + @pytest.mark.slow def test_hist_layout_with_by(self): df = self.hist_df @@ -113,7 +112,7 @@ def test_hist_layout_with_by(self): self._check_axes_shape( axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) - @slow + @pytest.mark.slow def test_hist_no_overlap(self): from matplotlib.pyplot import subplot, gcf x = Series(randn(2)) @@ -126,13 +125,13 @@ def test_hist_no_overlap(self): axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() assert len(axes) == 2 - @slow + @pytest.mark.slow def test_hist_by_no_extra_plots(self): df = self.hist_df axes = df.height.hist(by=df.gender) # noqa assert len(self.plt.get_fignums()) == 1 - @slow + @pytest.mark.slow def test_plot_fails_when_ax_differs_from_figure(self): from pylab import figure fig1 = figure() @@ -144,7 +143,7 @@ def test_plot_fails_when_ax_differs_from_figure(self): class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_hist_df_legacy(self): from matplotlib.patches import Rectangle with tm.assert_produces_warning(UserWarning): @@ -210,7 +209,7 @@ def test_hist_df_legacy(self): with pytest.raises(AttributeError): ser.hist(foo='bar') - @slow + @pytest.mark.slow def test_hist_layout(self): df = DataFrame(randn(100, 3)) @@ -241,7 +240,7 @@ def test_hist_layout(self): with pytest.raises(ValueError): df.hist(layout=(-1, -1)) - @slow + @pytest.mark.slow # GH 9351 def test_tight_layout(self): if self.mpl_ge_2_0_1: @@ -254,7 +253,7 @@ def test_tight_layout(self): class TestDataFrameGroupByPlots(TestPlotBase): - @slow + @pytest.mark.slow def test_grouped_hist_legacy(self): from matplotlib.patches import Rectangle @@ -303,7 +302,7 @@ def test_grouped_hist_legacy(self): with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') - @slow + @pytest.mark.slow def 
test_grouped_hist_legacy2(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) @@ -318,7 +317,7 @@ def test_grouped_hist_legacy2(self): assert len(self.plt.get_fignums()) == 2 tm.close() - @slow + @pytest.mark.slow def test_grouped_hist_layout(self): df = self.hist_df pytest.raises(ValueError, df.hist, column='weight', by=df.gender, @@ -367,7 +366,7 @@ def test_grouped_hist_layout(self): axes = df.hist(column=['height', 'weight', 'category']) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - @slow + @pytest.mark.slow def test_grouped_hist_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df @@ -387,7 +386,7 @@ def test_grouped_hist_multiple_axes(self): # pass different number of axes from required axes = df.hist(column='height', ax=axes) - @slow + @pytest.mark.slow def test_axis_share_x(self): df = self.hist_df # GH4089 @@ -401,7 +400,7 @@ def test_axis_share_x(self): assert not ax1._shared_y_axes.joined(ax1, ax2) assert not ax2._shared_y_axes.joined(ax1, ax2) - @slow + @pytest.mark.slow def test_axis_share_y(self): df = self.hist_df ax1, ax2 = df.hist(column='height', by=df.gender, sharey=True) @@ -414,7 +413,7 @@ def test_axis_share_y(self): assert not ax1._shared_x_axes.joined(ax1, ax2) assert not ax2._shared_x_axes.joined(ax1, ax2) - @slow + @pytest.mark.slow def test_axis_share_xy(self): df = self.hist_df ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True, diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index d93ad90a36a9c..684a943fb5a69 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -7,7 +7,6 @@ from pandas import Series, DataFrame from pandas.compat import lmap import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy import random @@ -30,7 +29,7 @@ def setup_method(self, method): self.ts = tm.makeTimeSeries() self.ts.name = 'ts' - @slow + @pytest.mark.slow def test_autocorrelation_plot(self): from pandas.plotting import autocorrelation_plot _check_plot_works(autocorrelation_plot, series=self.ts) @@ -39,13 +38,13 @@ def test_autocorrelation_plot(self): ax = autocorrelation_plot(self.ts, label='Test') self._check_legend_labels(ax, labels=['Test']) - @slow + @pytest.mark.slow def test_lag_plot(self): from pandas.plotting import lag_plot _check_plot_works(lag_plot, series=self.ts) _check_plot_works(lag_plot, series=self.ts, lag=5) - @slow + @pytest.mark.slow def test_bootstrap_plot(self): from pandas.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, series=self.ts, size=10) @@ -53,7 +52,7 @@ def test_bootstrap_plot(self): class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() @@ -130,7 +129,7 @@ def test_scatter_matrix_axis(self): self._check_ticks_props( axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - @slow + @pytest.mark.slow def test_andrews_curves(self): from pandas.plotting import andrews_curves from matplotlib import cm @@ -195,7 +194,7 @@ def test_andrews_curves(self): with tm.assert_produces_warning(FutureWarning): andrews_curves(data=df, class_column='Name') - @slow + @pytest.mark.slow def test_parallel_coordinates(self): from pandas.plotting import parallel_coordinates from matplotlib import cm @@ -263,7 +262,7 @@ def test_parallel_coordinates_with_sorted_labels(self): # lables and colors are ordered strictly increasing assert prev[1] < nxt[1] and prev[0] < nxt[0] - @slow + @pytest.mark.slow def test_radviz(self): from 
pandas.plotting import radviz from matplotlib import cm @@ -301,7 +300,7 @@ def test_radviz(self): handles, labels = ax.get_legend_handles_labels() self._check_colors(handles, facecolors=colors) - @slow + @pytest.mark.slow def test_subplot_titles(self): df = self.iris.drop('Name', axis=1).head() # Use the column names as the subplot titles diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 7c66b5dafb9c7..9c9011ba1ca7b 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -12,7 +12,6 @@ from pandas import Series, DataFrame, date_range from pandas.compat import range, lrange import pandas.util.testing as tm -from pandas.util.testing import slow import numpy as np from numpy.random import randn @@ -41,7 +40,7 @@ def setup_method(self, method): self.iseries = tm.makePeriodSeries() self.iseries.name = 'iseries' - @slow + @pytest.mark.slow def test_plot(self): _check_plot_works(self.ts.plot, label='foo') _check_plot_works(self.ts.plot, use_index=False) @@ -79,7 +78,7 @@ def test_plot(self): ax = _check_plot_works(self.ts.plot, subplots=True, layout=(1, -1)) self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_plot_figsize_and_title(self): # figsize and title _, ax = self.plt.subplots() @@ -210,7 +209,7 @@ def test_line_use_index_false(self): label2 = ax2.get_xlabel() assert label2 == '' - @slow + @pytest.mark.slow def test_bar_log(self): expected = np.array([1., 10., 100., 1000.]) @@ -252,7 +251,7 @@ def test_bar_log(self): tm.assert_almost_equal(res[1], ymax) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_bar_ignore_index(self): df = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']) _, ax = self.plt.subplots() @@ -280,7 +279,7 @@ def test_irregular_datetime(self): ax.set_xlim('1/1/1999', '1/1/2001') assert xp == ax.get_xlim()[0] - @slow + @pytest.mark.slow def test_pie_series(self): # if sum of values is less than 1.0, pie handle them as rate and draw # semicircle. 
@@ -339,14 +338,14 @@ def test_pie_nan(self): result = [x.get_text() for x in ax.texts] assert result == expected - @slow + @pytest.mark.slow def test_hist_df_kwargs(self): df = DataFrame(np.random.randn(10, 2)) _, ax = self.plt.subplots() ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 10 - @slow + @pytest.mark.slow def test_hist_df_with_nonnumerics(self): # GH 9853 with tm.RNGContext(1): @@ -361,7 +360,7 @@ def test_hist_df_with_nonnumerics(self): ax = df.plot.hist(ax=ax) # bins=10 assert len(ax.patches) == 40 - @slow + @pytest.mark.slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) @@ -387,13 +386,13 @@ def test_hist_legacy(self): with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) - @slow + @pytest.mark.slow def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 - @slow + @pytest.mark.slow def test_hist_layout(self): df = self.hist_df with pytest.raises(ValueError): @@ -402,7 +401,7 @@ def test_hist_layout(self): with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) - @slow + @pytest.mark.slow def test_hist_layout_with_by(self): df = self.hist_df @@ -446,7 +445,7 @@ def test_hist_layout_with_by(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) - @slow + @pytest.mark.slow def test_hist_no_overlap(self): from matplotlib.pyplot import subplot, gcf x = Series(randn(2)) @@ -459,7 +458,7 @@ def test_hist_no_overlap(self): axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() assert len(axes) == 2 - @slow + @pytest.mark.slow def test_hist_secondary_legend(self): # GH 9610 df = DataFrame(np.random.randn(30, 4), columns=list('abcd')) @@ -499,7 +498,7 @@ def test_hist_secondary_legend(self): assert ax.get_yaxis().get_visible() tm.close() - @slow + @pytest.mark.slow def test_df_series_secondary_legend(self): # GH 9779 df = DataFrame(np.random.randn(30, 3), columns=list('abc')) @@ -563,14 +562,14 @@ def test_df_series_secondary_legend(self): assert ax.get_yaxis().get_visible() tm.close() - @slow + @pytest.mark.slow def test_plot_fails_with_dupe_color_and_style(self): x = Series(randn(2)) with pytest.raises(ValueError): _, ax = self.plt.subplots() x.plot(style='k--', color='k', ax=ax) - @slow + @pytest.mark.slow def test_hist_kde(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, ax=ax) @@ -593,7 +592,7 @@ def test_hist_kde(self): ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - @slow + @pytest.mark.slow def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -608,7 +607,7 @@ def test_kde_kwargs(self): self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') - @slow + @pytest.mark.slow def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() @@ -619,7 +618,7 @@ def test_kde_missing_vals(self): # gh-14821: check if the values have any missing values assert any(~np.isnan(axes.lines[0].get_xdata())) - @slow + @pytest.mark.slow def test_hist_kwargs(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(bins=5, ax=ax) @@ -637,7 +636,7 @@ def test_hist_kwargs(self): ax = self.ts.plot.hist(align='left', stacked=True, ax=ax) tm.close() - @slow + @pytest.mark.slow def test_hist_kde_color(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) @@ -654,7 +653,7 @@ def test_hist_kde_color(self): assert len(lines) 
== 1 self._check_colors(lines, ['r']) - @slow + @pytest.mark.slow def test_boxplot_series(self): _, ax = self.plt.subplots() ax = self.ts.plot.box(logy=True, ax=ax) @@ -664,7 +663,7 @@ def test_boxplot_series(self): ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - @slow + @pytest.mark.slow def test_kind_both_ways(self): s = Series(range(3)) kinds = (plotting._core._common_kinds + @@ -676,7 +675,7 @@ def test_kind_both_ways(self): s.plot(kind=kind, ax=ax) getattr(s.plot, kind)() - @slow + @pytest.mark.slow def test_invalid_plot_data(self): s = Series(list('abcd')) _, ax = self.plt.subplots() @@ -686,7 +685,7 @@ def test_invalid_plot_data(self): with pytest.raises(TypeError): s.plot(kind=kind, ax=ax) - @slow + @pytest.mark.slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) for kind in plotting._core._common_kinds: @@ -708,7 +707,7 @@ def test_invalid_kind(self): with pytest.raises(ValueError): s.plot(kind='aasdf') - @slow + @pytest.mark.slow def test_dup_datetime_index_plot(self): dr1 = date_range('1/1/2009', periods=4) dr2 = date_range('1/2/2009', periods=4) @@ -717,7 +716,7 @@ def test_dup_datetime_index_plot(self): s = Series(values, index=index) _check_plot_works(s.plot) - @slow + @pytest.mark.slow def test_errorbar_plot(self): s = Series(np.arange(10), name='x') @@ -764,14 +763,14 @@ def test_table(self): _check_plot_works(self.series.plot, table=True) _check_plot_works(self.series.plot, table=self.series) - @slow + @pytest.mark.slow def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), plotting._core._series_kinds + plotting._core._common_kinds) - @slow + @pytest.mark.slow def test_standard_colors(self): from pandas.plotting._style import _get_standard_colors @@ -788,7 +787,7 @@ def test_standard_colors(self): result = _get_standard_colors(3, color=[c]) assert result == [c] * 3 - @slow + @pytest.mark.slow def test_standard_colors_all(self): import matplotlib.colors as colors from pandas.plotting._style import _get_standard_colors diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 7774d10c5eaf8..6d8a54b538237 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -20,8 +20,7 @@ from pandas.compat import lrange, range from pandas import compat -from pandas.util.testing import (slow, - assert_series_equal, +from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal) import pandas.util.testing as tm @@ -2592,7 +2591,7 @@ def test_series_set_value(self): # s2 = s.set_value(dates[1], index[1]) # assert s2.values.dtype == 'M8[ns]' - @slow + @pytest.mark.slow def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 08c3a25e66b0e..2b972477ae999 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -16,7 +16,7 @@ from pandas import compat, _np_version_under1p11, _np_version_under1p13 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, - assert_panel4d_equal, slow) + assert_panel4d_equal) from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm @@ -196,7 +196,7 @@ def test_integer_arithmetic_frame(self): def test_integer_arithmetic_series(self): 
self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0]) - @slow + @pytest.mark.slow def test_integer_panel(self): self.run_panel(_integer2_panel, np.random.randint(1, 100)) @@ -206,11 +206,11 @@ def test_float_arithemtic_frame(self): def test_float_arithmetic_series(self): self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0]) - @slow + @pytest.mark.slow def test_float_panel(self): self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8) - @slow + @pytest.mark.slow def test_panel4d(self): with catch_warnings(record=True): self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5, @@ -226,7 +226,7 @@ def test_mixed_arithmetic_series(self): for col in self.mixed2.columns: self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4) - @slow + @pytest.mark.slow def test_mixed_panel(self): self.run_panel(_mixed2_panel, np.random.randint(1, 100), binary_comp=-2) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3ba5d2065cddf..dd35e4375841e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2155,7 +2155,7 @@ def _non_null_values(x): assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) - @tm.slow + @pytest.mark.slow def test_ewm_consistency(self): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): @@ -2254,7 +2254,7 @@ def _ewma(s, com, min_periods, adjust, ignore_na): _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na))) - @tm.slow + @pytest.mark.slow def test_expanding_consistency(self): # suppress warnings about empty slices, as we are deliberately testing @@ -2328,7 +2328,7 @@ def test_expanding_consistency(self): assert_equal(expanding_f_result, expanding_apply_f_result) - @tm.slow + @pytest.mark.slow def test_rolling_consistency(self): # suppress warnings about empty slices, as we are deliberately testing diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 17e09b38b20e0..d6ba9561340cc 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -50,13 +50,6 @@ from pandas._libs import testing as _testing from pandas.io.common import urlopen -try: - import pytest - slow = pytest.mark.slow -except ImportError: - # Should be ok to just ignore. If you actually need - # slow then you'll hit an import error long before getting here. - pass N = 30 From 63536f4a80a1f1f03732411d015910c55a1f9290 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 13 Jul 2017 12:15:26 -0700 Subject: [PATCH 12/54] MAINT: Remove unused mock import (#16908) We import it, set it as an attribute, and then don't use it. 
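For context, the block deleted below implements the familiar two-step ``mock`` import: try the standalone backport package first, fall back to ``unittest.mock`` from the standard library (Python 3.3+), and skip the tests when neither is importable. A minimal, self-contained sketch of that pattern, mirroring the removed lines (which originally ran inside ``setup_class``)::

    import pytest

    try:
        import mock  # standalone backport package, needed on Python 2
    except ImportError:
        try:
            from unittest import mock  # standard library on Python 3.3+
        except ImportError:
            pytest.skip("Mock is not installed")

    # The removed code then stored the module on the test class
    # (``cls.mock = mock``), which is the attribute this patch found
    # to be unused.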
--- pandas/tests/io/formats/test_printing.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index aae3ba31648ff..ec34e7656e01f 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -127,14 +127,7 @@ class TestTableSchemaRepr(object): @classmethod def setup_class(cls): pytest.importorskip('IPython') - try: - import mock - except ImportError: - try: - from unittest import mock - except ImportError: - pytest.skip("Mock is not installed") - cls.mock = mock + from IPython.core.interactiveshell import InteractiveShell cls.display_formatter = InteractiveShell.instance().display_formatter From 25384ba459ba7de9fb9d36821f0a4ae239cc40b2 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 13 Jul 2017 21:35:48 +0100 Subject: [PATCH 13/54] Let _get_dtype accept Categoricals and CategoricalIndex (#16887) --- doc/source/whatsnew/v0.21.0.txt | 1 - pandas/core/dtypes/common.py | 4 +++- pandas/tests/dtypes/test_common.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a5ee0e0ce2653..8ba57c0fa50be 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -149,7 +149,6 @@ Conversion ^^^^^^^^^^ - Indexing ^^^^^^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 2eebf3704253e..a386c04cc4fdd 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -11,7 +11,7 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) + ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex) from .inference import is_string_like from .inference import * # noqa @@ -1713,6 +1713,8 @@ def _get_dtype(arr_or_dtype): return PeriodDtype.construct_from_string(arr_or_dtype) elif is_interval_dtype(arr_or_dtype): return IntervalDtype.construct_from_string(arr_or_dtype) + elif isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex)): + return arr_or_dtype.dtype if hasattr(arr_or_dtype, 'dtype'): arr_or_dtype = arr_or_dtype.dtype diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c32e8590c5675..7188e397c0617 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -532,16 +532,16 @@ def test_is_complex_dtype(): (float, np.dtype(float)), ('float64', np.dtype('float64')), (np.dtype('float64'), np.dtype('float64')), - pytest.mark.xfail((str, np.dtype(' Date: Thu, 13 Jul 2017 19:04:29 -0400 Subject: [PATCH 14/54] Fixes for #16896(TimedeltaIndex indexing regression for strings) (#16907) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/dtypes/common.py | 4 +++- pandas/tests/dtypes/test_common.py | 9 +++++++-- pandas/tests/indexes/timedeltas/test_timedelta.py | 3 +++ pandas/tests/indexing/test_timedelta.py | 9 ++++++++- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 8ba57c0fa50be..039b24cc63217 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -154,7 +154,7 @@ Indexing - When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). 
 - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).
-
+- Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`).
 
 I/O
 ^^^
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index a386c04cc4fdd..114900ce802be 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -392,13 +392,15 @@ def is_timedelta64_dtype(arr_or_dtype):
     False
     >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
     True
+    >>> is_timedelta64_dtype('0 days')
+    False
     """
     if arr_or_dtype is None:
         return False
     try:
         tipo = _get_dtype_type(arr_or_dtype)
-    except ValueError:
+    except:
         return False
 
     return issubclass(tipo, np.timedelta64)
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index 7188e397c0617..290cdd732b6d6 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -199,12 +199,17 @@ def test_is_datetime64tz_dtype():
 
 def test_is_timedelta64_dtype():
     assert not com.is_timedelta64_dtype(object)
+    assert not com.is_timedelta64_dtype(None)
     assert not com.is_timedelta64_dtype([1, 2, 3])
     assert not com.is_timedelta64_dtype(np.array([], dtype=np.datetime64))
+    assert not com.is_timedelta64_dtype('0 days')
+    assert not com.is_timedelta64_dtype("0 days 00:00:00")
+    assert not com.is_timedelta64_dtype(["0 days 00:00:00"])
+    assert not com.is_timedelta64_dtype("NO DATE")
+
     assert com.is_timedelta64_dtype(np.timedelta64)
     assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
-
-    assert not com.is_timedelta64_dtype("0 days 00:00:00")
+    assert com.is_timedelta64_dtype(pd.to_timedelta(['0 days', '1 days']))
 
 
 def test_is_period_dtype():
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 08cf5108ffdb1..a4fc26382fb9b 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -66,6 +66,9 @@ def test_get_loc(self):
         for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
             assert idx.get_loc('1 day 1 hour', method) == loc
 
+        # GH 16896
+        assert idx.get_loc('0 days') == 0
+
     def test_get_loc_nat(self):
         tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00'])
 
diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py
index be3ea8f0c371d..32609362e49af 100644
--- a/pandas/tests/indexing/test_timedelta.py
+++ b/pandas/tests/indexing/test_timedelta.py
@@ -5,7 +5,6 @@
 
 
 class TestTimedeltaIndexing(object):
-
     def test_boolean_indexing(self):
         # GH 14946
         df = pd.DataFrame({'x': range(10)})
@@ -40,3 +39,11 @@ def test_list_like_indexing(self, indexer, expected):
                           dtype="int64")
 
         tm.assert_frame_equal(expected, df)
+
+    def test_string_indexing(self):
+        # GH 16896
+        df = pd.DataFrame({'x': range(3)},
+                          index=pd.to_timedelta(range(3), unit='days'))
+        expected = df.iloc[0]
+        sliced = df.loc['0 days']
+        tm.assert_series_equal(sliced, expected)

From 6000c5b9624fdd8925099f215eba282bfbef87ce Mon Sep 17 00:00:00 2001
From: jdeschenes
Date: Fri, 14 Jul 2017 10:13:53 -0400
Subject: [PATCH 15/54] Fix for #16909 (TimedeltaIndex.get_loc is not working
 on np.timedelta64 data type) (#16912)

---
 doc/source/whatsnew/v0.21.0.txt                   | 1 +
 pandas/core/indexes/timedeltas.py                 | 4 ++--
 pandas/tests/indexes/timedeltas/test_timedelta.py | 3 +++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git
a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 039b24cc63217..2716d9b09eaa9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -155,6 +155,7 @@ Indexing - When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). +- Fixed ``TimedeltaIndex.get_loc`` handling of ``np.timedelta64`` inputs (:issue:`16909`). I/O ^^^ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index faec813df3993..68713743d72ed 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -15,7 +15,7 @@ _ensure_int64) from pandas.core.dtypes.missing import isnull from pandas.core.dtypes.generic import ABCSeries -from pandas.core.common import _maybe_box, _values_from_object, is_bool_indexer +from pandas.core.common import _maybe_box, _values_from_object from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index @@ -682,7 +682,7 @@ def get_loc(self, key, method=None, tolerance=None): ------- loc : int """ - if is_bool_indexer(key) or is_timedelta64_dtype(key): + if is_list_like(key): raise TypeError if isnull(key): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index a4fc26382fb9b..59e4b1432b8bc 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -66,6 +66,9 @@ def test_get_loc(self): for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: assert idx.get_loc('1 day 1 hour', method) == loc + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + # GH 16896 assert idx.get_loc('0 days') == 0 From a587d568d213c62307a72d98d6913239f55844e8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 14 Jul 2017 14:46:41 -0500 Subject: [PATCH 16/54] DOC: Recommend sphinx 1.5 for now (#16929) For the SciPy sprint tomorrow, until the cause of the doc-building slowdown is fully identified. 
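One note on the pin the following diff introduces: ``sphinx=1.5*`` is conda match-specification syntax, where a single ``=`` with a trailing wildcard selects any 1.5.x build; it is not valid pip syntax. For anyone mirroring this pin in a pip requirements file, rough equivalents would be (illustrative only, not part of the patch)::

    # conda match specification, as used in ci/requirements_all.txt below
    sphinx=1.5*

    # approximate pip spellings of the same constraint
    sphinx==1.5.*
    sphinx~=1.5.0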
---
 ci/requirements_all.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt
index e9f49ed879c86..de37ec4d20be4 100644
--- a/ci/requirements_all.txt
+++ b/ci/requirements_all.txt
@@ -2,7 +2,7 @@ pytest
 pytest-cov
 pytest-xdist
 flake8
-sphinx
+sphinx=1.5*
 nbsphinx
 ipython
 python-dateutil

From 6858d0f6caa60c98acc4b6c3eaa6cd0309aedca6 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard
Date: Fri, 14 Jul 2017 22:20:28 +0100
Subject: [PATCH 17/54] BUG: Allow value labels to be read with iterator
 (#16926)

Allow value labels to be read before the iterator has been used.
Fix an issue where categorical data was incorrectly reformatted when
write_index was False.

closes #16923
---
 doc/source/whatsnew/v0.21.0.txt |  1 +
 pandas/io/stata.py              | 36 ++++++++++++++++++---------------
 pandas/tests/io/test_stata.py   | 18 ++++++++++++++---
 3 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 2716d9b09eaa9..bd19d71182762 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -162,6 +162,7 @@ I/O
 
 - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
+- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 107dccfc8175c..30991d8a24c63 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -997,6 +997,7 @@ def __init__(self, path_or_buf, convert_dates=True,
             self.path_or_buf = BytesIO(contents)
 
         self._read_header()
+        self._setup_dtype()
 
     def __enter__(self):
         """ enter context manager """
@@ -1299,6 +1300,23 @@ def _read_old_header(self, first_char):
         # necessary data to continue parsing
         self.data_location = self.path_or_buf.tell()
 
+    def _setup_dtype(self):
+        """Map between numpy and state dtypes"""
+        if self._dtype is not None:
+            return self._dtype
+
+        dtype = []  # Convert struct data types to numpy data type
+        for i, typ in enumerate(self.typlist):
+            if typ in self.NUMPY_TYPE_MAP:
+                dtype.append(('s' + str(i), self.byteorder +
+                              self.NUMPY_TYPE_MAP[typ]))
+            else:
+                dtype.append(('s' + str(i), 'S' + str(typ)))
+        dtype = np.dtype(dtype)
+        self._dtype = dtype
+
+        return self._dtype
+
     def _calcsize(self, fmt):
         return (type(fmt) is int and fmt or
                 struct.calcsize(self.byteorder + fmt))
@@ -1472,22 +1490,10 @@ def read(self, nrows=None, convert_dates=None,
         if nrows is None:
             nrows = self.nobs
 
-        if (self.format_version >= 117) and (self._dtype is None):
+        if (self.format_version >= 117) and (not self._value_labels_read):
             self._can_read_value_labels = True
             self._read_strls()
 
-        # Setup the dtype.
- if self._dtype is None: - dtype = [] # Convert struct data types to numpy data type - for i, typ in enumerate(self.typlist): - if typ in self.NUMPY_TYPE_MAP: - dtype.append(('s' + str(i), self.byteorder + - self.NUMPY_TYPE_MAP[typ])) - else: - dtype.append(('s' + str(i), 'S' + str(typ))) - dtype = np.dtype(dtype) - self._dtype = dtype - # Read data dtype = self._dtype max_read_len = (self.nobs - self._lines_read) * dtype.itemsize @@ -1958,7 +1964,6 @@ def _prepare_categoricals(self, data): return data get_base_missing_value = StataMissingValue.get_base_missing_value - index = data.index data_formatted = [] for col, col_is_cat in zip(data, is_cat): if col_is_cat: @@ -1981,8 +1986,7 @@ def _prepare_categoricals(self, data): # Replace missing values with Stata missing value for type values[values == -1] = get_base_missing_value(dtype) - data_formatted.append((col, values, index)) - + data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) return DataFrame.from_items(data_formatted) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b9c6736563160..a414928d318c4 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -9,18 +9,18 @@ from datetime import datetime from distutils.version import LooseVersion -import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +import pytest from pandas import compat +from pandas._libs.tslib import NaT from pandas.compat import iterkeys +from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.frame import DataFrame, Series from pandas.io.parsers import read_csv from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) -from pandas._libs.tslib import NaT -from pandas.core.dtypes.common import is_categorical_dtype class TestStata(object): @@ -1297,3 +1297,15 @@ def test_pickle_path_localpath(self): reader = lambda x: read_stata(x).set_index('index') result = tm.round_trip_localpath(df.to_stata, reader) tm.assert_frame_equal(df, result) + + @pytest.mark.parametrize('write_index', [True, False]) + def test_value_labels_iterator(self, write_index): + # GH 16923 + d = {'A': ['B', 'E', 'C', 'A', 'E']} + df = pd.DataFrame(data=d) + df['A'] = df['A'].astype('category') + with tm.ensure_clean() as path: + df.to_stata(path, write_index=write_index) + dta_iter = pd.read_stata(path, iterator=True) + value_labels = dta_iter.value_labels() + assert value_labels == {'A': {0: 'A', 1: 'B', 2: 'C', 3: 'E'}} From ad24759871ea43131711cfce1e5fc69c06d82956 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 14 Jul 2017 21:16:00 -0700 Subject: [PATCH 18/54] DOC: Update flake8 command instructions (#16919) --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/source/contributing.rst | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 959858fb50f89..e8b6ee21ad104 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - [ ] closes #xxxx - [ ] tests added / passed - - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.) 
+ - [ ] passes ``git diff upstream/master -u -- "*.py" | flake8 --diff`` - [ ] whatsnew entry diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index cd444f796fabb..bfcf560565977 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -509,7 +509,7 @@ the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: - git diff master --name-only -- '*.py' | flake8 --diff + git diff master -u -- "*.py" | flake8 --diff This command will catch any stylistic errors in your changes specifically, but be beware it may not catch all of them. For example, if you delete the only @@ -518,18 +518,28 @@ unused function. However, style-checking the diff will not catch this because the actual import is not part of the diff. Thus, for completeness, you should run this command, though it will take longer:: - git diff master --name-only -- '*.py' | grep 'pandas/' | xargs -r flake8 + git diff master --name-only -- "*.py" | grep "pandas/" | xargs -r flake8 Note that on OSX, the ``-r`` flag is not available, so you have to omit it and run this slightly modified command:: - git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 + git diff master --name-only -- "*.py" | grep "pandas/" | xargs flake8 -Note that on Windows, ``grep``, ``xargs``, and other tools are likely -unavailable. However, this has been shown to work on smaller commits in the -standard Windows command line:: +Note that on Windows, these commands are unfortunately not possible because +commands like ``grep`` and ``xargs`` are not available natively. To imitate the +behavior with the commands above, you should run:: - git diff master -u -- "*.py" | flake8 --diff + git diff master --name-only -- "*.py" + +This will list all of the Python files that have been modified. The only ones +that matter during linting are any whose directory filepath begins with "pandas." +For each filepath, copy and paste it after the ``flake8`` command as shown below: + + flake8 + +Alternatively, you can install the ``grep`` and ``xargs`` commands via the +`MinGW `__ toolchain, and it will allow you to run the +commands above. 
Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From 5f2b96bb637f6ddeec169c5ef8ad20013a03c853 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 15 Jul 2017 13:30:03 +0100 Subject: [PATCH 19/54] TST: Don't assert that a bug exists in numpy (#16940) Better to ignore the warning from the bug, rather than assert the bug is still there After this change, numpy/numpy#9412 _could_ be backported to fix the bug --- pandas/tests/test_algos.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9504d2a9426f0..993dcc4f527b2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2,6 +2,7 @@ import numpy as np import pytest +import warnings from numpy.random import RandomState from numpy import nan @@ -127,7 +128,7 @@ def test_unsortable(self): arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) if compat.PY2 and not pd._np_version_under1p10: # RuntimeWarning: tp_compare didn't return -1 or -2 for exception - with tm.assert_produces_warning(RuntimeWarning): + with warnings.catch_warnings(): pytest.raises(TypeError, algos.safe_sort, arr) else: pytest.raises(TypeError, algos.safe_sort, arr) From 6cee09ebfd2e8fb15f3e225bd9770852a6a533d1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 08:11:34 -0500 Subject: [PATCH 20/54] CI: add .pep8speakes.yml --- .pep8speakes.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .pep8speakes.yml diff --git a/.pep8speakes.yml b/.pep8speakes.yml new file mode 100644 index 0000000000000..299b76c8922cc --- /dev/null +++ b/.pep8speakes.yml @@ -0,0 +1,10 @@ +# File : .pep8speaks.yml + +scanner: + diff_only: True # If True, errors caused by only the patch are shown + +pycodestyle: + max-line-length: 79 + ignore: # Errors and warnings to ignore + - E731 + - E402 From 80e40f81d78ade9921607a092a00b83f9d34cfd3 Mon Sep 17 00:00:00 2001 From: faic Date: Sat, 15 Jul 2017 16:58:24 +0300 Subject: [PATCH 21/54] CLN16668: remove OrderedDefaultDict (#16939) --- pandas/compat/__init__.py | 25 ------------------------- pandas/core/panel.py | 6 ++++-- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 9eacb9acef2c9..33b41d61aa978 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -21,7 +21,6 @@ given metaclass instead (and avoids intermediary class creation) Other items: -* OrderedDefaultDict * platform checker """ # pylint disable=W0611 @@ -373,30 +372,6 @@ def parse_date(timestr, *args, **kwargs): parse_date = _date_parser.parse -class OrderedDefaultdict(OrderedDict): - - def __init__(self, *args, **kwargs): - newdefault = None - newargs = () - if args: - newdefault = args[0] - if not (newdefault is None or callable(newdefault)): - raise TypeError('first argument must be callable or None') - newargs = args[1:] - self.default_factory = newdefault - super(self.__class__, self).__init__(*newargs, **kwargs) - - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - self[key] = value = self.default_factory() - return value - - def __reduce__(self): # optional, for pickle support - args = self.default_factory if self.default_factory else tuple() - return type(self), args, None, None, list(self.items()) - - # https://github.com/pandas-dev/pandas/pull/9123 def is_platform_little_endian(): """ am I little endian """ diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d1f5b4587059c..69a8468552f54 100644 ---
a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -19,7 +19,7 @@ import pandas.core.ops as ops import pandas.core.missing as missing from pandas import compat -from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) +from pandas.compat import (map, zip, range, u, OrderedDict) from pandas.compat.numpy import function as nv from pandas.core.common import _try_sort, _default_index from pandas.core.frame import DataFrame @@ -260,9 +260,11 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): ------- Panel """ + from collections import defaultdict + orient = orient.lower() if orient == 'minor': - new_data = OrderedDefaultdict(dict) + new_data = defaultdict(OrderedDict) for col, df in compat.iteritems(data): for item, s in compat.iteritems(df): new_data[item][col] = s From 61f0c5ce2eae8a548e4729ee5cc8a8633faa8316 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 10:34:04 -0500 Subject: [PATCH 22/54] Change "pls" to "please" in error message (#16947) --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6559fc4c24ce2..4d8b831b7d63f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3359,7 +3359,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, inplace = validate_bool_kwarg(inplace, 'inplace') # 10726 if by is not None: - warnings.warn("by argument to sort_index is deprecated, pls use " + warnings.warn("by argument to sort_index is deprecated, please use " ".sort_values(by=...)", FutureWarning, stacklevel=2) if level is not None: raise ValueError("unable to simultaneously sort by and level") From 0e47b280ae6159dbc8817f3c7bd3e296af480c5d Mon Sep 17 00:00:00 2001 From: Alex Lubbock Date: Sat, 15 Jul 2017 10:34:31 -0500 Subject: [PATCH 23/54] BUG: MultiIndex sort with ascending as list (#16937) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/multi.py | 3 ++- pandas/tests/test_multilevel.py | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index bd19d71182762..6ddf6029b99bb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -156,6 +156,7 @@ Indexing - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). - Fixed ``TimedeltaIndex.get_loc`` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :meth:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). 
I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 81eac0ac0684f..ed7ca079a07b5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1697,7 +1697,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): raise ValueError("level must have same length as ascending") from pandas.core.sorting import lexsort_indexer - indexer = lexsort_indexer(self.labels, orders=ascending) + indexer = lexsort_indexer([self.labels[lev] for lev in level], + orders=ascending) # level ordering else: diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c8c210c42eac2..a56ff0fc2d158 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2781,3 +2781,26 @@ def test_sort_index_nan(self): result = s.sort_index(na_position='first') expected = s.iloc[[1, 2, 3, 0]] tm.assert_series_equal(result, expected) + + def test_sort_ascending_list(self): + # GH: 16934 + + # Set up a Series with a three level MultiIndex + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], + [4, 3, 2, 1, 4, 3, 2, 1]] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples, + names=['first', 'second', 'third']) + s = pd.Series(range(8), index=index) + + # Sort with boolean ascending + result = s.sort_index(level=['third', 'first'], ascending=False) + expected = s.iloc[[4, 0, 5, 1, 6, 2, 7, 3]] + tm.assert_series_equal(result, expected) + + # Sort with list of boolean ascending + result = s.sort_index(level=['third', 'first'], + ascending=[False, True]) + expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] + tm.assert_series_equal(result, expected) From d7bf220c2daeaf86ba2e2026b4fe900d441720d8 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 15 Jul 2017 17:38:22 +0200 Subject: [PATCH 24/54] DOC: Improving docstring of pop method (#16416) (#16520) --- pandas/core/generic.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5722539b87aec..a4bb746722c1e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -547,6 +547,43 @@ def swapaxes(self, axis1, axis2, copy=True): def pop(self, item): """ Return item and drop from frame. Raise KeyError if not found. + + Parameters + ---------- + item : str + Column label to be popped + + Returns + ------- + popped : Series + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... 
columns=('name', 'class', 'max_speed')) + >>> df + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + >>> df.pop('class') + 0 bird + 1 bird + 2 mammal + 3 mammal + Name: class, dtype: object + + >>> df + name max_speed + 0 falcon 389.0 + 1 parrot 24.0 + 2 lion 80.5 + 3 monkey NaN """ result = self[item] del self[item] From 794fd789603e06e86456375f92489ae4de92a99a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 10:57:41 -0500 Subject: [PATCH 25/54] PEP8 --- pandas/core/frame.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4d8b831b7d63f..b5462bbe67647 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3359,8 +3359,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, inplace = validate_bool_kwarg(inplace, 'inplace') # 10726 if by is not None: - warnings.warn("by argument to sort_index is deprecated, please use " - ".sort_values(by=...)", FutureWarning, stacklevel=2) + warnings.warn("by argument to sort_index is deprecated, " + "please use .sort_values(by=...)", + FutureWarning, stacklevel=2) if level is not None: raise ValueError("unable to simultaneously sort by and level") return self.sort_values(by, axis=axis, ascending=ascending, From daf07a64d681e70eda6211b739919cae6345e86f Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 11:01:38 -0500 Subject: [PATCH 26/54] WARN: add stacklevel to to_dict() UserWarning (#16927) (#16936) * ERR: add stacklevel to to_dict() UserWarning (#16927) * TST: Add warning testing to to_dict() * Fix warning assertion on to_dict() test * Add github issue to documentation on to_dict() warning test --- pandas/core/frame.py | 3 ++- pandas/tests/frame/test_convert_to.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5462bbe67647..9920ddf854850 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -972,7 +972,8 @@ def to_dict(self, orient='dict', into=dict): """ if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " - "columns will be omitted.", UserWarning) + "columns will be omitted.", UserWarning, + stacklevel=2) # GH16122 into_c = standardize_mapping(into) if orient.lower().startswith('d'): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 34dd138ee1c80..629c695b702fe 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -216,6 +216,13 @@ def test_to_dict_errors(self, mapping): with pytest.raises(TypeError): df.to_dict(into=mapping) + def test_to_dict_not_unique_warning(self): + # GH16927: When converting to a dict, if a column has a non-unique name + # it will be dropped, throwing a warning. 
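+        # For example, with the duplicated columns ['a', 'a', 'b'] used
+        # below, only one 'a' key can survive in the returned dict,
+        # because dict keys must be unique.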
+ df = DataFrame([[1, 2, 3]], columns=['a', 'a', 'b']) + with tm.assert_produces_warning(UserWarning): + df.to_dict() + @pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern']) def test_to_records_datetimeindex_with_tz(self, tz): # GH13937 From 4c498f8451fe4c491a6f38ed9e35da3d3ab6b9b8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 11:26:10 -0500 Subject: [PATCH 27/54] CI: fix pep8speaks .yml file --- .pep8speakes.yml => .pep8speaks.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .pep8speakes.yml => .pep8speaks.yml (100%) diff --git a/.pep8speakes.yml b/.pep8speaks.yml similarity index 100% rename from .pep8speakes.yml rename to .pep8speaks.yml From 7500218947bffd4915832e9037d9f48991e53ca3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 12:33:04 -0500 Subject: [PATCH 28/54] DOC: whatsnew 0.21.0 edits --- doc/source/whatsnew/v0.21.0.txt | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6ddf6029b99bb..34095d55b8cc9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -31,15 +31,15 @@ Other Enhancements ^^^^^^^^^^^^^^^^^^ - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here <merging.validation>` (:issue:`16270`) -- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) -- ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) -- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) -- :func:`to_pickle` has gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__ +- :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) +- :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) +- :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) +- :func:`Series.to_pickle` and :func:`DataFrame.to_pickle` have gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__ - :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) -- :func:`Dataframe.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) +- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) ..
_whatsnew_0210.api_breaking: @@ -92,9 +92,14 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in ... ValueError: Cannot operate inplace if there is no assignment +.. _whatsnew_0210.api: + +Other API Changes +^^^^^^^^^^^^^^^^^ + - Support has been dropped for Python 3.4 (:issue:`15251`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) -- Accessing a non-existent attribute on a closed :class:`HDFStore` will now +- Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) @@ -102,12 +107,6 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) - -.. _whatsnew_0210.api: - -Other API Changes -^^^^^^^^^^^^^^^^^ - - Moved definition of ``MergeError`` to the ``pandas.errors`` module. @@ -127,7 +126,7 @@ Removal of prior version deprecations/changes - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) -- :func:`eval` and :method:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) - The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) From 3955261c04d5b838488a45fe7b186399bcdca137 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 13:07:35 -0500 Subject: [PATCH 29/54] CI: disable codecov reporting --- codecov.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codecov.yml b/codecov.yml index b4552563deeaa..512bc2e82a736 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,7 +5,9 @@ coverage: status: project: default: + enabled: no target: '82' patch: default: + enabled: no target: '50' From 96168ef698ac8bbccba251258ee66958359b11bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 15 Jul 2017 12:14:34 -0700 Subject: [PATCH 30/54] MAINT: Move series.remove_na to core.dtypes.missing.remove_na_arraylike Closes gh-16935 --- pandas/core/dtypes/missing.py | 7 +++++++ pandas/core/series.py | 11 ++--------- pandas/plotting/_core.py | 12 ++++++------ pandas/tests/test_panel.py | 4 ++-- pandas/tests/test_panel4d.py | 4 ++-- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index af3a873bc2866..9913923cb7807 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py 
@@ -394,3 +394,10 @@ def na_value_for_dtype(dtype): elif is_bool_dtype(dtype): return False return np.nan + + +def remove_na_arraylike(arr): + """ + Return array-like containing only true/non-NaN values, possibly empty. + """ + return arr[notnull(lib.values_from_object(arr))] diff --git a/pandas/core/series.py b/pandas/core/series.py index e1f668dd3afda..98b548f8ab3b5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,7 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike from pandas.core.common import (is_bool_indexer, _default_index, @@ -2749,7 +2749,7 @@ def dropna(self, axis=0, inplace=False, **kwargs): axis = self._get_axis_number(axis or 0) if self._can_hold_na: - result = remove_na(self) + result = remove_na_arraylike(self) if inplace: self._update_inplace(result) else: @@ -2888,13 +2888,6 @@ def _dir_additions(self): # Supplementary functions -def remove_na(series): - """ - Return series containing only true/non-NaN values, possibly empty. - """ - return series[notnull(_values_from_object(series))] - - def _sanitize_index(data, index, copy=False): """ sanitize an index type to return an ndarray of the underlying, pass thru a non-Index diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index f8e83aea03594..9cceebb5c4cdb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -11,7 +11,7 @@ from pandas.util._decorators import cache_readonly from pandas.core.base import PandasObject -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notnull, remove_na_arraylike from pandas.core.dtypes.common import ( is_list_like, is_integer, @@ -21,7 +21,7 @@ from pandas.core.common import AbstractMethodError, isnull, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series, remove_na +from pandas.core.series import Series from pandas.core.indexes.period import PeriodIndex from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat @@ -1376,7 +1376,7 @@ def _plot(cls, ax, y, style=None, bw_method=None, ind=None, from scipy.stats import gaussian_kde from scipy import __version__ as spv - y = remove_na(y) + y = remove_na_arraylike(y) if LooseVersion(spv) >= '0.11.0': gkde = gaussian_kde(y, bw_method=bw_method) @@ -1495,13 +1495,13 @@ def _args_adjust(self): @classmethod def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): if y.ndim == 2: - y = [remove_na(v) for v in y] + y = [remove_na_arraylike(v) for v in y] # Boxplot fails with empty arrays, so need to add a NaN # if any cols are empty # GH 8181 y = [v if v.size > 0 else np.array([np.nan]) for v in y] else: - y = remove_na(y) + y = remove_na_arraylike(y) bp = ax.boxplot(y, **kwds) if return_type == 'dict': @@ -1969,7 +1969,7 @@ def maybe_color_bp(bp): def plot_group(keys, values, ax): keys = [pprint_thing(x) for x in keys] - values = [remove_na(v) for v in values] + values = [remove_na_arraylike(v) for v in values] bp = ax.boxplot(values, **kwds) if fontsize is not None: ax.tick_params(axis='both', labelsize=fontsize) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index e19e42e062932..445611c1696f5 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -10,11 +10,11 @@ import pandas 
as pd from pandas.core.dtypes.common import is_float_dtype +from pandas.core.dtypes.missing import remove_na_arraylike from pandas import (Series, DataFrame, Index, date_range, isnull, notnull, pivot, MultiIndex) from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel -from pandas.core.series import remove_na from pandas.io.formats.printing import pprint_thing from pandas import compat @@ -155,7 +155,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): if has_skipna: def skipna_wrapper(x): - nona = remove_na(x) + nona = remove_na_arraylike(x) if len(nona) == 0: return np.nan return alternative(nona) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index e1995316e7b7c..18643aff15e9b 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -7,10 +7,10 @@ import numpy as np from pandas.core.dtypes.common import is_float_dtype +from pandas.core.dtypes.missing import remove_na_arraylike from pandas import Series, Index, isnull, notnull from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D -from pandas.core.series import remove_na from pandas.tseries.offsets import BDay from pandas.util.testing import (assert_frame_equal, assert_series_equal, @@ -118,7 +118,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): if has_skipna: def skipna_wrapper(x): - nona = remove_na(x) + nona = remove_na_arraylike(x) if len(nona) == 0: return np.nan return alternative(nona) From 2cd85ca748f62d7430b30e2d9ddd036e972cc64e Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 16:28:23 -0500 Subject: [PATCH 31/54] Support non unique period indexes on join and merge operations (#16949) * Support non unique period indexes on join and merge operations * Add frame assertion on tests and release notes * Explicitly use dtype int64 on arange --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/tests/reshape/test_join.py | 12 ++++++++++++ pandas/tests/reshape/test_merge.py | 12 ++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 34095d55b8cc9..11d3e4cf964aa 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -184,7 +184,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) Numeric diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1053c1610175..bbbc19b36964d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3119,14 +3119,14 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self.values], + left_idx, right_idx = _get_join_indexers([self._values], [other._values], how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self.values.take(left_idx)) + join_index = np.asarray(self._values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e25661fb65271..e4894307918c6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -550,6 +550,18 @@ def 
test_join_mixed_non_unique_index(self): index=[1, 2, 2, 'a']) tm.assert_frame_equal(result, expected) + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.join(df2, how='inner', rsuffix='_df2') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 4ac376a9752cb..919675188576e 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -585,6 +585,18 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how='inner') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, freq='D'), From 8e3d8315d63f61c1cc7a0ea9ad24cdd63b63f6b8 Mon Sep 17 00:00:00 2001 From: Morgan243 Date: Sat, 15 Jul 2017 19:13:49 -0400 Subject: [PATCH 32/54] BUG: Set secondary axis font size for `secondary_y` during plotting The parameter was not being respected for `secondary_y`. 
Closes gh-12565 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/plotting/_core.py | 9 +++++++++ pandas/tests/plotting/test_frame.py | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 11d3e4cf964aa..df53c4a3d6caf 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -166,7 +166,7 @@ I/O Plotting ^^^^^^^^ - +- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) Groupby/Resample/Rolling diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9cceebb5c4cdb..a623288efc1ae 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -379,6 +379,11 @@ def _post_plot_logic_common(self, ax, data): self._apply_axis_properties(ax.xaxis, rot=self.rot, fontsize=self.fontsize) self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) + elif self.orientation == 'horizontal': if self._need_to_set_index: yticklabels = [labels.get(y, '') for y in ax.get_yticks()] @@ -386,6 +391,10 @@ def _post_plot_logic_common(self, ax, data): self._apply_axis_properties(ax.yaxis, rot=self.rot, fontsize=self.fontsize) self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) else: # pragma no cover raise ValueError diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 7878740f64e55..6d813ac76cc4e 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2733,6 +2733,23 @@ def test_rcParams_bar_colors(self): barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] + @pytest.mark.parametrize('method', ['line', 'barh', 'bar']) + def test_secondary_axis_font_size(self, method): + # GH: 12565 + df = (pd.DataFrame(np.random.randn(15, 2), + columns=list('AB')) + .assign(C=lambda df: df.B.cumsum()) + .assign(D=lambda df: df.C * 1.1)) + + fontsize = 20 + sy = ['C', 'D'] + + kwargs = dict(secondary_y=sy, fontsize=fontsize, + mark_right=True) + ax = getattr(df.plot, method)(**kwargs) + self._check_ticks_props(axes=ax.right_ax, + ylabelsize=fontsize) + def _generate_4_axes_via_gridspec(): import matplotlib.pyplot as plt From 4f04d0be1fe22dabaff6c0eeb6162bffb763af46 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 16:04:03 -0500 Subject: [PATCH 33/54] DOC: more whatsnew fixes --- doc/source/whatsnew/v0.21.0.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index df53c4a3d6caf..a5d4259480ba8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -46,11 +46,11 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0210.api_breaking.pandas_eval: + Improved error handling during item assignment in pd.eval ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. _whatsnew_0210.api_breaking.pandas_eval: - :func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) @@ -154,8 +154,8 @@ Indexing - When called with a null slice (e.g. 
``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). - When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). - Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). -- Fixed ``TimedeltaIndex.get_loc`` handling of ``np.timedelta64`` inputs (:issue:`16909`). -- Fix :meth:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). +- Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). I/O ^^^ @@ -172,9 +172,9 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) -- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) -- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) Sparse @@ -194,7 +194,7 @@ Numeric Categorical ^^^^^^^^^^^ -- Bug in ``:func:Series.isin()`` when called with a categorical (:issue`16639`) +- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) Other From 1212fe034b7302f40bf253aedd9e3989514eeb52 Mon Sep 17 00:00:00 2001 From: aernlund Date: Sat, 15 Jul 2017 18:43:02 -0500 Subject: [PATCH 34/54] DOC: Reset index examples closes #16416 Author: aernlund Closes #16967 from aernlund/reset_index_docs and squashes the following commits: 3c6a4b6 [aernlund] DOC: added examples to reset_index 4838155 [aernlund] DOC: added examples to reset_index 2a51e2b [aernlund] DOC: added examples to reset_index --- pandas/core/frame.py | 32 ++++++++++++++++++++++++++++++++ pandas/core/series.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9920ddf854850..9a79ca1d4eab1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3020,6 +3020,38 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, Returns ------- resetted : DataFrame + + Examples + -------- + >>> df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, + ... index=pd.Index(['a', 'b', 'c', 'd'], + ... name='idx')) + >>> df.reset_index() + idx a b + 0 a 1 5 + 1 b 2 6 + 2 c 3 7 + 3 d 4 8 + + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', + ... 'foo', 'qux', 'qux']), + ... np.array(['one', 'two', 'one', 'two', 'one', 'two', + ... 'one', 'two'])] + >>> df2 = pd.DataFrame( + ... np.random.randn(8, 4), + ... index=pd.MultiIndex.from_arrays(arrays, + ...
names=['a', 'b'])) + >>> df2.reset_index(level='a') + a 0 1 2 3 + b + one bar -1.099413 0.291838 0.598198 0.162181 + two bar -0.312184 -0.119904 0.250360 0.364378 + one baz 0.713596 -0.490636 0.074967 -0.297857 + two baz 0.998397 0.524499 -2.228976 0.901155 + one foo 0.923204 0.920695 1.264488 1.476921 + two foo -1.566922 0.783278 -0.073656 0.266027 + one qux -0.230470 0.109800 -1.383409 0.048421 + two qux -0.865993 -0.865984 0.705367 -0.170446 """ inplace = validate_bool_kwarg(inplace, 'inplace') if inplace: diff --git a/pandas/core/series.py b/pandas/core/series.py index 98b548f8ab3b5..4d5b718ce0ae9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -948,6 +948,37 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): Returns ---------- resetted : DataFrame, or Series if drop == True + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], index=pd.Index(['a', 'b', 'c', 'd'], + ... name = 'idx')) + >>> s.reset_index() + idx 0 + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', + ... 'foo', 'qux', 'qux']), + ... np.array(['one', 'two', 'one', 'two', 'one', 'two', + ... 'one', 'two'])] + >>> s2 = pd.Series( + ... np.random.randn(8), + ... index=pd.MultiIndex.from_arrays(arrays, + ... names=['a', 'b'])) + >>> s2.reset_index(level='a') + a 0 + b + one bar -0.286320 + two bar -0.587934 + one baz 0.710491 + two baz -1.429006 + one foo 0.790700 + two foo 0.824863 + one qux -0.718963 + two qux -0.055028 """ inplace = validate_bool_kwarg(inplace, 'inplace') if drop: From 3524edb82e7945998876591813b7e77fe620ce36 Mon Sep 17 00:00:00 2001 From: Daniel Chen Date: Sat, 15 Jul 2017 18:49:08 -0500 Subject: [PATCH 35/54] channel from pandas to conda-forge (#16966) --- doc/source/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index bfcf560565977..b44d0f36b86a1 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -171,7 +171,7 @@ other dependencies, you can install them as follows:: To install *all* pandas dependencies you can do the following:: - conda install -n pandas_dev -c pandas --file ci/requirements_all.txt + conda install -n pandas_dev -c conda-forge --file ci/requirements_all.txt To work in this environment, Windows users should ``activate`` it as follows:: From 53ae390f442e745503745e5fa8ed7b06b72fd102 Mon Sep 17 00:00:00 2001 From: Prasanjit Prakash Date: Sun, 16 Jul 2017 06:42:57 +0530 Subject: [PATCH 36/54] BUG: coercing of bools in groupby transform (#16895) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/dtypes/cast.py | 7 ++++--- pandas/tests/dtypes/test_cast.py | 8 +++++++- pandas/tests/groupby/test_transform.py | 13 +++++++++++++ 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a5d4259480ba8..762107a261090 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -175,7 +175,7 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) - +- Bug in ``groupby.transform()`` that
would coerce boolean dtypes back to float (:issue:`16875`) Sparse ^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 16b0a5c8a74ca..6532e17695c86 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -110,9 +110,7 @@ def trans(x): # noqa np.prod(result.shape)): return result - if issubclass(dtype.type, np.floating): - return result.astype(dtype) - elif is_bool_dtype(dtype) or is_integer_dtype(dtype): + if is_bool_dtype(dtype) or is_integer_dtype(dtype): # if we don't have any elements, just astype it if not np.prod(result.shape): @@ -144,6 +142,9 @@ def trans(x): # noqa # hit here if (new_result == result).all(): return new_result + elif (issubclass(dtype.type, np.floating) and + not is_bool_dtype(result.dtype)): + return result.astype(dtype) # a datetimelike # GH12821, iNaT is casted to float diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 767e99d98cf29..6e07487b3e04f 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT +from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT, Series from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, @@ -45,6 +45,12 @@ def test_downcast_conv(self): expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) + # GH16875 coercing of bools + ser = Series([True, True, False]) + result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) + expected = ser + tm.assert_series_equal(result, expected) + # conversions expected = np.array([1, 2]) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 40434ff510421..98839a17d6e0c 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -195,6 +195,19 @@ def test_transform_bug(self): expected = Series(np.arange(5, 0, step=-1), name='B') assert_series_equal(result, expected) + def test_transform_numeric_to_boolean(self): + # GH 16875 + # inconsistency in transforming boolean values + expected = pd.Series([True, True], name='A') + + df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]}) + result = df.groupby('B').A.transform(lambda x: True) + assert_series_equal(result, expected) + + df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]}) + result = df.groupby('B').A.transform(lambda x: True) + assert_series_equal(result, expected) + def test_transform_datetime_to_timedelta(self): # GH 15429 # transforming a datetime to timedelta From 01a8be3578e9d0b2a66b8318c5477e3e6cfb75f2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 15 Jul 2017 21:20:55 -0400 Subject: [PATCH 37/54] DOC: misspelling in DatetimeIndex.indexer_between_time [CI skip] (#16963) --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d8aae2367976b..e6bc1790f2992 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1882,7 +1882,7 @@ def indexer_between_time(self, start_time, end_time, include_start=True, Select values between particular times of day (e.g., 9:00-9:30AM). Return values of the index between two times. If start_time or - end_time are strings then tseres.tools.to_time is used to convert to + end_time are strings then tseries.tools.to_time is used to convert to a time object. 
Parameters From 148e038bfaf2a3893b52e28b6469cf5984eec794 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 15 Jul 2017 20:42:56 -0500 Subject: [PATCH 38/54] CLN: some residual code removed, xref to #16761 (#16974) --- pandas/core/config_init.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index e70db1d13e376..04563907582ee 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -255,18 +255,6 @@ def use_numexpr_cb(key): df.info() (the behaviour in earlier versions of pandas). """ -pc_mpl_style_doc = """ -: bool - Setting this to 'default' will modify the rcParams used by matplotlib - to give plots a more pleasing visual style by default. - Setting this to None/False restores the values to their initial value. -""" - -pc_mpl_style_deprecation_warning = """ -mpl_style had been deprecated and will be removed in a future version. -Use `matplotlib.pyplot.style.use` instead. -""" - pc_memory_usage_doc = """ : bool, string or None This specifies if the memory usage of a DataFrame should be displayed when From 9c096d29a1e9a68b8151de4896b0d9684383821a Mon Sep 17 00:00:00 2001 From: Iva Miholic Date: Sun, 16 Jul 2017 09:04:35 +0100 Subject: [PATCH 39/54] ENH: Create a 'Y' alias for date_range yearly frequency Closes gh-9313 --- pandas/tests/tseries/test_frequencies.py | 41 ++++++++++++++++-------- pandas/tseries/frequencies.py | 22 +++++++++++++ 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 54d12317b0bf8..4bcd0b49db7e0 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -248,9 +248,10 @@ def test_anchored_shortcuts(self): # ensure invalid cases fail as expected invalid_anchors = ['SM-0', 'SM-28', 'SM-29', - 'SM-FOO', 'BSM', 'SM--1' + 'SM-FOO', 'BSM', 'SM--1', 'SMS-1', 'SMS-28', 'SMS-30', - 'SMS-BAR', 'BSMS', 'SMS--2'] + 'SMS-BAR', 'SMS-BYR' 'BSMS', + 'SMS--2'] for invalid_anchor in invalid_anchors: with tm.assert_raises_regex(ValueError, 'Invalid frequency: '): @@ -292,11 +293,15 @@ def test_get_rule_month(): result = frequencies._get_rule_month('A-DEC') assert (result == 'DEC') + result = frequencies._get_rule_month('Y-DEC') + assert (result == 'DEC') result = frequencies._get_rule_month(offsets.YearEnd()) assert (result == 'DEC') result = frequencies._get_rule_month('A-MAY') assert (result == 'MAY') + result = frequencies._get_rule_month('Y-MAY') + assert (result == 'MAY') result = frequencies._get_rule_month(offsets.YearEnd(month=5)) assert (result == 'MAY') @@ -305,6 +310,10 @@ def test_period_str_to_code(): assert (frequencies._period_str_to_code('A') == 1000) assert (frequencies._period_str_to_code('A-DEC') == 1000) assert (frequencies._period_str_to_code('A-JAN') == 1001) + assert (frequencies._period_str_to_code('Y') == 1000) + assert (frequencies._period_str_to_code('Y-DEC') == 1000) + assert (frequencies._period_str_to_code('Y-JAN') == 1001) + assert (frequencies._period_str_to_code('Q') == 2000) assert (frequencies._period_str_to_code('Q-DEC') == 2000) assert (frequencies._period_str_to_code('Q-FEB') == 2002) @@ -349,6 +358,10 @@ def test_freq_code(self): assert frequencies.get_freq('3A') == 1000 assert frequencies.get_freq('-1A') == 1000 + assert frequencies.get_freq('Y') == 1000 + assert frequencies.get_freq('3Y') == 1000 + assert frequencies.get_freq('-1Y') == 1000 + assert frequencies.get_freq('W') == 4000 assert 
frequencies.get_freq('W-MON') == 4001 assert frequencies.get_freq('W-FRI') == 4005 @@ -369,6 +382,13 @@ def test_freq_group(self): assert frequencies.get_freq_group('-1A') == 1000 assert frequencies.get_freq_group('A-JAN') == 1000 assert frequencies.get_freq_group('A-MAY') == 1000 + + assert frequencies.get_freq_group('Y') == 1000 + assert frequencies.get_freq_group('3Y') == 1000 + assert frequencies.get_freq_group('-1Y') == 1000 + assert frequencies.get_freq_group('Y-JAN') == 1000 + assert frequencies.get_freq_group('Y-MAY') == 1000 + assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 @@ -790,12 +810,6 @@ def test_series(self): for freq in [None, 'L']: s = Series(period_range('2013', periods=10, freq=freq)) pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) - for freq in ['Y']: - - msg = frequencies._INVALID_FREQ_ERROR - with tm.assert_raises_regex(ValueError, msg): - s = Series(period_range('2013', periods=10, freq=freq)) - pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # DateTimeIndex for freq in ['M', 'L', 'S']: @@ -812,11 +826,12 @@ def test_legacy_offset_warnings(self): 'W@FRI', 'W@SAT', 'W@SUN', 'Q@JAN', 'Q@FEB', 'Q@MAR', 'A@JAN', 'A@FEB', 'A@MAR', 'A@APR', 'A@MAY', 'A@JUN', 'A@JUL', 'A@AUG', 'A@SEP', 'A@OCT', 'A@NOV', 'A@DEC', - 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', 'WOM@4MON', - 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', 'WOM@4TUE', - 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', 'WOM@4WED', - 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', 'WOM@4THU' - 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', 'WOM@4FRI'] + 'Y@JAN', 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', + 'WOM@4MON', 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', + 'WOM@4TUE', 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', + 'WOM@4WED', 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', + 'WOM@4THU', 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', + 'WOM@4FRI'] msg = frequencies._INVALID_FREQ_ERROR for freq in freqs: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c5f6c00a4005a..5c3c90520d1c3 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -422,6 +422,27 @@ def get_period_alias(offset_str): return _offset_to_period_map.get(offset_str, None) +_pure_alias = { + # 'A' is equivalent to 'Y'. + 'Y': 'A', + 'YS': 'AS', + 'BY': 'BA', + 'BYS': 'BAS', + 'Y-DEC': 'A-DEC', + 'Y-JAN': 'A-JAN', + 'Y-FEB': 'A-FEB', + 'Y-MAR': 'A-MAR', + 'Y-APR': 'A-APR', + 'Y-MAY': 'A-MAY', + 'Y-JUN': 'A-JUN', + 'Y-JUL': 'A-JUL', + 'Y-AUG': 'A-AUG', + 'Y-SEP': 'A-SEP', + 'Y-OCT': 'A-OCT', + 'Y-NOV': 'A-NOV', +} + + _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', @@ -718,6 +739,7 @@ def get_standard_freq(freq): def _period_str_to_code(freqstr): + freqstr = _pure_alias.get(freqstr, freqstr) freqstr = _lite_rule_alias.get(freqstr, freqstr) if freqstr not in _dont_uppercase: From 7ffe7fc21f3dc4ca444de9c83dbf61313b6986e2 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Jul 2017 02:57:14 -0700 Subject: [PATCH 40/54] Revert "ENH: Create a 'Y' alias for date_range yearly frequency" (#16976) This reverts commit 9c096d29a1e9a68b8151de4896b0d9684383821a, as it was prematurely made. 
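For context, a minimal sketch of the aliasing being reverted (illustrative only, not part of this patch): with the alias in place, 'Y'-based period frequencies resolved to their 'A'-based equivalents, e.g.

    import pandas as pd

    pd.period_range('2013', periods=3, freq='A')  # valid before and after
    pd.period_range('2013', periods=3, freq='Y')  # ValueError again once reverted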
--- pandas/tests/tseries/test_frequencies.py | 41 ++++++++---------------- pandas/tseries/frequencies.py | 22 ------------- 2 files changed, 13 insertions(+), 50 deletions(-) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 4bcd0b49db7e0..54d12317b0bf8 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -248,10 +248,9 @@ def test_anchored_shortcuts(self): # ensure invalid cases fail as expected invalid_anchors = ['SM-0', 'SM-28', 'SM-29', - 'SM-FOO', 'BSM', 'SM--1', + 'SM-FOO', 'BSM', 'SM--1' 'SMS-1', 'SMS-28', 'SMS-30', - 'SMS-BAR', 'SMS-BYR' 'BSMS', - 'SMS--2'] + 'SMS-BAR', 'BSMS', 'SMS--2'] for invalid_anchor in invalid_anchors: with tm.assert_raises_regex(ValueError, 'Invalid frequency: '): @@ -293,15 +292,11 @@ def test_get_rule_month(): result = frequencies._get_rule_month('A-DEC') assert (result == 'DEC') - result = frequencies._get_rule_month('Y-DEC') - assert (result == 'DEC') result = frequencies._get_rule_month(offsets.YearEnd()) assert (result == 'DEC') result = frequencies._get_rule_month('A-MAY') assert (result == 'MAY') - result = frequencies._get_rule_month('Y-MAY') - assert (result == 'MAY') result = frequencies._get_rule_month(offsets.YearEnd(month=5)) assert (result == 'MAY') @@ -310,10 +305,6 @@ def test_period_str_to_code(): assert (frequencies._period_str_to_code('A') == 1000) assert (frequencies._period_str_to_code('A-DEC') == 1000) assert (frequencies._period_str_to_code('A-JAN') == 1001) - assert (frequencies._period_str_to_code('Y') == 1000) - assert (frequencies._period_str_to_code('Y-DEC') == 1000) - assert (frequencies._period_str_to_code('Y-JAN') == 1001) - assert (frequencies._period_str_to_code('Q') == 2000) assert (frequencies._period_str_to_code('Q-DEC') == 2000) assert (frequencies._period_str_to_code('Q-FEB') == 2002) @@ -358,10 +349,6 @@ def test_freq_code(self): assert frequencies.get_freq('3A') == 1000 assert frequencies.get_freq('-1A') == 1000 - assert frequencies.get_freq('Y') == 1000 - assert frequencies.get_freq('3Y') == 1000 - assert frequencies.get_freq('-1Y') == 1000 - assert frequencies.get_freq('W') == 4000 assert frequencies.get_freq('W-MON') == 4001 assert frequencies.get_freq('W-FRI') == 4005 @@ -382,13 +369,6 @@ def test_freq_group(self): assert frequencies.get_freq_group('-1A') == 1000 assert frequencies.get_freq_group('A-JAN') == 1000 assert frequencies.get_freq_group('A-MAY') == 1000 - - assert frequencies.get_freq_group('Y') == 1000 - assert frequencies.get_freq_group('3Y') == 1000 - assert frequencies.get_freq_group('-1Y') == 1000 - assert frequencies.get_freq_group('Y-JAN') == 1000 - assert frequencies.get_freq_group('Y-MAY') == 1000 - assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 @@ -810,6 +790,12 @@ def test_series(self): for freq in [None, 'L']: s = Series(period_range('2013', periods=10, freq=freq)) pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) + for freq in ['Y']: + + msg = frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + s = Series(period_range('2013', periods=10, freq=freq)) + pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # DateTimeIndex for freq in ['M', 'L', 'S']: @@ -826,12 +812,11 @@ def test_legacy_offset_warnings(self): 'W@FRI', 'W@SAT', 'W@SUN', 'Q@JAN', 'Q@FEB', 'Q@MAR', 'A@JAN', 'A@FEB', 'A@MAR', 'A@APR', 
'A@MAY', 'A@JUN', 'A@JUL', 'A@AUG', 'A@SEP', 'A@OCT', 'A@NOV', 'A@DEC', - 'Y@JAN', 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', - 'WOM@4MON', 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', - 'WOM@4TUE', 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', - 'WOM@4WED', 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', - 'WOM@4THU', 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', - 'WOM@4FRI'] + 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', 'WOM@4MON', + 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', 'WOM@4TUE', + 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', 'WOM@4WED', + 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', 'WOM@4THU' + 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', 'WOM@4FRI'] msg = frequencies._INVALID_FREQ_ERROR for freq in freqs: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 5c3c90520d1c3..c5f6c00a4005a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -422,27 +422,6 @@ def get_period_alias(offset_str): return _offset_to_period_map.get(offset_str, None) -_pure_alias = { - # 'A' is equivalent to 'Y'. - 'Y': 'A', - 'YS': 'AS', - 'BY': 'BA', - 'BYS': 'BAS', - 'Y-DEC': 'A-DEC', - 'Y-JAN': 'A-JAN', - 'Y-FEB': 'A-FEB', - 'Y-MAR': 'A-MAR', - 'Y-APR': 'A-APR', - 'Y-MAY': 'A-MAY', - 'Y-JUN': 'A-JUN', - 'Y-JUL': 'A-JUL', - 'Y-AUG': 'A-AUG', - 'Y-SEP': 'A-SEP', - 'Y-OCT': 'A-OCT', - 'Y-NOV': 'A-NOV', -} - - _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', @@ -739,7 +718,6 @@ def get_standard_freq(freq): def _period_str_to_code(freqstr): - freqstr = _pure_alias.get(freqstr, freqstr) freqstr = _lite_rule_alias.get(freqstr, freqstr) if freqstr not in _dont_uppercase: From 1d1c03ef807b5ea3cd589b60ea578c88a0c1227c Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Sun, 16 Jul 2017 17:23:30 +0200 Subject: [PATCH 41/54] DOC: behavior when slicing with missing bounds (#16932) closes #16917 --- doc/source/indexing.rst | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index f988fb7cd6806..1659d57b33b84 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -78,8 +78,10 @@ of multi-axis indexing. *label* of the index. This use is **not** an integer position along the index) - A list or array of labels ``['a', 'b', 'c']`` - - A slice object with labels ``'a':'f'``, (note that contrary to usual python - slices, **both** the start and the stop are included!) + - A slice object with labels ``'a':'f'`` (note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! - also see :ref:`Slicing with labels + <indexing.slicing_with_labels>`) - A boolean array - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and that returns valid output for indexing (one of the above) @@ -330,13 +332,16 @@ Selection By Label dfl.loc['20130102':'20130104'] pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. -**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**. +**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. Integers are valid labels, but they refer to the label **and not the position**. The ``.loc`` attribute is the primary access method.
The following are valid inputs: - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index) - A list or array of labels ``['a', 'b', 'c']`` -- A slice object with labels ``'a':'f'`` (note that contrary to usual python slices, **both** the start and the stop are included!) +- A slice object with labels ``'a':'f'`` (note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! - also see :ref:`Slicing with labels + <indexing.slicing_with_labels>`) - A boolean array - A ``callable``, see :ref:`Selection By Callable <indexing.callable>` @@ -390,6 +395,34 @@ For getting a value explicitly (equiv to deprecated ``df.get_value('a','A')``) # this is also equivalent to ``df1.at['a','A']`` df1.loc['a', 'A'] +.. _indexing.slicing_with_labels: + +Slicing with labels +~~~~~~~~~~~~~~~~~~~ + +When using ``.loc`` with slices, if both the start and the stop labels are +present in the index, then elements *located* between the two (including them) +are returned: + +.. ipython:: python + + s = pd.Series(list('abcde'), index=[0,3,2,5,4]) + s.loc[3:5] + +If at least one of the two is absent, but the index is sorted, and can be +compared against start and stop labels, then slicing will still work as +expected, by selecting labels which *rank* between the two: + +.. ipython:: python + + s.sort_index() + s.sort_index().loc[1:6] + +However, if at least one of the two is absent *and* the index is not sorted, an +error will be raised (since doing otherwise would be computationally expensive, +as well as potentially ambiguous for mixed type indexes). For instance, in the +above example, ``s.loc[1:6]`` would raise ``KeyError``. + .. _indexing.integer: Selection By Position From 745c01265e31afb9048fe461dfd8c88ad2606702 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Jul 2017 08:31:12 -0700 Subject: [PATCH 42/54] TST: Add test for sub-char in read_csv (#16977) Closes gh-16893.
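A minimal sketch of the behavior exercised by the new test (the inline data below is illustrative; the real sub_char.csv fixture embeds the actual "\x1a" SUB control character in its header):

    import pandas as pd
    from pandas.compat import StringIO

    data = 'a,"\x1ab",c\n1,2,3'
    df = pd.read_csv(StringIO(data))
    assert list(df.columns) == ['a', '\x1ab', 'c']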
---
 pandas/tests/io/parser/common.py         | 10 ++++++++++
 pandas/tests/io/parser/data/sub_char.csv |  2 ++
 2 files changed, 12 insertions(+)
 create mode 100644 pandas/tests/io/parser/data/sub_char.csv

diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 584a6561b505b..4d1f9936af983 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -1677,6 +1677,16 @@ def test_internal_eof_byte_to_file(self):
             result = self.read_csv(path)
             tm.assert_frame_equal(result, expected)
 
+    def test_sub_character(self):
+        # see gh-16893
+        dirpath = tm.get_data_path()
+        filename = os.path.join(dirpath, "sub_char.csv")
+
+        expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"])
+        result = self.read_csv(filename)
+
+        tm.assert_frame_equal(result, expected)
+
     def test_file_handles(self):
         # GH 14418 - don't close user provided file handles
 
diff --git a/pandas/tests/io/parser/data/sub_char.csv b/pandas/tests/io/parser/data/sub_char.csv
new file mode 100644
index 0000000000000..ff1fa777832c7
--- /dev/null
+++ b/pandas/tests/io/parser/data/sub_char.csv
@@ -0,0 +1,2 @@
+a,"b",c
+1,2,3
\ No newline at end of file
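[Editor's note, not part of the patch series: the SUB byte (0x1A) in
sub_char.csv is a control character and does not survive plain-text rendering
above; the quoted header field is really "\x1ab", as the test's expected
columns show. A hypothetical minimal reproduction of gh-16893, using an
in-memory buffer instead of the data file:]

    import pandas as pd
    from io import StringIO

    # A SUB character inside a quoted field should be parsed as ordinary
    # data rather than being treated as an end-of-file marker.
    data = 'a,"\x1ab",c\n1,2,3'
    df = pd.read_csv(StringIO(data))
    print(df.columns.tolist())  # expected after the fix: ['a', '\x1ab', 'c']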
From cbd0354d024d6d45c67fceab69f908eb51339f70 Mon Sep 17 00:00:00 2001
From: rdk1024
Date: Sun, 16 Jul 2017 05:32:45 -1000
Subject: [PATCH 43/54] DEPR: deprecate html.border option (#16970)

---
 doc/source/options.rst                  |  2 +-
 doc/source/whatsnew/v0.21.0.txt         |  1 +
 pandas/core/config_init.py              | 22 ++++++++++++++++------
 pandas/io/formats/format.py             |  2 +-
 pandas/tests/io/formats/test_to_html.py |  7 ++++++-
 5 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/doc/source/options.rst b/doc/source/options.rst
index 6ff5b76014c95..f373705a96f48 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -400,7 +400,7 @@ display.width 80 Width of the display in charact
 display.html.table_schema False Whether to publish a Table Schema
 representation for frontends that
 support it.
-html.border 1 A ``border=value`` attribute is
+display.html.border 1 A ``border=value`` attribute is
 inserted in the ``<table>`` tag for
 the DataFrame HTML repr.
 io.excel.xls.writer xlwt The default Excel writer engine for

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 762107a261090..7c52cf6f450b2 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -116,6 +116,7 @@ Deprecations
 ~~~~~~~~~~~~
 
 - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
+- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
 
 .. _whatsnew_0210.prior_deprecations:

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 04563907582ee..ae3001564a62f 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -202,6 +202,17 @@ def use_numexpr_cb(key):
     (default: False)
 """
 
+pc_html_border_doc = """
+: int
+    A ``border=value`` attribute is inserted in the ``<table>`` tag
+    for the DataFrame HTML repr.
+"""
+
+pc_html_border_deprecation_warning = """\
+html.border has been deprecated, use display.html.border instead
+(currently both are identical)
+"""
+
 pc_line_width_deprecation_warning = """\
 line_width has been deprecated, use display.width instead
 (currently both are identical)
@@ -369,6 +380,8 @@ def table_schema_cb(key):
                        validator=is_bool)
     cf.register_option('html.table_schema', False, pc_table_schema_doc,
                        validator=is_bool, cb=table_schema_cb)
+    cf.register_option('html.border', 1, pc_html_border_doc,
+                       validator=is_int)
 
 
 cf.deprecate_option('display.line_width',
@@ -378,16 +391,13 @@ def table_schema_cb(key):
 cf.deprecate_option('display.height', msg=pc_height_deprecation_warning,
                     rkey='display.max_rows')
 
-pc_html_border_doc = """
-: int
-    A ``border=value`` attribute is inserted in the ``<table>`` tag
-    for the DataFrame HTML repr.
-"""
-
 with cf.config_prefix('html'):
     cf.register_option('border', 1, pc_html_border_doc,
                        validator=is_int)
 
+cf.deprecate_option('html.border', msg=pc_html_border_deprecation_warning,
+                    rkey='display.html.border')
+
 
 tc_sim_interactive_doc = """
 : boolean

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 0627ca9179509..23eb3bb05fd0a 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1064,7 +1064,7 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None,
                          self.max_cols < len(self.fmt.columns))
         self.notebook = notebook
         if border is None:
-            border = get_option('html.border')
+            border = get_option('display.html.border')
         self.border = border
 
     def write(self, s, indent=0):

diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
index 9f4e532ec2287..1e174c34221d5 100644
--- a/pandas/tests/io/formats/test_to_html.py
+++ b/pandas/tests/io/formats/test_to_html.py
@@ -1401,7 +1401,7 @@ def test_to_html_border(self):
 
     def test_to_html_border_option(self):
         df = DataFrame({'A': [1, 2]})
-        with pd.option_context('html.border', 0):
+        with pd.option_context('display.html.border', 0):
             result = df.to_html()
         assert 'border="0"' in result
         assert 'border="0"' in df._repr_html_()
@@ -1411,6 +1411,11 @@ def test_to_html_border_zero(self):
         result = df.to_html(border=0)
         assert 'border="0"' in result
 
+    def test_display_option_warning(self):
+        with tm.assert_produces_warning(DeprecationWarning,
+                                        check_stacklevel=False):
+            pd.options.html.border
+
     def test_to_html(self):
         # big mixed
         biggie = DataFrame({'A': np.random.randn(200),
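[Editor's note, not part of the patch series: a short sketch of how the renamed
option behaves once this patch is applied. The old key keeps working as an
alias for the new one but emits a DeprecationWarning, mirroring
test_display_option_warning above:]

    import warnings
    import pandas as pd

    # New, preferred location of the option.
    pd.set_option('display.html.border', 0)

    # The old key is redirected to display.html.border and warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        border = pd.options.html.border

    print(border)  # 0, same value as display.html.border
    print(any(issubclass(w.category, DeprecationWarning) for w in caught))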
From 692b5eeeff9b8e8c750f3e64db0c39dc149a73e8 Mon Sep 17 00:00:00 2001
From: fding253
Date: Sun, 16 Jul 2017 10:55:33 -0500
Subject: [PATCH 44/54] DOC: document convention argument for resample() (#16965)

* DOC: document convention argument for resample()
---
 pandas/core/generic.py | 43 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a4bb746722c1e..e4e2e0093b1a6 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4826,6 +4826,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
     label : {'right', 'left'}
         Which bin edge label to label bucket with
     convention : {'start', 'end', 's', 'e'}
+        For PeriodIndex only, controls whether to use the start or end of
+        `rule`
     loffset : timedelta
         Adjust the resampled time labels
     base : int, default 0
@@ -4946,6 +4948,47 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
     2000-01-01 00:06:00    26
     Freq: 3T, dtype: int64
 
+    For a Series with a PeriodIndex, the keyword `convention` can be
+    used to control whether to use the start or end of `rule`.
+
+    >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',
+    ...                                             freq='A',
+    ...                                             periods=2))
+    >>> s
+    2012    1
+    2013    2
+    Freq: A-DEC, dtype: int64
+
+    Resample by month using 'start' `convention`. Values are assigned to
+    the first month of the period.
+
+    >>> s.resample('M', convention='start').asfreq().head()
+    2012-01    1.0
+    2012-02    NaN
+    2012-03    NaN
+    2012-04    NaN
+    2012-05    NaN
+    Freq: M, dtype: float64
+
+    Resample by month using 'end' `convention`. Values are assigned to
+    the last month of the period.
+ + >>> s.resample('M', convention='end').asfreq() + 2012-12 1.0 + 2013-01 NaN + 2013-02 NaN + 2013-03 NaN + 2013-04 NaN + 2013-05 NaN + 2013-06 NaN + 2013-07 NaN + 2013-08 NaN + 2013-09 NaN + 2013-10 NaN + 2013-11 NaN + 2013-12 2.0 + Freq: M, dtype: float64 + For DataFrame objects, the keyword ``on`` can be used to specify the column instead of the index for resampling. From ea487fc9b197285f25b066450c46fc456db09e2a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 16 Jul 2017 23:19:27 -0700 Subject: [PATCH 45/54] DOC: Clarify 'it' in aggregate doc (#16989) Closes gh-16988. --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e4e2e0093b1a6..f12592feaa4c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3152,7 +3152,7 @@ def pipe(self, func, *args, **kwargs): (e.g., np.mean(arr_2d, axis=0)) as opposed to mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - agg is an alias for aggregate. Use it. + `agg` is an alias for `aggregate`. Use the alias. Returns ------- From ec927a47e472eebb5ba7086dcc15f3dda1c832cd Mon Sep 17 00:00:00 2001 From: cclauss Date: Mon, 17 Jul 2017 14:59:14 +0200 Subject: [PATCH 46/54] CLN/COMPAT: for various py2/py3 in doc/bench scripts (#16984) --- asv_bench/vbench_to_asv.py | 6 +- bench/alignment.py | 22 -- bench/bench_dense_to_sparse.py | 14 - bench/bench_get_put_value.py | 56 ---- bench/bench_groupby.py | 66 ----- bench/bench_join_panel.py | 85 ------ bench/bench_khash_dict.py | 89 ------ bench/bench_merge.R | 161 ---------- bench/bench_merge.py | 105 ------- bench/bench_merge_sqlite.py | 87 ------ bench/bench_pivot.R | 27 -- bench/bench_pivot.py | 16 - bench/bench_take_indexing.py | 55 ---- bench/bench_unique.py | 278 ------------------ bench/bench_with_subset.R | 53 ---- bench/bench_with_subset.py | 116 -------- bench/better_unique.py | 80 ----- bench/duplicated.R | 22 -- bench/io_roundtrip.py | 116 -------- bench/larry.py | 0 bench/serialize.py | 89 ------ bench/test.py | 70 ----- bench/zoo_bench.R | 71 ----- bench/zoo_bench.py | 36 --- doc/source/conf.py | 5 + .../ipython_sphinxext/ipython_directive.py | 4 +- scripts/find_commits_touching_func.py | 10 +- scripts/windows_builder/build_27-32.bat | 25 -- scripts/windows_builder/build_27-64.bat | 25 -- scripts/windows_builder/build_34-32.bat | 27 -- scripts/windows_builder/build_34-64.bat | 27 -- scripts/windows_builder/check_and_build.bat | 2 - scripts/windows_builder/check_and_build.py | 194 ------------ scripts/windows_builder/readme.txt | 17 -- 34 files changed, 14 insertions(+), 2042 deletions(-) delete mode 100644 bench/alignment.py delete mode 100644 bench/bench_dense_to_sparse.py delete mode 100644 bench/bench_get_put_value.py delete mode 100644 bench/bench_groupby.py delete mode 100644 bench/bench_join_panel.py delete mode 100644 bench/bench_khash_dict.py delete mode 100644 bench/bench_merge.R delete mode 100644 bench/bench_merge.py delete mode 100644 bench/bench_merge_sqlite.py delete mode 100644 bench/bench_pivot.R delete mode 100644 bench/bench_pivot.py delete mode 100644 bench/bench_take_indexing.py delete mode 100644 bench/bench_unique.py delete mode 100644 bench/bench_with_subset.R delete mode 100644 bench/bench_with_subset.py delete mode 100644 bench/better_unique.py delete mode 100644 bench/duplicated.R delete mode 100644 bench/io_roundtrip.py delete mode 100644 bench/larry.py delete mode 100644 bench/serialize.py delete mode 100644 bench/test.py delete mode 100644 
bench/zoo_bench.R delete mode 100644 bench/zoo_bench.py delete mode 100644 scripts/windows_builder/build_27-32.bat delete mode 100644 scripts/windows_builder/build_27-64.bat delete mode 100644 scripts/windows_builder/build_34-32.bat delete mode 100644 scripts/windows_builder/build_34-64.bat delete mode 100644 scripts/windows_builder/check_and_build.bat delete mode 100644 scripts/windows_builder/check_and_build.py delete mode 100644 scripts/windows_builder/readme.txt diff --git a/asv_bench/vbench_to_asv.py b/asv_bench/vbench_to_asv.py index c3041ec2b1ba1..2a4ce5d183ea2 100644 --- a/asv_bench/vbench_to_asv.py +++ b/asv_bench/vbench_to_asv.py @@ -114,7 +114,7 @@ def translate_module(target_module): l_vars = {} exec('import ' + target_module) in g_vars - print target_module + print(target_module) module = eval(target_module, g_vars) benchmarks = [] @@ -157,7 +157,7 @@ def translate_module(target_module): mod = os.path.basename(module) if mod in ['make.py', 'measure_memory_consumption.py', 'perf_HEAD.py', 'run_suite.py', 'test_perf.py', 'generate_rst_files.py', 'test.py', 'suite.py']: continue - print - print mod + print('') + print(mod) translate_module(mod.replace('.py', '')) diff --git a/bench/alignment.py b/bench/alignment.py deleted file mode 100644 index bc3134f597ee0..0000000000000 --- a/bench/alignment.py +++ /dev/null @@ -1,22 +0,0 @@ -# Setup -from pandas.compat import range, lrange -import numpy as np -import pandas -import la -N = 1000 -K = 50 -arr1 = np.random.randn(N, K) -arr2 = np.random.randn(N, K) -idx1 = lrange(N) -idx2 = lrange(K) - -# pandas -dma1 = pandas.DataFrame(arr1, idx1, idx2) -dma2 = pandas.DataFrame(arr2, idx1[::-1], idx2[::-1]) - -# larry -lar1 = la.larry(arr1, [idx1, idx2]) -lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]]) - -for i in range(100): - result = lar1 + lar2 diff --git a/bench/bench_dense_to_sparse.py b/bench/bench_dense_to_sparse.py deleted file mode 100644 index e1dcd3456e88d..0000000000000 --- a/bench/bench_dense_to_sparse.py +++ /dev/null @@ -1,14 +0,0 @@ -from pandas import * - -K = 100 -N = 100000 -rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) - -rng2 = np.asarray(rng).astype('M8[us]').astype('i8') - -series = {} -for i in range(1, K + 1): - data = np.random.randn(N)[:-i] - this_rng = rng2[:-i] - data[100:] = np.nan - series[i] = SparseSeries(data, index=this_rng) diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py deleted file mode 100644 index 427e0b1b10a22..0000000000000 --- a/bench/bench_get_put_value.py +++ /dev/null @@ -1,56 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -N = 1000 -K = 50 - - -def _random_index(howmany): - return Index([rands(10) for _ in range(howmany)]) - -df = DataFrame(np.random.randn(N, K), index=_random_index(N), - columns=_random_index(K)) - - -def get1(): - for col in df.columns: - for row in df.index: - _ = df[col][row] - - -def get2(): - for col in df.columns: - for row in df.index: - _ = df.get_value(row, col) - - -def put1(): - for col in df.columns: - for row in df.index: - df[col][row] = 0 - - -def put2(): - for col in df.columns: - for row in df.index: - df.set_value(row, col, 0) - - -def resize1(): - buf = DataFrame() - for col in df.columns: - for row in df.index: - buf = buf.set_value(row, col, 5.) - return buf - - -def resize2(): - from collections import defaultdict - - buf = defaultdict(dict) - for col in df.columns: - for row in df.index: - buf[col][row] = 5. 
- - return DataFrame(buf) diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py deleted file mode 100644 index d7a2853e1e7b2..0000000000000 --- a/bench/bench_groupby.py +++ /dev/null @@ -1,66 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -import string -import random - -k = 20000 -n = 10 - -foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) -foo2 = list(foo) -random.shuffle(foo) -random.shuffle(foo2) - -df = DataFrame({'A': foo, - 'B': foo2, - 'C': np.random.randn(n * k)}) - -import pandas._sandbox as sbx - - -def f(): - table = sbx.StringHashTable(len(df)) - ret = table.factorize(df['A']) - return ret - - -def g(): - table = sbx.PyObjectHashTable(len(df)) - ret = table.factorize(df['A']) - return ret - -ret = f() - -""" -import pandas._tseries as lib - -f = np.std - - -grouped = df.groupby(['A', 'B']) - -label_list = [ping.labels for ping in grouped.groupings] -shape = [len(ping.ids) for ping in grouped.groupings] - -from pandas.core.groupby import get_group_index - - -group_index = get_group_index(label_list, shape, - sort=True, xnull=True).astype('i4') - -ngroups = np.prod(shape) - -indexer = lib.groupsort_indexer(group_index, ngroups) - -values = df['C'].values.take(indexer) -group_index = group_index.take(indexer) - -f = lambda x: x.std(ddof=1) - -grouper = lib.Grouper(df['C'], np.ndarray.std, group_index, ngroups) -result = grouper.get_result() - -expected = grouped.std() -""" diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py deleted file mode 100644 index f3c3f8ba15f70..0000000000000 --- a/bench/bench_join_panel.py +++ /dev/null @@ -1,85 +0,0 @@ -# reasonably efficient - - -def create_panels_append(cls, panels): - """ return an append list of panels """ - panels = [a for a in panels if a is not None] - # corner cases - if len(panels) == 0: - return None - elif len(panels) == 1: - return panels[0] - elif len(panels) == 2 and panels[0] == panels[1]: - return panels[0] - # import pdb; pdb.set_trace() - # create a joint index for the axis - - def joint_index_for_axis(panels, axis): - s = set() - for p in panels: - s.update(list(getattr(p, axis))) - return sorted(list(s)) - - def reindex_on_axis(panels, axis, axis_reindex): - new_axis = joint_index_for_axis(panels, axis) - new_panels = [p.reindex(**{axis_reindex: new_axis, - 'copy': False}) for p in panels] - return new_panels, new_axis - # create the joint major index, dont' reindex the sub-panels - we are - # appending - major = joint_index_for_axis(panels, 'major_axis') - # reindex on minor axis - panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor') - # reindex on items - panels, items = reindex_on_axis(panels, 'items', 'items') - # concatenate values - try: - values = np.concatenate([p.values for p in panels], axis=1) - except Exception as detail: - raise Exception("cannot append values that dont' match dimensions! 
-> [%s] %s" - % (','.join(["%s" % p for p in panels]), str(detail))) - # pm('append - create_panel') - p = Panel(values, items=items, major_axis=major, - minor_axis=minor) - # pm('append - done') - return p - - -# does the job but inefficient (better to handle like you read a table in -# pytables...e.g create a LongPanel then convert to Wide) -def create_panels_join(cls, panels): - """ given an array of panels's, create a single panel """ - panels = [a for a in panels if a is not None] - # corner cases - if len(panels) == 0: - return None - elif len(panels) == 1: - return panels[0] - elif len(panels) == 2 and panels[0] == panels[1]: - return panels[0] - d = dict() - minor, major, items = set(), set(), set() - for panel in panels: - items.update(panel.items) - major.update(panel.major_axis) - minor.update(panel.minor_axis) - values = panel.values - for item, item_index in panel.items.indexMap.items(): - for minor_i, minor_index in panel.minor_axis.indexMap.items(): - for major_i, major_index in panel.major_axis.indexMap.items(): - try: - d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index] - except: - pass - # stack the values - minor = sorted(list(minor)) - major = sorted(list(major)) - items = sorted(list(items)) - # create the 3d stack (items x columns x indicies) - data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan) - for item in items]) - for major_i in major]).transpose() - for minor_i in minor]) - # construct the panel - return Panel(data, items, major, minor) -add_class_method(Panel, create_panels_join, 'join_many') diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py deleted file mode 100644 index 054fc36131b65..0000000000000 --- a/bench/bench_khash_dict.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Some comparisons of khash.h to Python dict -""" -from __future__ import print_function - -import numpy as np -import os - -from vbench.api import Benchmark -from pandas.util.testing import rands -from pandas.compat import range -import pandas._tseries as lib -import pandas._sandbox as sbx -import time - -import psutil - -pid = os.getpid() -proc = psutil.Process(pid) - - -def object_test_data(n): - pass - - -def string_test_data(n): - return np.array([rands(10) for _ in range(n)], dtype='O') - - -def int_test_data(n): - return np.arange(n, dtype='i8') - -N = 1000000 - -#---------------------------------------------------------------------- -# Benchmark 1: map_locations - - -def map_locations_python_object(): - arr = string_test_data(N) - return _timeit(lambda: lib.map_indices_object(arr)) - - -def map_locations_khash_object(): - arr = string_test_data(N) - - def f(): - table = sbx.PyObjectHashTable(len(arr)) - table.map_locations(arr) - return _timeit(f) - - -def _timeit(f, iterations=10): - start = time.time() - for _ in range(iterations): - foo = f() - elapsed = time.time() - start - return elapsed - -#---------------------------------------------------------------------- -# Benchmark 2: lookup_locations - - -def lookup_python(values): - table = lib.map_indices_object(values) - return _timeit(lambda: lib.merge_indexer_object(values, table)) - - -def lookup_khash(values): - table = sbx.PyObjectHashTable(len(values)) - table.map_locations(values) - locs = table.lookup_locations(values) - # elapsed = _timeit(lambda: table.lookup_locations2(values)) - return table - - -def leak(values): - for _ in range(100): - print(proc.get_memory_info()) - table = lookup_khash(values) - # table.destroy() - -arr = string_test_data(N) - 
-#---------------------------------------------------------------------- -# Benchmark 3: unique - -#---------------------------------------------------------------------- -# Benchmark 4: factorize diff --git a/bench/bench_merge.R b/bench/bench_merge.R deleted file mode 100644 index 3ed4618494857..0000000000000 --- a/bench/bench_merge.R +++ /dev/null @@ -1,161 +0,0 @@ -library(plyr) -library(data.table) -N <- 10000 -indices = rep(NA, N) -indices2 = rep(NA, N) -for (i in 1:N) { - indices[i] <- paste(sample(letters, 10), collapse="") - indices2[i] <- paste(sample(letters, 10), collapse="") -} -left <- data.frame(key=rep(indices[1:8000], 10), - key2=rep(indices2[1:8000], 10), - value=rnorm(80000)) -right <- data.frame(key=indices[2001:10000], - key2=indices2[2001:10000], - value2=rnorm(8000)) - -right2 <- data.frame(key=rep(right$key, 2), - key2=rep(right$key2, 2), - value2=rnorm(16000)) - -left.dt <- data.table(left, key=c("key", "key2")) -right.dt <- data.table(right, key=c("key", "key2")) -right2.dt <- data.table(right2, key=c("key", "key2")) - -# left.dt2 <- data.table(left) -# right.dt2 <- data.table(right) - -## left <- data.frame(key=rep(indices[1:1000], 10), -## key2=rep(indices2[1:1000], 10), -## value=rnorm(100000)) -## right <- data.frame(key=indices[1:1000], -## key2=indices2[1:1000], -## value2=rnorm(10000)) - -timeit <- function(func, niter=10) { - timing = rep(NA, niter) - for (i in 1:niter) { - gc() - timing[i] <- system.time(func())[3] - } - mean(timing) -} - -left.join <- function(sort=FALSE) { - result <- base::merge(left, right, all.x=TRUE, sort=sort) -} - -right.join <- function(sort=FALSE) { - result <- base::merge(left, right, all.y=TRUE, sort=sort) -} - -outer.join <- function(sort=FALSE) { - result <- base::merge(left, right, all=TRUE, sort=sort) -} - -inner.join <- function(sort=FALSE) { - result <- base::merge(left, right, all=FALSE, sort=sort) -} - -left.join.dt <- function(sort=FALSE) { - result <- right.dt[left.dt] -} - -right.join.dt <- function(sort=FALSE) { - result <- left.dt[right.dt] -} - -outer.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right.dt, all=TRUE, sort=sort) -} - -inner.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right.dt, all=FALSE, sort=sort) -} - -plyr.join <- function(type) { - result <- plyr::join(left, right, by=c("key", "key2"), - type=type, match="first") -} - -sort.options <- c(FALSE, TRUE) - -# many-to-one - -results <- matrix(nrow=4, ncol=3) -colnames(results) <- c("base::merge", "plyr", "data.table") -rownames(results) <- c("inner", "outer", "left", "right") - -base.functions <- c(inner.join, outer.join, left.join, right.join) -plyr.functions <- c(function() plyr.join("inner"), - function() plyr.join("full"), - function() plyr.join("left"), - function() plyr.join("right")) -dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt) -for (i in 1:4) { - base.func <- base.functions[[i]] - plyr.func <- plyr.functions[[i]] - dt.func <- dt.functions[[i]] - results[i, 1] <- timeit(base.func) - results[i, 2] <- timeit(plyr.func) - results[i, 3] <- timeit(dt.func) -} - - -# many-to-many - -left.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all.x=TRUE, sort=sort) -} - -right.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all.y=TRUE, sort=sort) -} - -outer.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all=TRUE, sort=sort) -} - -inner.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all=FALSE, sort=sort) -} - 
-left.join.dt <- function(sort=FALSE) { - result <- right2.dt[left.dt] -} - -right.join.dt <- function(sort=FALSE) { - result <- left.dt[right2.dt] -} - -outer.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right2.dt, all=TRUE, sort=sort) -} - -inner.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right2.dt, all=FALSE, sort=sort) -} - -sort.options <- c(FALSE, TRUE) - -# many-to-one - -results <- matrix(nrow=4, ncol=3) -colnames(results) <- c("base::merge", "plyr", "data.table") -rownames(results) <- c("inner", "outer", "left", "right") - -base.functions <- c(inner.join, outer.join, left.join, right.join) -plyr.functions <- c(function() plyr.join("inner"), - function() plyr.join("full"), - function() plyr.join("left"), - function() plyr.join("right")) -dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt) -for (i in 1:4) { - base.func <- base.functions[[i]] - plyr.func <- plyr.functions[[i]] - dt.func <- dt.functions[[i]] - results[i, 1] <- timeit(base.func) - results[i, 2] <- timeit(plyr.func) - results[i, 3] <- timeit(dt.func) -} - diff --git a/bench/bench_merge.py b/bench/bench_merge.py deleted file mode 100644 index 330dba7b9af69..0000000000000 --- a/bench/bench_merge.py +++ /dev/null @@ -1,105 +0,0 @@ -import random -import gc -import time -from pandas import * -from pandas.compat import range, lrange, StringIO -from pandas.util.testing import rands - -N = 10000 -ngroups = 10 - - -def get_test_data(ngroups=100, n=N): - unique_groups = lrange(ngroups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - random.shuffle(arr) - return arr - -# aggregate multiple columns -# df = DataFrame({'key1' : get_test_data(ngroups=ngroups), -# 'key2' : get_test_data(ngroups=ngroups), -# 'data1' : np.random.randn(N), -# 'data2' : np.random.randn(N)}) - -# df2 = DataFrame({'key1' : get_test_data(ngroups=ngroups, n=N//10), -# 'key2' : get_test_data(ngroups=ngroups//2, n=N//10), -# 'value' : np.random.randn(N // 10)}) -# result = merge.merge(df, df2, on='key2') - -N = 10000 - -indices = np.array([rands(10) for _ in range(N)], dtype='O') -indices2 = np.array([rands(10) for _ in range(N)], dtype='O') -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = DataFrame({'key': key, 'key2': key2, - 'value': np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], - 'value2': np.random.randn(8000)}) - -right2 = right.append(right, ignore_index=True) - - -join_methods = ['inner', 'outer', 'left', 'right'] -results = DataFrame(index=join_methods, columns=[False, True]) -niter = 10 -for sort in [False, True]: - for join_method in join_methods: - f = lambda: merge(left, right, how=join_method, sort=sort) - gc.disable() - start = time.time() - for _ in range(niter): - f() - elapsed = (time.time() - start) / niter - gc.enable() - results[sort][join_method] = elapsed -# results.columns = ['pandas'] -results.columns = ['dont_sort', 'sort'] - - -# R results -# many to one -r_results = read_table(StringIO(""" base::merge plyr data.table -inner 0.2475 0.1183 0.1100 -outer 0.4213 0.1916 0.2090 -left 0.2998 0.1188 0.0572 -right 0.3102 0.0536 0.0376 -"""), sep='\s+') - -presults = results[['dont_sort']].rename(columns={'dont_sort': 'pandas'}) -all_results = presults.join(r_results) - -all_results = all_results.div(all_results['pandas'], axis=0) - -all_results = all_results.ix[:, ['pandas', 
'data.table', 'plyr', - 'base::merge']] - -sort_results = DataFrame.from_items([('pandas', results['sort']), - ('R', r_results['base::merge'])]) -sort_results['Ratio'] = sort_results['R'] / sort_results['pandas'] - - -nosort_results = DataFrame.from_items([('pandas', results['dont_sort']), - ('R', r_results['base::merge'])]) -nosort_results['Ratio'] = nosort_results['R'] / nosort_results['pandas'] - -# many to many - -# many to one -r_results = read_table(StringIO("""base::merge plyr data.table -inner 0.4610 0.1276 0.1269 -outer 0.9195 0.1881 0.2725 -left 0.6559 0.1257 0.0678 -right 0.6425 0.0522 0.0428 -"""), sep='\s+') - -all_results = presults.join(r_results) -all_results = all_results.div(all_results['pandas'], axis=0) -all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', - 'base::merge']] diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py deleted file mode 100644 index 3ad4b810119c3..0000000000000 --- a/bench/bench_merge_sqlite.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np -from collections import defaultdict -import gc -import time -from pandas import DataFrame -from pandas.util.testing import rands -from pandas.compat import range, zip -import random - -N = 10000 - -indices = np.array([rands(10) for _ in range(N)], dtype='O') -indices2 = np.array([rands(10) for _ in range(N)], dtype='O') -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = DataFrame({'key': key, 'key2': key2, - 'value': np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], - 'value2': np.random.randn(8000)}) - -# right2 = right.append(right, ignore_index=True) -# right = right2 - -# random.shuffle(key2) -# indices2 = indices.copy() -# random.shuffle(indices2) - -# Prepare Database -import sqlite3 -create_sql_indexes = True - -conn = sqlite3.connect(':memory:') -conn.execute( - 'create table left( key varchar(10), key2 varchar(10), value int);') -conn.execute( - 'create table right( key varchar(10), key2 varchar(10), value2 int);') -conn.executemany('insert into left values (?, ?, ?)', - zip(key, key2, left['value'])) -conn.executemany('insert into right values (?, ?, ?)', - zip(right['key'], right['key2'], right['value2'])) - -# Create Indices -if create_sql_indexes: - conn.execute('create index left_ix on left(key, key2)') - conn.execute('create index right_ix on right(key, key2)') - - -join_methods = ['inner', 'left outer', 'left'] # others not supported -sql_results = DataFrame(index=join_methods, columns=[False]) -niter = 5 -for sort in [False]: - for join_method in join_methods: - sql = """CREATE TABLE test as select * - from left - %s join right - on left.key=right.key - and left.key2 = right.key2;""" % join_method - sql = """select * - from left - %s join right - on left.key=right.key - and left.key2 = right.key2;""" % join_method - - if sort: - sql = '%s order by key, key2' % sql - f = lambda: list(conn.execute(sql)) # list fetches results - g = lambda: conn.execute(sql) # list fetches results - gc.disable() - start = time.time() - # for _ in range(niter): - g() - elapsed = (time.time() - start) / niter - gc.enable() - - cur = conn.execute("DROP TABLE test") - conn.commit() - - sql_results[sort][join_method] = elapsed - sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort'] - sql_results.index = ['inner', 'outer', 'left'] - - sql = """select * - from left - inner join right - on left.key=right.key - and left.key2 = right.key2;""" diff --git a/bench/bench_pivot.R b/bench/bench_pivot.R deleted file 
mode 100644 index 06dc6a105bc43..0000000000000 --- a/bench/bench_pivot.R +++ /dev/null @@ -1,27 +0,0 @@ -library(reshape2) - - -n <- 100000 -a.size <- 5 -b.size <- 5 - -data <- data.frame(a=sample(letters[1:a.size], n, replace=T), - b=sample(letters[1:b.size], n, replace=T), - c=rnorm(n), - d=rnorm(n)) - -timings <- numeric() - -# acast(melt(data, id=c("a", "b")), a ~ b, mean) -# acast(melt(data, id=c("a", "b")), a + b ~ variable, mean) - -for (i in 1:10) { - gc() - tim <- system.time(acast(melt(data, id=c("a", "b")), a ~ b, mean, - subset=.(variable=="c"))) - timings[i] = tim[3] -} - -mean(timings) - -acast(melt(data, id=c("a", "b")), a ~ b, mean, subset=.(variable="c")) diff --git a/bench/bench_pivot.py b/bench/bench_pivot.py deleted file mode 100644 index 007bd0aaebc2f..0000000000000 --- a/bench/bench_pivot.py +++ /dev/null @@ -1,16 +0,0 @@ -from pandas import * -import string - - -n = 100000 -asize = 5 -bsize = 5 - -letters = np.asarray(list(string.letters), dtype=object) - -data = DataFrame(dict(foo=letters[:asize][np.random.randint(0, asize, n)], - bar=letters[:bsize][np.random.randint(0, bsize, n)], - baz=np.random.randn(n), - qux=np.random.randn(n))) - -table = pivot_table(data, xby=['foo', 'bar']) diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py deleted file mode 100644 index 5fb584bcfe45f..0000000000000 --- a/bench/bench_take_indexing.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import print_function -import numpy as np - -from pandas import * -import pandas._tseries as lib - -from pandas import DataFrame -import timeit -from pandas.compat import zip - -setup = """ -from pandas import Series -import pandas._tseries as lib -import random -import numpy as np - -import random -n = %d -k = %d -arr = np.random.randn(n, k) -indexer = np.arange(n, dtype=np.int32) -indexer = indexer[::-1] -""" - -sizes = [100, 1000, 10000, 100000] -iters = [1000, 1000, 100, 1] - -fancy_2d = [] -take_2d = [] -cython_2d = [] - -n = 1000 - - -def _timeit(stmt, size, k=5, iters=1000): - timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) - return timer.timeit(n) / n - -for sz, its in zip(sizes, iters): - print(sz) - fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) - take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its)) - cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) - -df = DataFrame({'fancy': fancy_2d, - 'take': take_2d, - 'cython': cython_2d}) - -print(df) - -from pandas.rpy.common import r -r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') -r('set.seed(12345') -r('indexer <- sample(1:10000)') -r('mat[indexer,]') diff --git a/bench/bench_unique.py b/bench/bench_unique.py deleted file mode 100644 index 87bd2f2df586c..0000000000000 --- a/bench/bench_unique.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import print_function -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range, zip -import pandas._tseries as lib -import numpy as np -import matplotlib.pyplot as plt - -N = 50000 -K = 10000 - -groups = np.array([rands(10) for _ in range(K)], dtype='O') -groups2 = np.array([rands(10) for _ in range(K)], dtype='O') - -labels = np.tile(groups, N // K) -labels2 = np.tile(groups2, N // K) -data = np.random.randn(N) - - -def timeit(f, niter): - import gc - import time - gc.disable() - start = time.time() - for _ in range(niter): - f() - elapsed = (time.time() - start) / niter - gc.enable() - return elapsed - - -def algo1(): - unique_labels = np.unique(labels) - result = 
np.empty(len(unique_labels)) - for i, label in enumerate(unique_labels): - result[i] = data[labels == label].sum() - - -def algo2(): - unique_labels = np.unique(labels) - indices = lib.groupby_indices(labels) - result = np.empty(len(unique_labels)) - - for i, label in enumerate(unique_labels): - result[i] = data.take(indices[label]).sum() - - -def algo3_nosort(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(labels, sort=False) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - - -def algo3_sort(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(labels, sort=True) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - -import numpy as np -import random - - -# dict to hold results -counts = {} - -# a hack to generate random key, value pairs. -# 5k keys, 100k values -x = np.tile(np.arange(5000, dtype='O'), 20) -random.shuffle(x) -xarr = x -x = [int(y) for y in x] -data = np.random.uniform(0, 1, 100000) - - -def f(): - # groupby sum - for k, v in zip(x, data): - try: - counts[k] += v - except KeyError: - counts[k] = v - - -def f2(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(xarr, sort=False) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - - -def algo4(): - rizer = lib.DictFactorizer() - labs1, _ = rizer.factorize(labels, sort=False) - k1 = len(rizer.uniques) - - rizer = lib.DictFactorizer() - labs2, _ = rizer.factorize(labels2, sort=False) - k2 = len(rizer.uniques) - - group_id = labs1 * k2 + labs2 - max_group = k1 * k2 - - if max_group > 1e6: - rizer = lib.Int64Factorizer(len(group_id)) - group_id, _ = rizer.factorize(group_id.astype('i8'), sort=True) - max_group = len(rizer.uniques) - - out = np.empty(max_group) - counts = np.zeros(max_group, dtype='i4') - lib.group_add(out, counts, data, group_id) - -# cumtime percall filename:lineno(function) -# 0.592 0.592 :1() - # 0.584 0.006 groupby_ex.py:37(algo3_nosort) - # 0.535 0.005 {method 'factorize' of DictFactorizer' objects} - # 0.047 0.000 {pandas._tseries.group_add} - # 0.002 0.000 numeric.py:65(zeros_like) - # 0.001 0.000 {method 'fill' of 'numpy.ndarray' objects} - # 0.000 0.000 {numpy.core.multiarray.empty_like} - # 0.000 0.000 {numpy.core.multiarray.empty} - -# UNIQUE timings - -# N = 10000000 -# K = 500000 - -# groups = np.array([rands(10) for _ in range(K)], dtype='O') - -# labels = np.tile(groups, N // K) -data = np.random.randn(N) - -data = np.random.randn(N) - -Ks = [100, 1000, 5000, 10000, 25000, 50000, 100000] - -# Ks = [500000, 1000000, 2500000, 5000000, 10000000] - -import psutil -import os -import gc - -pid = os.getpid() -proc = psutil.Process(pid) - - -def dict_unique(values, expected_K, sort=False, memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - rizer = lib.DictFactorizer() - result = rizer.unique_int64(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - return result - - -def khash_unique(values, expected_K, size_hint=False, sort=False, - memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - if size_hint: - rizer = lib.Factorizer(len(values)) - else: - rizer = lib.Factorizer(100) - - result = [] - result = rizer.unique(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def 
khash_unique_str(values, expected_K, size_hint=False, sort=False, - memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - if size_hint: - rizer = lib.StringHashTable(len(values)) - else: - rizer = lib.StringHashTable(100) - - result = [] - result = rizer.unique(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def khash_unique_int64(values, expected_K, size_hint=False, sort=False): - if size_hint: - rizer = lib.Int64HashTable(len(values)) - else: - rizer = lib.Int64HashTable(100) - - result = [] - result = rizer.unique(values) - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def hash_bench(): - numpy = [] - dict_based = [] - dict_based_sort = [] - khash_hint = [] - khash_nohint = [] - for K in Ks: - print(K) - # groups = np.array([rands(10) for _ in range(K)]) - # labels = np.tile(groups, N // K).astype('O') - - groups = np.random.randint(0, long(100000000000), size=K) - labels = np.tile(groups, N // K) - dict_based.append(timeit(lambda: dict_unique(labels, K), 20)) - khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20)) - khash_hint.append(timeit(lambda: khash_unique_int64(labels, K, - size_hint=True), 20)) - - # memory, hard to get - # dict_based.append(np.mean([dict_unique(labels, K, memory=True) - # for _ in range(10)])) - # khash_nohint.append(np.mean([khash_unique(labels, K, memory=True) - # for _ in range(10)])) - # khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True) - # for _ in range(10)])) - - # dict_based_sort.append(timeit(lambda: dict_unique(labels, K, - # sort=True), 10)) - # numpy.append(timeit(lambda: np.unique(labels), 10)) - - # unique_timings = DataFrame({'numpy.unique' : numpy, - # 'dict, no sort' : dict_based, - # 'dict, sort' : dict_based_sort}, - # columns=['dict, no sort', - # 'dict, sort', 'numpy.unique'], - # index=Ks) - - unique_timings = DataFrame({'dict': dict_based, - 'khash, preallocate': khash_hint, - 'khash': khash_nohint}, - columns=['khash, preallocate', 'khash', 'dict'], - index=Ks) - - unique_timings.plot(kind='bar', legend=False) - plt.legend(loc='best') - plt.title('Unique on 100,000 values, int64') - plt.xlabel('Number of unique labels') - plt.ylabel('Mean execution time') - - plt.show() diff --git a/bench/bench_with_subset.R b/bench/bench_with_subset.R deleted file mode 100644 index 69d0f7a9eec63..0000000000000 --- a/bench/bench_with_subset.R +++ /dev/null @@ -1,53 +0,0 @@ -library(microbenchmark) -library(data.table) - - -data.frame.subset.bench <- function (n=1e7, times=30) { - df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(subset(df, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), - times=times)) -} - - -# data.table allows something very similar to query with an expression -# but we have chained comparisons AND we're faster BOO YAH! 
-data.table.subset.expression.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(dt[, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c], - times=times)) -} - - -# compare against subset with data.table for good measure -data.table.subset.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(subset(dt, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), - times=times)) -} - - -data.frame.with.bench <- function (n=1e7, times=30) { - df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - - print(microbenchmark(with(df, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), - times=times)) -} - - -data.table.with.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(with(dt, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), - times=times)) -} - - -bench <- function () { - data.frame.subset.bench() - data.table.subset.expression.bench() - data.table.subset.bench() - data.frame.with.bench() - data.table.with.bench() -} - - -bench() diff --git a/bench/bench_with_subset.py b/bench/bench_with_subset.py deleted file mode 100644 index 017401df3f7f3..0000000000000 --- a/bench/bench_with_subset.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python - -""" -Microbenchmarks for comparison with R's "with" and "subset" functions -""" - -from __future__ import print_function -import numpy as np -from numpy import array -from timeit import repeat as timeit -from pandas.compat import range, zip -from pandas import DataFrame - - -setup_common = """from pandas import DataFrame -from numpy.random import randn -df = DataFrame(randn(%d, 3), columns=list('abc')) -%s""" - - -setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'" - - -def bench_with(n, times=10, repeat=3, engine='numexpr'): - return np.array(timeit('df.eval(s, engine=%r)' % engine, - setup=setup_common % (n, setup_with), - repeat=repeat, number=times)) / times - - -setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'" - - -def bench_subset(n, times=10, repeat=3, engine='numexpr'): - return np.array(timeit('df.query(s, engine=%r)' % engine, - setup=setup_common % (n, setup_subset), - repeat=repeat, number=times)) / times - - -def bench(mn=1, mx=7, num=100, engines=('python', 'numexpr'), verbose=False): - r = np.logspace(mn, mx, num=num).round().astype(int) - - ev = DataFrame(np.empty((num, len(engines))), columns=engines) - qu = ev.copy(deep=True) - - ev['size'] = qu['size'] = r - - for engine in engines: - for i, n in enumerate(r): - if verbose: - print('engine: %r, i == %d' % (engine, i)) - ev.loc[i, engine] = bench_with(n, times=1, repeat=1, engine=engine) - qu.loc[i, engine] = bench_subset(n, times=1, repeat=1, - engine=engine) - - return ev, qu - - -def plot_perf(df, engines, title, filename=None): - from matplotlib.pyplot import figure, rc - - try: - from mpltools import style - except ImportError: - pass - else: - style.use('ggplot') - - rc('text', usetex=True) - - fig = figure(figsize=(4, 3), dpi=100) - ax = fig.add_subplot(111) - - for engine in engines: - ax.plot(df.size, df[engine], label=engine, lw=2) - - ax.set_xlabel('Number of Rows') - ax.set_ylabel('Time (s)') - ax.set_title(title) - ax.legend(loc='best') - ax.tick_params(top=False, right=False) - - fig.tight_layout() - - if filename is not None: - fig.savefig(filename) - - -if __name__ == '__main__': - import os - import pandas as pd - - pandas_dir = 
os.path.dirname(os.path.abspath(os.path.dirname(__file__))) - static_path = os.path.join(pandas_dir, 'doc', 'source', '_static') - - join = lambda p: os.path.join(static_path, p) - - fn = join('eval-query-perf-data.h5') - - engines = 'python', 'numexpr' - - if not os.path.exists(fn): - ev, qu = bench(verbose=True) - ev.to_hdf(fn, 'eval') - qu.to_hdf(fn, 'query') - else: - ev = pd.read_hdf(fn, 'eval') - qu = pd.read_hdf(fn, 'query') - - plot_perf(ev, engines, 'DataFrame.eval()', filename=join('eval-perf.png')) - plot_perf(qu, engines, 'DataFrame.query()', - filename=join('query-perf.png')) - - plot_perf(ev[ev.size <= 50000], engines, 'DataFrame.eval()', - filename=join('eval-perf-small.png')) - plot_perf(qu[qu.size <= 500000], engines, 'DataFrame.query()', - filename=join('query-perf-small.png')) diff --git a/bench/better_unique.py b/bench/better_unique.py deleted file mode 100644 index e03a4f433ce66..0000000000000 --- a/bench/better_unique.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import print_function -from pandas import DataFrame -from pandas.compat import range, zip -import timeit - -setup = """ -from pandas import Series -import pandas._tseries as _tseries -from pandas.compat import range -import random -import numpy as np - -def better_unique(values): - uniques = _tseries.fast_unique(values) - id_map = _tseries.map_indices_buf(uniques) - labels = _tseries.get_unique_labels(values, id_map) - return uniques, labels - -tot = 100000 - -def get_test_data(ngroups=100, n=tot): - unique_groups = range(ngroups) - random.shuffle(unique_groups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - return arr - -arr = get_test_data(ngroups=%d) -""" - -group_sizes = [10, 100, 1000, 10000, - 20000, 30000, 40000, - 50000, 60000, 70000, - 80000, 90000, 100000] - -numbers = [100, 100, 50] + [10] * 10 - -numpy = [] -wes = [] - -for sz, n in zip(group_sizes, numbers): - # wes_timer = timeit.Timer(stmt='better_unique(arr)', - # setup=setup % sz) - wes_timer = timeit.Timer(stmt='_tseries.fast_unique(arr)', - setup=setup % sz) - - numpy_timer = timeit.Timer(stmt='np.unique(arr)', - setup=setup % sz) - - print(n) - numpy_result = numpy_timer.timeit(number=n) / n - wes_result = wes_timer.timeit(number=n) / n - - print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)) - - wes.append(wes_result) - numpy.append(numpy_result) - -result = DataFrame({'wes': wes, 'numpy': numpy}, index=group_sizes) - - -def make_plot(numpy, wes): - pass - -# def get_test_data(ngroups=100, n=100000): -# unique_groups = range(ngroups) -# random.shuffle(unique_groups) -# arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - -# if len(arr) < n: -# arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], -# dtype=object) - -# return arr - -# arr = get_test_data(ngroups=1000) diff --git a/bench/duplicated.R b/bench/duplicated.R deleted file mode 100644 index eb2376df2932a..0000000000000 --- a/bench/duplicated.R +++ /dev/null @@ -1,22 +0,0 @@ -N <- 100000 - -k1 = rep(NA, N) -k2 = rep(NA, N) -for (i in 1:N){ - k1[i] <- paste(sample(letters, 1), collapse="") - k2[i] <- paste(sample(letters, 1), collapse="") -} -df <- data.frame(a=k1, b=k2, c=rep(1:100, N / 100)) -df2 <- data.frame(a=k1, b=k2) - -timings <- numeric() -timings2 <- numeric() -for (i in 1:50) { - gc() - timings[i] = system.time(deduped <- df[!duplicated(df),])[3] - gc() - timings2[i] = system.time(deduped <- 
df[!duplicated(df[,c("a", "b")]),])[3] -} - -mean(timings) -mean(timings2) diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py deleted file mode 100644 index d87da0ec6321a..0000000000000 --- a/bench/io_roundtrip.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import print_function -import time -import os -import numpy as np - -import la -import pandas -from pandas.compat import range -from pandas import datetools, DatetimeIndex - - -def timeit(f, iterations): - start = time.clock() - - for i in range(iterations): - f() - - return time.clock() - start - - -def rountrip_archive(N, K=50, iterations=10): - # Create data - arr = np.random.randn(N, K) - # lar = la.larry(arr) - dma = pandas.DataFrame(arr, - DatetimeIndex('1/1/2000', periods=N, - offset=datetools.Minute())) - dma[201] = 'bar' - - # filenames - filename_numpy = '/Users/wesm/tmp/numpy.npz' - filename_larry = '/Users/wesm/tmp/archive.hdf5' - filename_pandas = '/Users/wesm/tmp/pandas_tmp' - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - - try: - os.unlink(filename_pandas) - except: - pass - - # Time a round trip save and load - # numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) - # numpy_time = timeit(numpy_f, iterations) / iterations - - # larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) - # larry_time = timeit(larry_f, iterations) / iterations - - pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pandas_time = timeit(pandas_f, iterations) / iterations - print('pandas (HDF5) %7.4f seconds' % pandas_time) - - pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pickle_time = timeit(pickle_f, iterations) / iterations - print('pandas (pickle) %7.4f seconds' % pickle_time) - - # print('Numpy (npz) %7.4f seconds' % numpy_time) - # print('larry (HDF5) %7.4f seconds' % larry_time) - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - - try: - os.unlink(filename_pandas) - except: - pass - - -def numpy_roundtrip(filename, arr1, arr2): - np.savez(filename, arr1=arr1, arr2=arr2) - npz = np.load(filename) - arr1 = npz['arr1'] - arr2 = npz['arr2'] - - -def larry_roundtrip(filename, lar1, lar2): - io = la.IO(filename) - io['lar1'] = lar1 - io['lar2'] = lar2 - lar1 = io['lar1'] - lar2 = io['lar2'] - - -def pandas_roundtrip(filename, dma1, dma2): - # What's the best way to code this? 
- from pandas.io.pytables import HDFStore - store = HDFStore(filename) - store['dma1'] = dma1 - store['dma2'] = dma2 - dma1 = store['dma1'] - dma2 = store['dma2'] - - -def pandas_roundtrip_pickle(filename, dma1, dma2): - dma1.save(filename) - dma1 = pandas.DataFrame.load(filename) - dma2.save(filename) - dma2 = pandas.DataFrame.load(filename) - -if __name__ == '__main__': - rountrip_archive(10000, K=200) diff --git a/bench/larry.py b/bench/larry.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/bench/serialize.py b/bench/serialize.py deleted file mode 100644 index b0edd6a5752d2..0000000000000 --- a/bench/serialize.py +++ /dev/null @@ -1,89 +0,0 @@ -from __future__ import print_function -from pandas.compat import range, lrange -import time -import os -import numpy as np - -import la -import pandas - - -def timeit(f, iterations): - start = time.clock() - - for i in range(iterations): - f() - - return time.clock() - start - - -def roundtrip_archive(N, iterations=10): - - # Create data - arr = np.random.randn(N, N) - lar = la.larry(arr) - dma = pandas.DataFrame(arr, lrange(N), lrange(N)) - - # filenames - filename_numpy = '/Users/wesm/tmp/numpy.npz' - filename_larry = '/Users/wesm/tmp/archive.hdf5' - filename_pandas = '/Users/wesm/tmp/pandas_tmp' - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - try: - os.unlink(filename_pandas) - except: - pass - - # Time a round trip save and load - numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) - numpy_time = timeit(numpy_f, iterations) / iterations - - larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) - larry_time = timeit(larry_f, iterations) / iterations - - pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pandas_time = timeit(pandas_f, iterations) / iterations - - print('Numpy (npz) %7.4f seconds' % numpy_time) - print('larry (HDF5) %7.4f seconds' % larry_time) - print('pandas (HDF5) %7.4f seconds' % pandas_time) - - -def numpy_roundtrip(filename, arr1, arr2): - np.savez(filename, arr1=arr1, arr2=arr2) - npz = np.load(filename) - arr1 = npz['arr1'] - arr2 = npz['arr2'] - - -def larry_roundtrip(filename, lar1, lar2): - io = la.IO(filename) - io['lar1'] = lar1 - io['lar2'] = lar2 - lar1 = io['lar1'] - lar2 = io['lar2'] - - -def pandas_roundtrip(filename, dma1, dma2): - from pandas.io.pytables import HDFStore - store = HDFStore(filename) - store['dma1'] = dma1 - store['dma2'] = dma2 - dma1 = store['dma1'] - dma2 = store['dma2'] - - -def pandas_roundtrip_pickle(filename, dma1, dma2): - dma1.save(filename) - dma1 = pandas.DataFrame.load(filename) - dma2.save(filename) - dma2 = pandas.DataFrame.load(filename) diff --git a/bench/test.py b/bench/test.py deleted file mode 100644 index 2339deab313a1..0000000000000 --- a/bench/test.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -import itertools -import collections -import scipy.ndimage as ndi -from pandas.compat import zip, range - -N = 10000 - -lat = np.random.randint(0, 360, N) -lon = np.random.randint(0, 360, N) -data = np.random.randn(N) - - -def groupby1(lat, lon, data): - indexer = np.lexsort((lon, lat)) - lat = lat.take(indexer) - lon = lon.take(indexer) - sorted_data = data.take(indexer) - - keys = 1000. 
* lat + lon - unique_keys = np.unique(keys) - bounds = keys.searchsorted(unique_keys) - - result = group_agg(sorted_data, bounds, lambda x: x.mean()) - - decoder = keys.searchsorted(unique_keys) - - return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) - - -def group_mean(lat, lon, data): - indexer = np.lexsort((lon, lat)) - lat = lat.take(indexer) - lon = lon.take(indexer) - sorted_data = data.take(indexer) - - keys = 1000 * lat + lon - unique_keys = np.unique(keys) - - result = ndi.mean(sorted_data, labels=keys, index=unique_keys) - decoder = keys.searchsorted(unique_keys) - - return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) - - -def group_mean_naive(lat, lon, data): - grouped = collections.defaultdict(list) - for lt, ln, da in zip(lat, lon, data): - grouped[(lt, ln)].append(da) - - averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items()) - - return averaged - - -def group_agg(values, bounds, f): - N = len(values) - result = np.empty(len(bounds), dtype=float) - for i, left_bound in enumerate(bounds): - if i == len(bounds) - 1: - right_bound = N - else: - right_bound = bounds[i + 1] - - result[i] = f(values[left_bound: right_bound]) - - return result - -# for i in range(10): -# groupby1(lat, lon, data) diff --git a/bench/zoo_bench.R b/bench/zoo_bench.R deleted file mode 100644 index 294d55f51a9ab..0000000000000 --- a/bench/zoo_bench.R +++ /dev/null @@ -1,71 +0,0 @@ -library(zoo) -library(xts) -library(fts) -library(tseries) -library(its) -library(xtable) - -## indices = rep(NA, 100000) -## for (i in 1:100000) -## indices[i] <- paste(sample(letters, 10), collapse="") - - - -## x <- zoo(rnorm(100000), indices) -## y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)]) - -## indices <- as.POSIXct(1:100000) - -indices <- as.POSIXct(Sys.Date()) + seq(1, 100000000, 100) - -sz <- 500000 - -## x <- xts(rnorm(sz), sample(indices, sz)) -## y <- xts(rnorm(sz), sample(indices, sz)) - -zoo.bench <- function(){ - x <- zoo(rnorm(sz), sample(indices, sz)) - y <- zoo(rnorm(sz), sample(indices, sz)) - timeit(function() {x + y}) -} - -xts.bench <- function(){ - x <- xts(rnorm(sz), sample(indices, sz)) - y <- xts(rnorm(sz), sample(indices, sz)) - timeit(function() {x + y}) -} - -fts.bench <- function(){ - x <- fts(rnorm(sz), sort(sample(indices, sz))) - y <- fts(rnorm(sz), sort(sample(indices, sz)) - timeit(function() {x + y}) -} - -its.bench <- function(){ - x <- its(rnorm(sz), sort(sample(indices, sz))) - y <- its(rnorm(sz), sort(sample(indices, sz))) - timeit(function() {x + y}) -} - -irts.bench <- function(){ - x <- irts(sort(sample(indices, sz)), rnorm(sz)) - y <- irts(sort(sample(indices, sz)), rnorm(sz)) - timeit(function() {x + y}) -} - -timeit <- function(f){ - timings <- numeric() - for (i in 1:10) { - gc() - timings[i] = system.time(f())[3] - } - mean(timings) -} - -bench <- function(){ - results <- c(xts.bench(), fts.bench(), its.bench(), zoo.bench()) - names <- c("xts", "fts", "its", "zoo") - data.frame(results, names) -} - -result <- bench() diff --git a/bench/zoo_bench.py b/bench/zoo_bench.py deleted file mode 100644 index 74cb1952a5a2a..0000000000000 --- a/bench/zoo_bench.py +++ /dev/null @@ -1,36 +0,0 @@ -from pandas import * -from pandas.util.testing import rands - -n = 1000000 -# indices = Index([rands(10) for _ in xrange(n)]) - - -def sample(values, k): - sampler = np.random.permutation(len(values)) - return values.take(sampler[:k]) -sz = 500000 -rng = np.arange(0, 10000000000000, 10000000) -stamps = np.datetime64(datetime.now()).view('i8') + 
rng -idx1 = np.sort(sample(stamps, sz)) -idx2 = np.sort(sample(stamps, sz)) -ts1 = Series(np.random.randn(sz), idx1) -ts2 = Series(np.random.randn(sz), idx2) - - -# subsample_size = 90000 - -# x = Series(np.random.randn(100000), indices) -# y = Series(np.random.randn(subsample_size), -# index=sample(indices, subsample_size)) - - -# lx = larry(np.random.randn(100000), [list(indices)]) -# ly = larry(np.random.randn(subsample_size), [list(y.index)]) - -# Benchmark 1: Two 1-million length time series (int64-based index) with -# randomly chosen timestamps - -# Benchmark 2: Join two 5-variate time series DataFrames (outer and inner join) - -# df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5)) -# df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10)) diff --git a/doc/source/conf.py b/doc/source/conf.py index 394fa44c30573..cb3063d59beae 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -17,6 +17,11 @@ import importlib from pandas.compat import u, PY3 +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + # https://github.com/sphinx-doc/sphinx/pull/2325/files # Workaround for sphinx-build recursion limit overflow: # pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL) diff --git a/doc/sphinxext/ipython_sphinxext/ipython_directive.py b/doc/sphinxext/ipython_sphinxext/ipython_directive.py index 49fbacba99592..922767a8e2d46 100644 --- a/doc/sphinxext/ipython_sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_sphinxext/ipython_directive.py @@ -111,7 +111,7 @@ import sys import tempfile import ast -from pandas.compat import zip, range, map, lmap, u, cStringIO as StringIO +from pandas.compat import zip, range, map, lmap, u, text_type, cStringIO as StringIO import warnings # To keep compatibility with various python versions @@ -138,10 +138,8 @@ if PY3: from io import StringIO - text_type = str else: from StringIO import StringIO - text_type = unicode #----------------------------------------------------------------------------- # Globals diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 099761f38bb44..74ea120bf0b64 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -4,7 +4,7 @@ # copryright 2013, y-p @ github from __future__ import print_function -from pandas.compat import range, lrange, map +from pandas.compat import range, lrange, map, string_types, text_type """Search the git history for all commits touching a named method @@ -94,7 +94,7 @@ def get_hits(defname,files=()): def get_commit_info(c,fmt,sep='\t'): r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) - return compat.text_type(r).split(sep) + return text_type(r).split(sep) def get_commit_vitals(c,hlen=HASH_LEN): h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") @@ -183,11 +183,11 @@ def main(): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
""") return - if isinstance(args.file_masks,compat.string_types): + if isinstance(args.file_masks, string_types): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks,compat.string_types): + if isinstance(args.path_masks, string_types): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks,compat.string_types): + if isinstance(args.dir_masks, string_types): args.dir_masks = args.dir_masks.split(',') logger.setLevel(getattr(logging,args.debug_level)) diff --git a/scripts/windows_builder/build_27-32.bat b/scripts/windows_builder/build_27-32.bat deleted file mode 100644 index 37eb4d436d567..0000000000000 --- a/scripts/windows_builder/build_27-32.bat +++ /dev/null @@ -1,25 +0,0 @@ -@echo off -echo "starting 27-32" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x86 /release -set DISTUTILS_USE_SDK=1 - -title 27-32 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python27-32\python.exe setup.py build > build.27-32.log 2>&1 - -title "installing" -C:\python27-32\python.exe setup.py bdist --formats=wininst > install.27-32.log 2>&1 - -echo "testing" -C:\python27-32\scripts\nosetests -A "not slow" build\lib.win32-2.7\pandas > test.27-32.log 2>&1 - -echo "versions" -cd build\lib.win32-2.7 -C:\python27-32\python.exe ../../ci/print_versions.py > ../../versions.27-32.log 2>&1 - -exit - diff --git a/scripts/windows_builder/build_27-64.bat b/scripts/windows_builder/build_27-64.bat deleted file mode 100644 index e76e25d0ef39c..0000000000000 --- a/scripts/windows_builder/build_27-64.bat +++ /dev/null @@ -1,25 +0,0 @@ -@echo off -echo "starting 27-64" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release -set DISTUTILS_USE_SDK=1 - -title 27-64 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python27-64\python.exe setup.py build > build.27-64.log 2>&1 - -echo "installing" -C:\python27-64\python.exe setup.py bdist --formats=wininst > install.27-64.log 2>&1 - -echo "testing" -C:\python27-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.7\pandas > test.27-64.log 2>&1 - -echo "versions" -cd build\lib.win-amd64-2.7 -C:\python27-64\python.exe ../../ci/print_versions.py > ../../versions.27-64.log 2>&1 - -exit - diff --git a/scripts/windows_builder/build_34-32.bat b/scripts/windows_builder/build_34-32.bat deleted file mode 100644 index 8e060e000bc8f..0000000000000 --- a/scripts/windows_builder/build_34-32.bat +++ /dev/null @@ -1,27 +0,0 @@ -@echo off -echo "starting 34-32" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release -set DISTUTILS_USE_SDK=1 - -title 34-32 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python34-32\python.exe setup.py build > build.34-32.log 2>&1 - -echo "installing" -C:\python34-32\python.exe setup.py bdist --formats=wininst > install.34-32.log 2>&1 - -echo "testing" -C:\python34-32\scripts\nosetests -A "not slow" build\lib.win32-3.4\pandas > test.34-32.log 2>&1 - -echo "versions" -cd build\lib.win32-3.4 -C:\python34-32\python.exe ../../ci/print_versions.py > ../../versions.34-32.log 2>&1 - -exit - - - diff --git a/scripts/windows_builder/build_34-64.bat b/scripts/windows_builder/build_34-64.bat deleted file mode 100644 index 3a8512b730346..0000000000000 --- a/scripts/windows_builder/build_34-64.bat +++ /dev/null @@ -1,27 +0,0 @@ 
-@echo off -echo "starting 34-64" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release -set DISTUTILS_USE_SDK=1 - -title 34-64 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python34-64\python.exe setup.py build > build.34-64.log 2>&1 - -echo "installing" -C:\python34-64\python.exe setup.py bdist --formats=wininst > install.34-64.log 2>&1 - -echo "testing" -C:\python34-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.4\pandas > test.34-64.log 2>&1 - -echo "versions" -cd build\lib.win-amd64-3.4 -C:\python34-64\python.exe ../../ci/print_versions.py > ../../versions.34-64.log 2>&1 - -exit - - - diff --git a/scripts/windows_builder/check_and_build.bat b/scripts/windows_builder/check_and_build.bat deleted file mode 100644 index 32be1bde1f7f3..0000000000000 --- a/scripts/windows_builder/check_and_build.bat +++ /dev/null @@ -1,2 +0,0 @@ -set PYTHONPATH=c:/python27-64/lib -c:/python27-64/python.exe c:/Builds/check_and_build.py %1 %2 %3 %4 %4 %6 %7 %8 %9 diff --git a/scripts/windows_builder/check_and_build.py b/scripts/windows_builder/check_and_build.py deleted file mode 100644 index 2eb32fb4265d9..0000000000000 --- a/scripts/windows_builder/check_and_build.py +++ /dev/null @@ -1,194 +0,0 @@ -import datetime -import git -import logging -import os, re, time -import subprocess -import argparse -import pysftp - -# parse the args -parser = argparse.ArgumentParser(description='build, test, and install updated versions of master pandas') -parser.add_argument('-b', '--build', - help='run just this build', - dest='build') -parser.add_argument('-u', '--update', - help='get a git update', - dest='update', - action='store_true', - default=False) -parser.add_argument('-t', '--test', - help='run the tests', - dest='test', - action='store_true', - default=False) -parser.add_argument('-c', '--compare', - help='show the last tests compare', - dest='compare', - action='store_true', - default=False) -parser.add_argument('-v', '--version', - help='show the last versions', - dest='version', - action='store_true', - default=False) -parser.add_argument('-i', '--install', - help='run the install', - dest='install', - action='store_true', - default=False) -parser.add_argument('--dry', - help='dry run', - dest='dry', - action='store_true', - default=False) - -args = parser.parse_args() -dry_run = args.dry - -builds = ['27-32','27-64','34-32','34-64'] -base_dir = "C:\Users\Jeff Reback\Documents\GitHub\pandas" -remote_host='pandas.pydata.org' -username='pandas' -password=############ - -# drop python from our environment to avoid -# passing this onto sub-processes -env = os.environ -del env['PYTHONPATH'] - -# the stdout logger -fmt = '%(asctime)s: %(message)s' -logger = logging.getLogger('check_and_build') -logger.setLevel(logging.DEBUG) -stream_handler = logging.StreamHandler() -stream_handler.setFormatter(logging.Formatter(fmt)) -logger.addHandler(stream_handler) - -def run_all(test=False,compare=False,install=False,version=False,build=None): - # run everything - - for b in builds: - if build is not None and build != b: - continue - if test: - do_rebuild(b) - if compare or test: - try: - do_compare(b) - except (Exception) as e: - logger.info("ERROR COMPARE {0} : {1}".format(b,e)) - if version: - try: - do_version(b) - except (Exception) as e: - logger.info("ERROR VERSION {0} : {1}".format(b,e)) - - if install: - run_install() - -def do_rebuild(build): - # trigger the rebuild - - cmd = 
"c:/Builds/build_{0}.bat".format(build) - logger.info("rebuild : {0}".format(cmd)) - p = subprocess.Popen("start /wait /min {0}".format(cmd),env=env,shell=True,close_fds=True) - ret = p.wait() - -def do_compare(build): - # print the test outputs - - f = os.path.join(base_dir,"test.{0}.log".format(build)) - with open(f,'r') as fh: - for l in fh: - l = l.rstrip() - if l.startswith('ERROR:'): - logger.info("{0} : {1}".format(build,l)) - if l.startswith('Ran') or l.startswith('OK') or l.startswith('FAIL'): - logger.info("{0} : {1}".format(build,l)) - -def do_version(build): - # print the version strings - - f = os.path.join(base_dir,"versions.{0}.log".format(build)) - with open(f,'r') as fh: - for l in fh: - l = l.rstrip() - logger.info("{0} : {1}".format(build,l)) - -def do_update(is_verbose=True): - # update git; return True if the commit has changed - - repo = git.Repo(base_dir) - master = repo.heads.master - origin = repo.remotes.origin - start_commit = master.commit - - if is_verbose: - logger.info("current commit : {0}".format(start_commit)) - - try: - origin.update() - except (Exception) as e: - logger.info("update exception : {0}".format(e)) - try: - origin.pull() - except (Exception) as e: - logger.info("pull exception : {0}".format(e)) - - result = start_commit != master.commit - if result: - if is_verbose: - logger.info("commits changed : {0} -> {1}".format(start_commit,master.commit)) - return result - -def run_install(): - # send the installation binaries - - repo = git.Repo(base_dir) - master = repo.heads.master - commit = master.commit - short_hash = str(commit)[:7] - - logger.info("sending files : {0}".format(commit)) - d = os.path.join(base_dir,"dist") - files = [ f for f in os.listdir(d) if re.search(short_hash,f) ] - srv = pysftp.Connection(host=remote_host,username=username,password=password) - srv.chdir("www/pandas-build/dev") - - # get current files - remote_files = set(srv.listdir(path='.')) - - for f in files: - if f not in remote_files: - logger.info("sending: {0}".format(f)) - local = os.path.join(d,f) - srv.put(localpath=local) - - srv.close() - logger.info("sending files: done") - -# just perform the action -if args.update or args.test or args.compare or args.install or args.version: - if args.update: - do_update() - run_all(test=args.test,compare=args.compare,install=args.install,version=args.version,build=args.build) - exit(0) - -# file logging -file_handler = logging.FileHandler("C:\Builds\logs\check_and_build.log") -file_handler.setFormatter(logging.Formatter(fmt)) -logger.addHandler(file_handler) - -logger.info("start") - -# main loop -while(True): - - if do_update(): - run_all(test=True,install=False) - - time.sleep(60*60) - -logger.info("exit") -file_handler.close() - diff --git a/scripts/windows_builder/readme.txt b/scripts/windows_builder/readme.txt deleted file mode 100644 index 789e2a9ee0c63..0000000000000 --- a/scripts/windows_builder/readme.txt +++ /dev/null @@ -1,17 +0,0 @@ -This is a collection of windows batch scripts (and a python script) -to rebuild the binaries, test, and upload the binaries for public distribution -upon a commit on github. 
- -Obviously requires that these be setup on windows -Requires an install of Windows SDK 3.5 and 4.0 -Full python installs for each version with the deps - -Currently supporting - -27-32,27-64,34-32,34-64 - -Note that 34 use the 4.0 SDK, while the other suse 3.5 SDK - -I installed these scripts in C:\Builds - -Installed libaries in C:\Installs From 0bd871fb9634e8b73efcc1aeabb93961fbc43d53 Mon Sep 17 00:00:00 2001 From: kernc Date: Mon, 17 Jul 2017 17:11:37 +0200 Subject: [PATCH 47/54] PERF: SparseDataFrame._init_dict uses intermediary dict, not DataFrame (#16883) Closes gh-16773. --- asv_bench/benchmarks/sparse.py | 8 ++++++++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/sparse/frame.py | 9 +++------ pandas/tests/reshape/test_reshape.py | 4 ++++ pandas/tests/sparse/test_frame.py | 2 ++ 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 500149b89b08b..7259e8cdb7d61 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,3 +1,5 @@ +from itertools import repeat + from .pandas_vb_common import * import scipy.sparse from pandas import SparseSeries, SparseDataFrame @@ -27,6 +29,12 @@ class sparse_frame_constructor(object): def time_sparse_frame_constructor(self): SparseDataFrame(columns=np.arange(100), index=np.arange(1000)) + def time_sparse_from_scipy(self): + SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005)) + + def time_sparse_from_dict(self): + SparseDataFrame(dict(zip(range(1000), repeat([0])))) + class sparse_series_from_coo(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7c52cf6f450b2..935e9d740b91c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -136,6 +136,7 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 461dd50c5da6e..e157ae16e71f9 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -143,7 +143,7 @@ def _init_dict(self, data, index, columns, dtype=None): sp_maker = lambda x: SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, copy=True, dtype=dtype) - sdict = DataFrame() + sdict = {} for k, v in compat.iteritems(data): if isinstance(v, Series): # Force alignment, no copy necessary @@ -163,11 +163,8 @@ def _init_dict(self, data, index, columns, dtype=None): # TODO: figure out how to handle this case, all nan's? 
# add in any other columns we want to have (completeness) - nan_vec = np.empty(len(index)) - nan_vec.fill(nan) - for c in columns: - if c not in sdict: - sdict[c] = sp_maker(nan_vec) + nan_arr = sp_maker(np.full(len(index), np.nan)) + sdict.update((c, nan_arr) for c in columns if c not in sdict) return to_manager(sdict, columns, index) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index d47a95924bd10..632d3b4ad2e7a 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -643,6 +643,10 @@ def test_dataframe_dummies_preserve_categorical_dtype(self): class TestGetDummiesSparse(TestGetDummies): sparse = True + @pytest.mark.xfail(reason='nan in index is problematic (GH 16894)') + def test_include_na(self): + super(TestGetDummiesSparse, self).test_include_na() + class TestMakeAxisDummies(object): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 654d12b782f37..a5d514644a8f1 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1095,6 +1095,8 @@ def test_as_blocks(self): assert list(df_blocks.keys()) == ['float64'] tm.assert_frame_equal(df_blocks['float64'], df) + @pytest.mark.xfail(reason='nan column names in _init_dict problematic ' + '(GH 16894)') def test_nan_columnname(self): # GH 8822 nan_colname = DataFrame(Series(1.0, index=[0]), columns=[nan]) From dc54b6bbfd1da0947f3b66d4919e4b80e3207bce Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 17 Jul 2017 16:18:55 -0700 Subject: [PATCH 48/54] MAINT: Drop line_width and height from options (#16993) Deprecated since 0.11 and 0.12 respectively. --- doc/source/options.rst | 2 -- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/config_init.py | 16 ---------------- pandas/io/formats/console.py | 4 ++-- pandas/tests/io/formats/test_format.py | 2 +- 5 files changed, 5 insertions(+), 21 deletions(-) diff --git a/doc/source/options.rst b/doc/source/options.rst index f373705a96f48..c585da64efece 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -304,7 +304,6 @@ display.float_format None The callable should accept a fl This is used in some places like SeriesFormatter. See core.format.EngFormatter for an example. -display.height 60 Deprecated. Use `display.max_rows` instead. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show a truncated table (the default from 0.13), @@ -323,7 +322,6 @@ display.latex.multicolumn_format 'l' Alignment of multicolumn labels display.latex.multirow False Combines rows when using a MultiIndex. Centered instead of top-aligned, separated by clines. -display.line_width 80 Deprecated. Use `display.width` instead. 
display.max_columns 20 max_rows and max_columns are used in __repr__() methods to decide if to_string() or info() is used to diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 935e9d740b91c..c63d4575bac43 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -124,6 +124,8 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- The ``pd.options.display.height`` configuration has been dropped (:issue:`3663`) +- The ``pd.options.display.line_width`` configuration has been dropped (:issue:`2881`) - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index ae3001564a62f..06ce811703a8c 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -213,14 +213,6 @@ def use_numexpr_cb(key): (currently both are identical) """ -pc_line_width_deprecation_warning = """\ -line_width has been deprecated, use display.width instead (currently both are -identical) -""" - -pc_height_deprecation_warning = """\ -height has been deprecated. -""" pc_width_doc = """ : int @@ -383,14 +375,6 @@ def table_schema_cb(key): cf.register_option('html.border', 1, pc_html_border_doc, validator=is_int) - -cf.deprecate_option('display.line_width', - msg=pc_line_width_deprecation_warning, - rkey='display.width') - -cf.deprecate_option('display.height', msg=pc_height_deprecation_warning, - rkey='display.max_rows') - with cf.config_prefix('html'): cf.register_option('border', 1, pc_html_border_doc, validator=is_int) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index ab75e3fa253ce..bdff59939a4de 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -53,7 +53,7 @@ def get_console_size(): display_width = get_option('display.width') # deprecated. - display_height = get_option('display.height', silent=True) + display_height = get_option('display.max_rows') # Consider # interactive shell terminal, can detect term size @@ -71,7 +71,7 @@ def get_console_size(): # match default for width,height in config_init from pandas.core.config import get_default_val terminal_width = get_default_val('display.width') - terminal_height = get_default_val('display.height') + terminal_height = get_default_val('display.max_rows') else: # pure terminal terminal_width, terminal_height = get_terminal_size() diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 679d43ac492ca..e1499565ce4a6 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -302,7 +302,7 @@ def test_repr_non_interactive(self): df = DataFrame('hello', lrange(1000), lrange(5)) with option_context('mode.sim_interactive', False, 'display.width', 0, - 'display.height', 0, 'display.max_rows', 5000): + 'display.max_rows', 5000): assert not has_truncated_repr(df) assert not has_expanded_repr(df) From 81f8acef11e8d1e2f0ea78a7b57ee04bef1f6038 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 17 Jul 2017 16:29:57 -0700 Subject: [PATCH 49/54] COMPAT: Add back remove_na for seaborn (#16992) Closes gh-16971. 
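
A minimal sketch of the restored shim's behaviour (illustrative only,
assuming this patch is applied to a 0.21.0 development install; the shim
just warns and delegates to ``remove_na_arraylike``)::

    import warnings

    import pandas as pd
    from pandas.core.series import remove_na  # deprecated shim restored here

    s = pd.Series([1.0, None, 2.0])

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        result = remove_na(s)  # drops the NaN entry, as seaborn expects

    assert issubclass(w[-1].category, FutureWarning)  # deprecation is signalled
    assert result.tolist() == [1.0, 2.0]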
--- pandas/core/series.py | 12 +++++++++++- pandas/tests/series/test_missing.py | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4d5b718ce0ae9..219eca4277f32 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -37,7 +37,6 @@ maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike - from pandas.core.common import (is_bool_indexer, _default_index, _asarray_tuplesafe, @@ -88,6 +87,17 @@ versionadded_to_excel='\n .. versionadded:: 0.20.0\n') +# see gh-16971 +def remove_na(arr): + """ + DEPRECATED : this function will be removed in a future version. + """ + + warnings.warn("remove_na is deprecated and is a private " + "function. Do not use.", FutureWarning, stacklevel=2) + return remove_na_arraylike(arr) + + def _coerce_method(converter): """ install the scalar coercion methods """ diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8e73c17684a16..b5948e75aa73e 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -15,6 +15,7 @@ MultiIndex, Index, Timestamp, NaT, IntervalIndex) from pandas.compat import range from pandas._libs.tslib import iNaT +from pandas.core.series import remove_na from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -50,6 +51,11 @@ def _simple_ts(start, end, freq='D'): class TestSeriesMissingData(TestData): + def test_remove_na_deprecation(self): + # see gh-16971 + with tm.assert_produces_warning(FutureWarning): + remove_na(Series([])) + def test_timedelta_fillna(self): # GH 3371 s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( From 7b9a57fc99fcd63c55b041ea7c76f5c390c12aa0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 17 Jul 2017 21:31:42 -0400 Subject: [PATCH 50/54] COMPAT: np.full not available in all versions, xref #16773 (#17000) --- pandas/core/sparse/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index e157ae16e71f9..5fe96d70fc16f 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -163,7 +163,9 @@ def _init_dict(self, data, index, columns, dtype=None): # TODO: figure out how to handle this case, all nan's? # add in any other columns we want to have (completeness) - nan_arr = sp_maker(np.full(len(index), np.nan)) + nan_arr = np.empty(len(index), dtype='float64') + nan_arr.fill(np.nan) + nan_arr = sp_maker(nan_arr) sdict.update((c, nan_arr) for c in columns if c not in sdict) return to_manager(sdict, columns, index) From fcb0263762a31724ba6db39bf1564569dda068a0 Mon Sep 17 00:00:00 2001 From: Lucas Kushner Date: Tue, 18 Jul 2017 00:01:26 -0500 Subject: [PATCH 51/54] DOC, TST: Clarify whitespace behavior in read_fwf documentation (#16950) Closes gh-16772 --- doc/source/io.rst | 6 ++++- pandas/io/parsers.py | 13 ++++++----- pandas/tests/io/parser/test_read_fwf.py | 29 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 9bf84e5419ffa..495d4e9c3a5a3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1258,7 +1258,8 @@ Files with Fixed Width Columns While ``read_csv`` reads delimited data, the :func:`read_fwf` function works with data files that have known and fixed column widths. 
The function parameters -to ``read_fwf`` are largely the same as `read_csv` with two extra parameters: +to ``read_fwf`` are largely the same as `read_csv` with two extra parameters, and +a different usage of the ``delimiter`` parameter: - ``colspecs``: A list of pairs (tuples) giving the extents of the fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). @@ -1267,6 +1268,9 @@ to ``read_fwf`` are largely the same as `read_csv` with two extra parameters: behaviour, if not specified, is to infer. - ``widths``: A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. + - ``delimiter``: Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). .. ipython:: python :suppress: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 343bc7a74fde8..1e7d9d420b35d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -63,8 +63,6 @@ file. For file URLs, a host is expected. For instance, a local file could be file ://localhost/path/to/table.csv %s -delimiter : str, default ``None`` - Alternative argument name for sep. delim_whitespace : boolean, default False Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be used as the sep. Equivalent to setting ``sep='\s+'``. If this option @@ -316,7 +314,9 @@ be used automatically. In addition, separators longer than 1 character and different from ``'\s+'`` will be interpreted as regular expressions and will also force the use of the Python parsing engine. Note that regex - delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``""" + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'`` +delimiter : str, default ``None`` + Alternative argument name for sep.""" _read_csv_doc = """ Read CSV (comma-separated) file into DataFrame @@ -341,15 +341,16 @@ widths : list of ints. optional A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. +delimiter : str, default ``'\t' + ' '`` + Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). """ _read_fwf_doc = """ Read a table of fixed-width formatted lines into DataFrame %s - -Also, 'delimiter' is used to specify the filler character of the -fields if it is not spaces (e.g., '~'). 
""" % (_parser_params % (_fwf_widths, '')) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 0bfeb5215f370..ec1d1a2a51cdc 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -405,3 +405,32 @@ def test_skiprows_inference_empty(self): with pytest.raises(EmptyDataError): read_fwf(StringIO(test), skiprows=3) + + def test_whitespace_preservation(self): + # Addresses Issue #16772 + data_expected = """ + a ,bbb + cc,dd """ + expected = read_csv(StringIO(data_expected), header=None) + + test_data = """ + a bbb + ccdd """ + result = read_fwf(StringIO(test_data), widths=[3, 3], + header=None, skiprows=[0], delimiter="\n\t") + + tm.assert_frame_equal(result, expected) + + def test_default_delimiter(self): + data_expected = """ +a,bbb +cc,dd""" + expected = read_csv(StringIO(data_expected), header=None) + + test_data = """ +a \tbbb +cc\tdd """ + result = read_fwf(StringIO(test_data), widths=[3, 3], + header=None, skiprows=[0]) + + tm.assert_frame_equal(result, expected) From 9e7666dae3b3b10d987ce154a51c78bcee6e0728 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Tue, 18 Jul 2017 06:26:44 -0500 Subject: [PATCH 52/54] API: add infer_objects for soft conversions (#16915) * API: add infer_objects for soft conversions * doc fixups * fixups * doc --- doc/source/api.rst | 2 + doc/source/basics.rst | 23 ++++++++- doc/source/whatsnew/v0.21.0.txt | 32 +++++++++++++ pandas/core/generic.py | 56 ++++++++++++++++++++-- pandas/tests/frame/test_block_internals.py | 26 ++++++++++ pandas/tests/series/test_dtypes.py | 18 +++++++ 6 files changed, 153 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d6053791d6f4b..77d095a965221 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -270,6 +270,7 @@ Conversion :toctree: generated/ Series.astype + Series.infer_objects Series.copy Series.isnull Series.notnull @@ -777,6 +778,7 @@ Conversion DataFrame.astype DataFrame.convert_objects + DataFrame.infer_objects DataFrame.copy DataFrame.isnull DataFrame.notnull diff --git a/doc/source/basics.rst b/doc/source/basics.rst index d8b1602fb104d..4211b15203721 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -2024,7 +2024,28 @@ object conversion ~~~~~~~~~~~~~~~~~ pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types. -The following functions are available for one dimensional object arrays or scalars: +In cases where the data is already of the correct type, but stored in an ``object`` array, the +:meth:`~DataFrame.infer_objects` and :meth:`~Series.infer_objects` can be used to soft convert +to the correct type. + + .. ipython:: python + + df = pd.DataFrame([[1, 2], + ['a', 'b'], + [datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)]]) + df = df.T + df + df.dtypes + +Because the data transposed the original inference stored all columns as object, which +``infer_objects`` will correct. + + .. 
ipython:: python
+
+    df.infer_objects().dtypes
+
+The following functions are available for one dimensional object arrays or scalars to perform
+hard conversion of objects to a specified type:
 
 - :meth:`~pandas.to_numeric` (conversion to numeric dtypes)
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index c63d4575bac43..cba3691b25ab1 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -25,6 +25,39 @@ New features
 - Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`,
   and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
 
+
+.. _whatsnew_0210.enhancements.infer_objects:
+
+``infer_objects`` type conversion
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :meth:`~DataFrame.infer_objects` and :meth:`~Series.infer_objects`
+methods have been added to perform dtype inference on object columns, replacing
+some of the functionality of the deprecated ``convert_objects``
+method. See the documentation :ref:`here <basics.object_conversion>`
+for more details. (:issue:`11221`)
+
+This function only performs soft conversions on object columns, converting Python objects
+to native types, but not any coercive conversions. For example:
+
+.. ipython:: python
+
+    df = pd.DataFrame({'A': [1, 2, 3],
+                       'B': np.array([1, 2, 3], dtype='object'),
+                       'C': ['1', '2', '3']})
+    df.dtypes
+    df.infer_objects().dtypes
+
+Note that column ``'C'`` was not converted - only scalar numeric types
+will be inferred to a new type. Other types of conversion should be accomplished
+using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`).
+
+.. ipython:: python
+
+    df = df.infer_objects()
+    df['C'] = pd.to_numeric(df['C'], errors='coerce')
+    df.dtypes
+
 .. _whatsnew_0210.enhancements.other:
 
 Other Enhancements
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f12592feaa4c3..c95129bdaa005 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3671,9 +3671,12 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
         converted : same as input object
         """
         from warnings import warn
-        warn("convert_objects is deprecated. Use the data-type specific "
-             "converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.",
-             FutureWarning, stacklevel=2)
+        msg = ("convert_objects is deprecated. To re-infer data dtypes for "
+               "object columns, use {klass}.infer_objects()\nFor all "
+               "other conversions use the data-type specific converters "
+               "pd.to_datetime, pd.to_timedelta and pd.to_numeric."
+               ).format(klass=self.__class__.__name__)
+        warn(msg, FutureWarning, stacklevel=2)
 
         return self._constructor(
             self._data.convert(convert_dates=convert_dates,
@@ -3681,6 +3684,53 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
                                convert_timedeltas=convert_timedeltas,
                                copy=copy)).__finalize__(self)
 
+    def infer_objects(self):
+        """
+        Attempt to infer better dtypes for object columns.
+
+        Attempts soft conversion of object-dtyped
+        columns, leaving non-object and unconvertible
+        columns unchanged. The inference rules are the
+        same as during normal Series/DataFrame construction.
+
+        .. versionadded:: 0.21.0
+
+        See Also
+        --------
+        pandas.to_datetime : Convert argument to datetime.
+        pandas.to_timedelta : Convert argument to timedelta.
+        pandas.to_numeric : Convert argument to numeric type.
+
+        Returns
+        -------
+        converted : same type as input object
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
+        >>> df = df.iloc[1:]
+        >>> df
+           A
+        1  1
+        2  2
+        3  3
+
+        >>> df.dtypes
+        A    object
+        dtype: object
+
+        >>> df.infer_objects().dtypes
+        A    int64
+        dtype: object
+        """
+        # numeric=False necessary to only soft convert;
+        # python objects will still be converted to
+        # native numpy numeric types
+        return self._constructor(
+            self._data.convert(datetime=True, numeric=False,
+                               timedelta=True, coerce=False,
+                               copy=True)).__finalize__(self)
+
     # ----------------------------------------------------------------------
     # Filling NA's
 
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index c1a5b437be5d0..f66070fd66813 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -495,6 +495,32 @@ def test_convert_objects_no_conversion(self):
         mixed2 = mixed1._convert(datetime=True)
         assert_frame_equal(mixed1, mixed2)
 
+    def test_infer_objects(self):
+        # GH 11221
+        df = DataFrame({'a': ['a', 1, 2, 3],
+                        'b': ['b', 2.0, 3.0, 4.1],
+                        'c': ['c', datetime(2016, 1, 1),
+                              datetime(2016, 1, 2),
+                              datetime(2016, 1, 3)],
+                        'd': [1, 2, 3, 'd']},
+                       columns=['a', 'b', 'c', 'd'])
+        df = df.iloc[1:].infer_objects()
+
+        assert df['a'].dtype == 'int64'
+        assert df['b'].dtype == 'float64'
+        assert df['c'].dtype == 'M8[ns]'
+        assert df['d'].dtype == 'object'
+
+        expected = DataFrame({'a': [1, 2, 3],
+                              'b': [2.0, 3.0, 4.1],
+                              'c': [datetime(2016, 1, 1),
+                                    datetime(2016, 1, 2),
+                                    datetime(2016, 1, 3)],
+                              'd': [2, 3, 'd']},
+                             columns=['a', 'b', 'c', 'd'])
+        # reconstruct frame to verify inference is same
+        tm.assert_frame_equal(df.reset_index(drop=True), expected)
+
     def test_stale_cached_series_bug_473(self):
 
         # this is chained, but ok
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 2ec579842e33f..c214280ee8386 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -268,3 +268,21 @@ def test_series_to_categorical(self):
         expected = Series(['a', 'b', 'c'], dtype='category')
 
         tm.assert_series_equal(result, expected)
+
+    def test_infer_objects_series(self):
+        # GH 11221
+        actual = Series(np.array([1, 2, 3], dtype='O')).infer_objects()
+        expected = Series([1, 2, 3])
+        tm.assert_series_equal(actual, expected)
+
+        actual = Series(np.array([1, 2, 3, None], dtype='O')).infer_objects()
+        expected = Series([1., 2., 3., np.nan])
+        tm.assert_series_equal(actual, expected)
+
+        # only soft conversions, unconvertible values pass through unchanged
+        actual = (Series(np.array([1, 2, 3, None, 'a'], dtype='O'))
+                  .infer_objects())
+        expected = Series([1, 2, 3, None, 'a'])
+
+        assert actual.dtype == 'object'
+        tm.assert_series_equal(actual, expected)
From 6a5e56dc9402136e74e8c818a6947fd495bcd3b2 Mon Sep 17 00:00:00 2001
From: Jon Crall
Date: Tue, 18 Jul 2017 11:58:55 -0400
Subject: [PATCH 53/54] BUG: np.inf now causes Index to upcast from int to float (#16996)

Closes gh-16957.
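
A minimal sketch of the behaviour change (illustrative only, assuming this
patch is applied; it mirrors the new tests added in test_indexing.py below)::

    import numpy as np
    import pandas as pd

    # Membership checks against integer indexes no longer overflow:
    assert np.inf not in pd.Int64Index([0, 1, 2])  # previously OverflowError

    # Setting a row label of np.inf now upcasts the index to float64:
    df = pd.DataFrame(columns=[0])
    df.loc[1] = 1
    df.loc[2] = 2
    df.loc[np.inf] = 3
    assert df.loc[np.inf, 0] == 3  # index is now Float64Index([1.0, 2.0, inf])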
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 6 +-- pandas/tests/indexing/test_indexing.py | 56 ++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index cba3691b25ab1..2259eb7d89534 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -179,6 +179,7 @@ Bug Fixes ~~~~~~~~~ - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bbbc19b36964d..5d50f961927c7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -666,7 +666,7 @@ def _try_convert_to_int_index(cls, data, copy, name): res = data.astype('u8', copy=False) if (res == data).all(): return UInt64Index(res, copy=copy, name=name) - except (TypeError, ValueError): + except (OverflowError, TypeError, ValueError): pass raise ValueError @@ -1640,7 +1640,7 @@ def __contains__(self, key): hash(key) try: return key in self._engine - except (TypeError, ValueError): + except (OverflowError, TypeError, ValueError): return False _index_shared_docs['contains'] = """ @@ -3365,7 +3365,7 @@ def _maybe_cast_indexer(self, key): ckey = int(key) if ckey == key: key = ckey - except (ValueError, TypeError): + except (OverflowError, ValueError, TypeError): pass return key diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9fa677eb624ae..98f5d5eb140df 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -63,6 +63,34 @@ def f(): pytest.raises(ValueError, f) + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float + + # Test with np.inf in rows + df = pd.DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 + + # make sure we can look up the value + assert df.loc[np.inf, 0] == 3 + + result = df.index + expected = pd.Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) + + # Test with np.inf in columns + df = pd.DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 + + result = df.columns + expected = pd.Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) + def test_setitem_dtype_upcast(self): # GH3216 @@ -542,6 +570,34 @@ def test_astype_assignment_with_dups(self): # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), 2), + (pd.Index([0, 1, '2']), '2'), + (pd.Index([0, 1, 2, np.inf, 4]), 4), + (pd.Index([0, 1, 2, np.nan, 4]), 4), + (pd.Index([0, 1, 2, np.inf]), np.inf), + (pd.Index([0, 1, 2, np.nan]), np.nan), + ]) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), '2'), + (pd.Index([0, 1, '2']), 2), + (pd.Index([0, 1, 2, np.inf]), 4), + (pd.Index([0, 1, 2, np.nan]), 4), + (pd.Index([0, 1, 2, np.inf]), np.nan), + (pd.Index([0, 1, 2, np.nan]), np.inf), + # Checking if np.inf in Int64Index should not cause an OverflowError + # Related to GH 16957 + (pd.Int64Index([0, 1, 2]), np.inf), + (pd.Int64Index([0, 1, 2]), np.nan), + (pd.UInt64Index([0, 1, 
2]), np.inf), + (pd.UInt64Index([0, 1, 2]), np.nan), + ]) + def test_index_not_contains(self, index, val): + assert val not in index + def test_index_type_coercion(self): with catch_warnings(record=True): From 34210ac4d8c61ec4d695baba24d84bd7a1826af4 Mon Sep 17 00:00:00 2001 From: parchd-1 Date: Tue, 18 Jul 2017 18:08:03 +0200 Subject: [PATCH 54/54] DOC: Make highlight functions match documentation (#16999) Closes gh-16998. --- pandas/io/formats/style.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b08d3877f3b03..d88a230b42403 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1054,9 +1054,9 @@ def highlight_max(self, subset=None, color='yellow', axis=0): subset: IndexSlice, default None a valid slice for ``data`` to limit the style application to color: str, default 'yellow' - axis: int, str, or None; default None - 0 or 'index' for columnwise, 1 or 'columns' for rowwise - or ``None`` for tablewise (the default) + axis: int, str, or None; default 0 + 0 or 'index' for columnwise (default), 1 or 'columns' for rowwise, + or ``None`` for tablewise Returns ------- @@ -1076,9 +1076,9 @@ def highlight_min(self, subset=None, color='yellow', axis=0): subset: IndexSlice, default None a valid slice for ``data`` to limit the style application to color: str, default 'yellow' - axis: int, str, or None; default None - 0 or 'index' for columnwise, 1 or 'columns' for rowwise - or ``None`` for tablewise (the default) + axis: int, str, or None; default 0 + 0 or 'index' for columnwise (default), 1 or 'columns' for rowwise, + or ``None`` for tablewise Returns -------