`_.
index_col : str or list of str, optional, default: None
Column(s) to set as index(MultiIndex).
@@ -937,7 +938,7 @@ def _get_column_names_and_types(self, dtype_mapper):
return column_names_and_types
def _create_table_setup(self):
- from sqlalchemy import Table, Column, PrimaryKeyConstraint
+ from sqlalchemy import Column, PrimaryKeyConstraint, Table
column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type)
@@ -1026,15 +1027,15 @@ def _sqlalchemy_type(self, col):
col_type = lib.infer_dtype(col, skipna=True)
from sqlalchemy.types import (
+ TIMESTAMP,
BigInteger,
- Integer,
- Float,
- Text,
Boolean,
- DateTime,
Date,
+ DateTime,
+ Float,
+ Integer,
+ Text,
Time,
- TIMESTAMP,
)
if col_type == "datetime64" or col_type == "datetime":
@@ -1079,7 +1080,7 @@ def _sqlalchemy_type(self, col):
return Text
def _get_dtype(self, sqltype):
- from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP
+ from sqlalchemy.types import TIMESTAMP, Boolean, Date, DateTime, Float, Integer
if isinstance(sqltype, Float):
return float
@@ -1374,7 +1375,7 @@ def to_sql(
dtype = {col_name: dtype for col_name in frame}
if dtype is not None:
- from sqlalchemy.types import to_instance, TypeEngine
+ from sqlalchemy.types import TypeEngine, to_instance
for col, my_type in dtype.items():
if not isinstance(to_instance(my_type), TypeEngine):
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 7677d8a94d521..cb23b781a7ad2 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -1643,8 +1643,7 @@ def read(
data = self._insert_strls(data)
- cols_ = np.where(self.dtyplist)[0]
-
+ cols_ = np.where([dtyp is not None for dtyp in self.dtyplist])[0]
# Convert columns (if needed) to match input type
ix = data.index
requires_type_conversion = False
@@ -1953,7 +1952,10 @@ def _open_file_binary_write(
"""
if hasattr(fname, "write"):
# See https://github.com/python/mypy/issues/1424 for hasattr challenges
- return fname, False, None # type: ignore
+ # error: Incompatible return value type (got "Tuple[Union[str, Path,
+ # IO[Any]], bool, None]", expected "Tuple[BinaryIO, bool, Union[str,
+ # Mapping[str, str], None]]")
+ return fname, False, None # type: ignore[return-value]
elif isinstance(fname, (str, Path)):
# Extract compression mode as given, if dict
compression_typ, compression_args = get_compression_method(compression)
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 353bc8a8936a5..b490e07e43753 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1149,8 +1149,8 @@ def _plot(cls, ax, x, y, style=None, column_num=None, stacking_id=None, **kwds):
@classmethod
def _ts_plot(cls, ax, x, data, style=None, **kwds):
from pandas.plotting._matplotlib.timeseries import (
- _maybe_resample,
_decorate_axes,
+ _maybe_resample,
format_dateaxis,
)
diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py
index 8f3571cf13cbc..eef4276f0ed09 100644
--- a/pandas/plotting/_matplotlib/timeseries.py
+++ b/pandas/plotting/_matplotlib/timeseries.py
@@ -24,7 +24,7 @@
from pandas.tseries.frequencies import get_period_alias, is_subperiod, is_superperiod
if TYPE_CHECKING:
- from pandas import Series, Index # noqa:F401
+ from pandas import Index, Series # noqa:F401
# ---------------------------------------------------------------------
@@ -45,7 +45,10 @@ def _maybe_resample(series: "Series", ax, kwargs):
if ax_freq is not None and freq != ax_freq:
if is_superperiod(freq, ax_freq): # upsample input
series = series.copy()
- series.index = series.index.asfreq(ax_freq, how="s") # type: ignore
+ # error: "Index" has no attribute "asfreq"
+ series.index = series.index.asfreq( # type: ignore[attr-defined]
+ ax_freq, how="s"
+ )
freq = ax_freq
elif _is_sup(freq, ax_freq): # one is weekly
how = kwargs.pop("how", "last")
@@ -222,7 +225,8 @@ def _get_index_freq(index: "Index") -> Optional[BaseOffset]:
if freq is None:
freq = getattr(index, "inferred_freq", None)
if freq == "B":
- weekdays = np.unique(index.dayofweek) # type: ignore
+ # error: "Index" has no attribute "dayofweek"
+ weekdays = np.unique(index.dayofweek) # type: ignore[attr-defined]
if (5 in weekdays) or (6 in weekdays):
freq = None
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index ecd20796b6f21..caa348d3a1fb9 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -267,9 +267,10 @@ def test_sparsearray():
def test_np():
- import numpy as np
import warnings
+ import numpy as np
+
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
assert (pd.np.arange(0, 10) == np.arange(0, 10)).all()
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
index 2155846b271fc..484f83deb0f55 100644
--- a/pandas/tests/arithmetic/test_numeric.py
+++ b/pandas/tests/arithmetic/test_numeric.py
@@ -548,20 +548,6 @@ class TestMultiplicationDivision:
# __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__
# for non-timestamp/timedelta/period dtypes
- @pytest.mark.parametrize(
- "box",
- [
- pytest.param(
- pd.Index,
- marks=pytest.mark.xfail(
- reason="Index.__div__ always raises", raises=TypeError
- ),
- ),
- pd.Series,
- pd.DataFrame,
- ],
- ids=lambda x: x.__name__,
- )
def test_divide_decimal(self, box):
# resolves issue GH#9787
ser = Series([Decimal(10)])
diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py
index f94408d657ae5..64d3d5b6d684d 100644
--- a/pandas/tests/arithmetic/test_timedelta64.py
+++ b/pandas/tests/arithmetic/test_timedelta64.py
@@ -1733,6 +1733,23 @@ def test_tdarr_div_length_mismatch(self, box_with_array):
# ------------------------------------------------------------------
# __floordiv__, __rfloordiv__
+ def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array):
+ # GH#35529
+ box = box_with_array
+
+ left = pd.Series([1000, 222330, 30], dtype="timedelta64[ns]")
+ right = pd.Series([1000, 222330, None], dtype="timedelta64[ns]")
+
+ left = tm.box_expected(left, box)
+ right = tm.box_expected(right, box)
+
+ expected = np.array([1.0, 1.0, np.nan], dtype=np.float64)
+ expected = tm.box_expected(expected, box)
+
+ result = left // right
+
+ tm.assert_equal(result, expected)
+
def test_td64arr_floordiv_tdscalar(self, box_with_array, scalar_td):
# GH#18831
td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
index d517eaaec68d2..0176755b54dd1 100644
--- a/pandas/tests/arrays/interval/test_interval.py
+++ b/pandas/tests/arrays/interval/test_interval.py
@@ -142,6 +142,7 @@ def test_repr():
@pyarrow_skip
def test_arrow_extension_type():
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
p1 = ArrowIntervalType(pa.int64(), "left")
@@ -158,6 +159,7 @@ def test_arrow_extension_type():
@pyarrow_skip
def test_arrow_array():
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
intervals = pd.interval_range(1, 5, freq=1).array
@@ -187,6 +189,7 @@ def test_arrow_array():
@pyarrow_skip
def test_arrow_array_missing():
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
@@ -221,6 +224,7 @@ def test_arrow_array_missing():
)
def test_arrow_table_roundtrip(breaks):
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
arr = IntervalArray.from_breaks(breaks)
diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py
index 8887dd0278afe..0d81e8e733842 100644
--- a/pandas/tests/arrays/test_period.py
+++ b/pandas/tests/arrays/test_period.py
@@ -359,6 +359,7 @@ def test_arrow_extension_type():
)
def test_arrow_array(data, freq):
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
periods = period_array(data, freq=freq)
@@ -384,6 +385,7 @@ def test_arrow_array(data, freq):
@pyarrow_skip
def test_arrow_array_missing():
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
arr = PeriodArray([1, 2, 3], freq="D")
@@ -399,6 +401,7 @@ def test_arrow_array_missing():
@pyarrow_skip
def test_arrow_table_roundtrip():
import pyarrow as pa
+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
arr = PeriodArray([1, 2, 3], freq="D")
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index ce12718e48d0d..a6c526fcb008a 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -746,3 +746,13 @@ def test_astype_object_preserves_datetime_na(from_type):
result = astype_nansafe(arr, dtype="object")
assert isna(result)[0]
+
+
+def test_validate_allhashable():
+ assert com.validate_all_hashable(1, "a") is None
+
+ with pytest.raises(TypeError, match="All elements must be hashable"):
+ com.validate_all_hashable([])
+
+ with pytest.raises(TypeError, match="list must be a hashable type"):
+ com.validate_all_hashable([], error_name="list")
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
index 359acf230ce14..c93603398977e 100644
--- a/pandas/tests/extension/base/ops.py
+++ b/pandas/tests/extension/base/ops.py
@@ -114,10 +114,13 @@ def test_error(self, data, all_arithmetic_operators):
with pytest.raises(AttributeError):
getattr(data, op_name)
- def test_direct_arith_with_series_returns_not_implemented(self, data):
- # EAs should return NotImplemented for ops with Series.
+ @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
+ def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box):
+ # EAs should return NotImplemented for ops with Series/DataFrame
# Pandas takes care of unboxing the series and calling the EA's op.
other = pd.Series(data)
+ if box is pd.DataFrame:
+ other = other.to_frame()
if hasattr(data, "__add__"):
result = data.__add__(other)
assert result is NotImplemented
@@ -156,10 +159,14 @@ def test_compare_array(self, data, all_compare_operators):
other = pd.Series([data[0]] * len(data))
self._compare_other(s, data, op_name, other)
- def test_direct_arith_with_series_returns_not_implemented(self, data):
- # EAs should return NotImplemented for ops with Series.
+ @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
+ def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box):
+ # EAs should return NotImplemented for ops with Series/DataFrame
# Pandas takes care of unboxing the series and calling the EA's op.
other = pd.Series(data)
+ if box is pd.DataFrame:
+ other = other.to_frame()
+
if hasattr(data, "__eq__"):
result = data.__eq__(other)
assert result is NotImplemented
diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py
index b1eb276bfc227..817881e00fa99 100644
--- a/pandas/tests/extension/test_period.py
+++ b/pandas/tests/extension/test_period.py
@@ -126,9 +126,13 @@ def test_add_series_with_extension_array(self, data):
def test_error(self):
pass
- def test_direct_arith_with_series_returns_not_implemented(self, data):
+ @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
+ def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box):
# Override to use __sub__ instead of __add__
other = pd.Series(data)
+ if box is pd.DataFrame:
+ other = other.to_frame()
+
result = data.__sub__(other)
assert result is NotImplemented
diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py
index 9ec029a6c4304..8f6902eca816f 100644
--- a/pandas/tests/frame/methods/test_shift.py
+++ b/pandas/tests/frame/methods/test_shift.py
@@ -145,6 +145,33 @@ def test_shift_duplicate_columns(self):
tm.assert_frame_equal(shifted[0], shifted[1])
tm.assert_frame_equal(shifted[0], shifted[2])
+ def test_shift_axis1_multiple_blocks(self):
+ # GH#35488
+ df1 = pd.DataFrame(np.random.randint(1000, size=(5, 3)))
+ df2 = pd.DataFrame(np.random.randint(1000, size=(5, 2)))
+ df3 = pd.concat([df1, df2], axis=1)
+ assert len(df3._mgr.blocks) == 2
+
+ result = df3.shift(2, axis=1)
+
+ expected = df3.take([-1, -1, 0, 1, 2], axis=1)
+ expected.iloc[:, :2] = np.nan
+ expected.columns = df3.columns
+
+ tm.assert_frame_equal(result, expected)
+
+ # Case with periods < 0
+ # rebuild df3 because `take` call above consolidated
+ df3 = pd.concat([df1, df2], axis=1)
+ assert len(df3._mgr.blocks) == 2
+ result = df3.shift(-2, axis=1)
+
+ expected = df3.take([2, 3, 4, -1, -1], axis=1)
+ expected.iloc[:, -2:] = np.nan
+ expected.columns = df3.columns
+
+ tm.assert_frame_equal(result, expected)
+
@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning")
def test_tshift(self, datetime_frame):
# TODO: remove this test when tshift deprecation is enforced
diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index 5216c3be116e0..dcc33428d18a5 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -555,8 +555,8 @@ def test_sort_index_and_reconstruction(self):
),
)
- df.columns.set_levels(
- pd.to_datetime(df.columns.levels[1]), level=1, inplace=True
+ df.columns = df.columns.set_levels(
+ pd.to_datetime(df.columns.levels[1]), level=1
)
assert not df.columns.is_lexsorted()
assert not df.columns.is_monotonic
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 9d6b9f39a0578..52a1e3aae9058 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -287,7 +287,7 @@ def test_stat_op_api(self, float_frame, float_string_frame):
assert_stat_op_api("median", float_frame, float_string_frame)
try:
- from scipy.stats import skew, kurtosis # noqa:F401
+ from scipy.stats import kurtosis, skew # noqa:F401
assert_stat_op_api("skew", float_frame, float_string_frame)
assert_stat_op_api("kurt", float_frame, float_string_frame)
@@ -370,7 +370,7 @@ def kurt(x):
)
try:
- from scipy import skew, kurtosis # noqa:F401
+ from scipy import kurtosis, skew # noqa:F401
assert_stat_op_calc("skew", skewness, float_frame_with_na)
assert_stat_op_calc("kurt", kurt, float_frame_with_na)
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 2b79fc8cd3406..cc57a3970d18b 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -367,6 +367,13 @@ def test_to_numpy_copy(self):
assert df.to_numpy(copy=False).base is arr
assert df.to_numpy(copy=True).base is not arr
+ def test_to_numpy_mixed_dtype_to_str(self):
+ # https://github.com/pandas-dev/pandas/issues/35455
+ df = pd.DataFrame([[pd.Timestamp("2020-01-01 00:00:00"), 100.0]])
+ result = df.to_numpy(dtype=str)
+ expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str)
+ tm.assert_numpy_array_equal(result, expected)
+
def test_swapaxes(self):
df = DataFrame(np.random.randn(10, 5))
tm.assert_frame_equal(df.T, df.swapaxes(0, 1))
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index a4ed548264d39..d0f774344a33d 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1618,6 +1618,42 @@ def test_constructor_Series_differently_indexed(self):
tm.assert_index_equal(df2.index, other_index)
tm.assert_frame_equal(df2, exp2)
+ @pytest.mark.parametrize(
+ "name_in1,name_in2,name_in3,name_out",
+ [
+ ("idx", "idx", "idx", "idx"),
+ ("idx", "idx", None, "idx"),
+ ("idx", None, None, "idx"),
+ ("idx1", "idx2", None, None),
+ ("idx1", "idx1", "idx2", None),
+ ("idx1", "idx2", "idx3", None),
+ (None, None, None, None),
+ ],
+ )
+ def test_constructor_index_names(self, name_in1, name_in2, name_in3, name_out):
+ # GH13475
+ indices = [
+ pd.Index(["a", "b", "c"], name=name_in1),
+ pd.Index(["b", "c", "d"], name=name_in2),
+ pd.Index(["c", "d", "e"], name=name_in3),
+ ]
+ series = {
+ c: pd.Series([0, 1, 2], index=i) for i, c in zip(indices, ["x", "y", "z"])
+ }
+ result = pd.DataFrame(series)
+
+ exp_ind = pd.Index(["a", "b", "c", "d", "e"], name=name_out)
+ expected = pd.DataFrame(
+ {
+ "x": [0, 1, 2, np.nan, np.nan],
+ "y": [np.nan, 0, 1, 2, np.nan],
+ "z": [np.nan, np.nan, 0, 1, 2],
+ },
+ index=exp_ind,
+ )
+
+ tm.assert_frame_equal(result, expected)
+
def test_constructor_manager_resize(self, float_frame):
index = list(float_frame.index[:5])
columns = list(float_frame.columns[:3])
@@ -2619,6 +2655,12 @@ class DatetimeSubclass(datetime):
data = pd.DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]})
assert data.datetime.dtype == "datetime64[ns]"
+ def test_with_mismatched_index_length_raises(self):
+ # GH#33437
+ dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
+ with pytest.raises(ValueError, match="Shape of passed values"):
+ DataFrame(dti, index=range(4))
+
class TestDataFrameConstructorWithDatetimeTZ:
def test_from_dict(self):
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 264cf40dc6984..e8cd6017a117c 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -486,13 +486,13 @@ def test_agg_timezone_round_trip():
assert ts == grouped.first()["B"].iloc[0]
# GH#27110 applying iloc should return a DataFrame
- assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 0]
+ assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
ts = df["B"].iloc[2]
assert ts == grouped.last()["B"].iloc[0]
# GH#27110 applying iloc should return a DataFrame
- assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 0]
+ assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
def test_sum_uint64_overflow():
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 5a1268bfb03db..ee38722ffb8ce 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import date, datetime
from io import StringIO
import numpy as np
@@ -63,15 +63,8 @@ def test_apply_trivial():
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(
- reason="GH#20066; function passed into apply "
- "returns a DataFrame with the same index "
- "as the one to create GroupBy object."
-)
def test_apply_trivial_fail():
# GH 20066
- # trivial apply fails if the constant dataframe has the same index
- # with the one used to create GroupBy object.
df = pd.DataFrame(
{"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
columns=["key", "data"],
@@ -1014,3 +1007,82 @@ def test_apply_with_timezones_aware():
result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy())
tm.assert_frame_equal(result1, result2)
+
+
+def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
+ # GH #34656
+ # GH #34271
+ df = DataFrame(
+ {
+ "a": [99, 99, 99, 88, 88, 88],
+ "b": [1, 2, 3, 4, 5, 6],
+ "c": [10, 20, 30, 40, 50, 60],
+ }
+ )
+
+ expected = pd.DataFrame(
+ {"a": [264, 297], "b": [15, 6], "c": [150, 60]},
+ index=pd.Index([88, 99], name="a"),
+ )
+
+ # Check output when no other methods are called before .apply()
+ grp = df.groupby(by="a")
+ result = grp.apply(sum)
+ tm.assert_frame_equal(result, expected)
+
+ # Check output when another method is called before .apply()
+ grp = df.groupby(by="a")
+ args = {"nth": [0], "corrwith": [df]}.get(reduction_func, [])
+ _ = getattr(grp, reduction_func)(*args)
+ result = grp.apply(sum)
+ tm.assert_frame_equal(result, expected)
+
+
+def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp():
+ # GH 29617
+
+ df = pd.DataFrame(
+ {
+ "A": ["a", "a", "a", "b"],
+ "B": [
+ date(2020, 1, 10),
+ date(2020, 1, 10),
+ date(2020, 2, 10),
+ date(2020, 2, 10),
+ ],
+ "C": [1, 2, 3, 4],
+ },
+ index=pd.Index([100, 101, 102, 103], name="idx"),
+ )
+
+ grp = df.groupby(["A", "B"])
+ result = grp.apply(lambda x: x.head(1))
+
+ expected = df.iloc[[0, 2, 3]]
+ expected = expected.reset_index()
+ expected.index = pd.MultiIndex.from_frame(expected[["A", "B", "idx"]])
+ expected = expected.drop(columns="idx")
+
+ tm.assert_frame_equal(result, expected)
+ for val in result.index.levels[1]:
+ assert type(val) is date
+
+
+def test_apply_by_cols_equals_apply_by_rows_transposed():
+ # GH 16646
+ # Operating on the columns, or transposing and operating on the rows
+ # should give the same result. There was previously a bug where the
+ # by_rows operation would work fine, but by_cols would throw a ValueError
+
+ df = pd.DataFrame(
+ np.random.random([6, 4]),
+ columns=pd.MultiIndex.from_product([["A", "B"], [1, 2]]),
+ )
+
+ by_rows = df.T.groupby(axis=0, level=0).apply(
+ lambda x: x.droplevel(axis=0, level=0)
+ )
+ by_cols = df.groupby(axis=1, level=0).apply(lambda x: x.droplevel(axis=1, level=0))
+
+ tm.assert_frame_equal(by_cols, by_rows.T)
+ tm.assert_frame_equal(by_cols, df)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 0d447a70b540d..c74c1529eb537 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -19,7 +19,7 @@
import pandas._testing as tm
-def cartesian_product_for_groupers(result, args, names):
+def cartesian_product_for_groupers(result, args, names, fill_value=np.NaN):
""" Reindex to a cartesian production for the groupers,
preserving the nature (Categorical) of each grouper
"""
@@ -33,7 +33,7 @@ def f(a):
return a
index = MultiIndex.from_product(map(f, args), names=names)
- return result.reindex(index).sort_index()
+ return result.reindex(index, fill_value=fill_value).sort_index()
_results_for_groupbys_with_missing_categories = dict(
@@ -309,7 +309,7 @@ def test_observed(observed):
result = gb.sum()
if not observed:
expected = cartesian_product_for_groupers(
- expected, [cat1, cat2, ["foo", "bar"]], list("ABC")
+ expected, [cat1, cat2, ["foo", "bar"]], list("ABC"), fill_value=0
)
tm.assert_frame_equal(result, expected)
@@ -319,7 +319,9 @@ def test_observed(observed):
expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
result = gb.sum()
if not observed:
- expected = cartesian_product_for_groupers(expected, [cat1, cat2], list("AB"))
+ expected = cartesian_product_for_groupers(
+ expected, [cat1, cat2], list("AB"), fill_value=0
+ )
tm.assert_frame_equal(result, expected)
@@ -1189,6 +1191,8 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
).sortlevel()
expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C")
+ if operation == "agg":
+ expected = expected.fillna(0, downcast="infer")
grouped = df_cat.groupby(["A", "B"], observed=observed)["C"]
result = getattr(grouped, operation)(sum)
tm.assert_series_equal(result, expected)
@@ -1338,15 +1342,6 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
)
request.node.add_marker(mark)
- if reduction_func == "sum": # GH 31422
- mark = pytest.mark.xfail(
- reason=(
- "sum should return 0 but currently returns NaN. "
- "This is a known bug. See GH 31422."
- )
- )
- request.node.add_marker(mark)
-
df = pd.DataFrame(
{
"cat_1": pd.Categorical(list("AABB"), categories=list("ABC")),
@@ -1367,8 +1362,11 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
val = result.loc[idx]
assert (pd.isna(zero_or_nan) and pd.isna(val)) or (val == zero_or_nan)
- # If we expect unobserved values to be zero, we also expect the dtype to be int
- if zero_or_nan == 0:
+ # If we expect unobserved values to be zero, we also expect the dtype to be int.
+ # Except for .sum(). If the observed categories sum to dtype=float (i.e. their
+ # sums have decimals), then the zeros for the missing categories should also be
+ # floats.
+ if zero_or_nan == 0 and reduction_func != "sum":
assert np.issubdtype(result.dtype, np.integer)
@@ -1410,24 +1408,6 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
if reduction_func == "ngroup":
pytest.skip("ngroup does not return the Categories on the index")
- if reduction_func == "count": # GH 35028
- mark = pytest.mark.xfail(
- reason=(
- "DataFrameGroupBy.count returns np.NaN for missing "
- "categories, when it should return 0. See GH 35028"
- )
- )
- request.node.add_marker(mark)
-
- if reduction_func == "sum": # GH 31422
- mark = pytest.mark.xfail(
- reason=(
- "sum should return 0 but currently returns NaN. "
- "This is a known bug. See GH 31422."
- )
- )
- request.node.add_marker(mark)
-
df = pd.DataFrame(
{
"cat_1": pd.Categorical(list("AABB"), categories=list("ABC")),
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index e693962e57ac3..42945be923fa0 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -940,10 +940,6 @@ def test_frame_describe_multikey(tsframe):
groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1)
result = groupedT.describe()
expected = tsframe.describe().T
- expected.index = pd.MultiIndex(
- levels=[[0, 1], expected.index],
- codes=[[0, 0, 1, 1], range(len(expected.index))],
- )
tm.assert_frame_equal(result, expected)
@@ -992,6 +988,68 @@ def test_frame_describe_unstacked_format():
tm.assert_frame_equal(result, expected)
+@pytest.mark.filterwarnings(
+ "ignore:"
+ "indexing past lexsort depth may impact performance:"
+ "pandas.errors.PerformanceWarning"
+)
+@pytest.mark.parametrize("as_index", [True, False])
+def test_describe_with_duplicate_output_column_names(as_index):
+ # GH 35314
+ df = pd.DataFrame(
+ {
+ "a": [99, 99, 99, 88, 88, 88],
+ "b": [1, 2, 3, 4, 5, 6],
+ "c": [10, 20, 30, 40, 50, 60],
+ },
+ columns=["a", "b", "b"],
+ )
+
+ expected = (
+ pd.DataFrame.from_records(
+ [
+ ("a", "count", 3.0, 3.0),
+ ("a", "mean", 88.0, 99.0),
+ ("a", "std", 0.0, 0.0),
+ ("a", "min", 88.0, 99.0),
+ ("a", "25%", 88.0, 99.0),
+ ("a", "50%", 88.0, 99.0),
+ ("a", "75%", 88.0, 99.0),
+ ("a", "max", 88.0, 99.0),
+ ("b", "count", 3.0, 3.0),
+ ("b", "mean", 5.0, 2.0),
+ ("b", "std", 1.0, 1.0),
+ ("b", "min", 4.0, 1.0),
+ ("b", "25%", 4.5, 1.5),
+ ("b", "50%", 5.0, 2.0),
+ ("b", "75%", 5.5, 2.5),
+ ("b", "max", 6.0, 3.0),
+ ("b", "count", 3.0, 3.0),
+ ("b", "mean", 5.0, 2.0),
+ ("b", "std", 1.0, 1.0),
+ ("b", "min", 4.0, 1.0),
+ ("b", "25%", 4.5, 1.5),
+ ("b", "50%", 5.0, 2.0),
+ ("b", "75%", 5.5, 2.5),
+ ("b", "max", 6.0, 3.0),
+ ],
+ )
+ .set_index([0, 1])
+ .T
+ )
+ expected.columns.names = [None, None]
+ expected.index = pd.Index([88, 99], name="a")
+
+ if as_index:
+ expected = expected.drop(columns=["a"], level=0)
+ else:
+ expected = expected.reset_index(drop=True)
+
+ result = df.groupby("a", as_index=as_index).describe()
+
+ tm.assert_frame_equal(result, expected)
+
+
def test_groupby_mean_no_overflow():
# Regression test for (#22487)
df = pd.DataFrame(
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index ebce5b0ef0a66..8c51ebf89f5c0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2055,3 +2055,17 @@ def test_groups_repr_truncates(max_seq_items, expected):
result = df.groupby(np.array(df.a)).groups.__repr__()
assert result == expected
+
+
+def test_group_on_two_row_multiindex_returns_one_tuple_key():
+ # GH 18451
+ df = pd.DataFrame([{"a": 1, "b": 2, "c": 99}, {"a": 1, "b": 2, "c": 88}])
+ df = df.set_index(["a", "b"])
+
+ grp = df.groupby(["a", "b"])
+ result = grp.indices
+ expected = {(1, 2): np.array([0, 1], dtype=np.int64)}
+
+ assert len(result) == 1
+ key = (1, 2)
+ assert (result[key] == expected[key]).all()
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 1a525d306e9f5..adf62c4723526 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -162,6 +162,40 @@ def test_groupby_dropna_series_by(dropna, expected):
tm.assert_series_equal(result, expected)
+@pytest.mark.parametrize(
+ "dropna,df_expected,s_expected",
+ [
+ pytest.param(
+ True,
+ pd.DataFrame({"B": [2, 2, 1]}),
+ pd.Series(data=[2, 2, 1], name="B"),
+ marks=pytest.mark.xfail(raises=ValueError),
+ ),
+ (
+ False,
+ pd.DataFrame({"B": [2, 2, 1, 1]}),
+ pd.Series(data=[2, 2, 1, 1], name="B"),
+ ),
+ ],
+)
+def test_slice_groupby_then_transform(dropna, df_expected, s_expected):
+    # GH35014: ``transform`` on a groupby and on column selections of that
+    # groupby; the parametrized frames/series hold the expected group sizes.
+    df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]})
+    gb = df.groupby("A", dropna=dropna)
+
+    res = gb.transform(len)
+    tm.assert_frame_equal(res, df_expected)
+    # column-list selection: the result is compared as a frame
+    gb_slice = gb[["B"]]
+    res = gb_slice.transform(len)
+    tm.assert_frame_equal(res, df_expected)
+    # single-column selection: reuse the slice instead of re-slicing ``gb``
+    gb_slice = gb["B"]
+    res = gb_slice.transform(len)
+    tm.assert_series_equal(res, s_expected)
+
+
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index efcd22f9c0c82..40b4ce46e550b 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -191,13 +191,15 @@ def test_grouper_creation_bug(self):
result = g.sum()
tm.assert_frame_equal(result, expected)
- result = g.apply(lambda x: x.sum())
- tm.assert_frame_equal(result, expected)
-
g = df.groupby(pd.Grouper(key="A", axis=0))
result = g.sum()
tm.assert_frame_equal(result, expected)
+ result = g.apply(lambda x: x.sum())
+ expected["A"] = [0, 2, 4]
+ expected = expected.loc[:, ["A", "B"]]
+ tm.assert_frame_equal(result, expected)
+
# GH14334
# pd.Grouper(key=...) may be passed in a list
df = DataFrame(
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
index 7f30a77872bc1..b325edb321ed4 100644
--- a/pandas/tests/indexes/categorical/test_category.py
+++ b/pandas/tests/indexes/categorical/test_category.py
@@ -43,7 +43,14 @@ def test_disallow_addsub_ops(self, func, op_name):
# GH 10039
# set ops (+/-) raise TypeError
idx = pd.Index(pd.Categorical(["a", "b"]))
- msg = f"cannot perform {op_name} with this index type: CategoricalIndex"
+ cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
+ msg = "|".join(
+ [
+ f"cannot perform {op_name} with this index type: CategoricalIndex",
+ "can only concatenate list",
+ rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
+ ]
+ )
with pytest.raises(TypeError, match=msg):
func(idx)
@@ -478,3 +485,9 @@ def test_reindex_base(self):
def test_map_str(self):
# See test_map.py
pass
+
+ def test_format_different_scalar_lengths(self):
+ # GH35439
+ idx = CategoricalIndex(["aaaaaaaaa", "b"])
+ expected = ["aaaaaaaaa", "b"]
+ assert idx.format() == expected
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index c8b780455f862..98f7c0eadb4bb 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -5,6 +5,7 @@
import pytest
from pandas._libs import iNaT
+from pandas.compat.numpy import _is_numpy_dev
from pandas.errors import InvalidIndexError
from pandas.core.dtypes.common import is_datetime64tz_dtype
@@ -145,22 +146,41 @@ def test_numeric_compat(self):
# Check that this doesn't cover MultiIndex case, if/when it does,
# we can remove multi.test_compat.test_numeric_compat
assert not isinstance(idx, MultiIndex)
+ if type(idx) is Index:
+ return
- with pytest.raises(TypeError, match="cannot perform __mul__"):
+ typ = type(idx._data).__name__
+ lmsg = "|".join(
+ [
+ rf"unsupported operand type\(s\) for \*: '{typ}' and 'int'",
+ "cannot perform (__mul__|__truediv__|__floordiv__) with "
+ f"this index type: {typ}",
+ ]
+ )
+ with pytest.raises(TypeError, match=lmsg):
idx * 1
- with pytest.raises(TypeError, match="cannot perform __rmul__"):
+ rmsg = "|".join(
+ [
+ rf"unsupported operand type\(s\) for \*: 'int' and '{typ}'",
+ "cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with "
+ f"this index type: {typ}",
+ ]
+ )
+ with pytest.raises(TypeError, match=rmsg):
1 * idx
- div_err = "cannot perform __truediv__"
+ div_err = lmsg.replace("*", "/")
with pytest.raises(TypeError, match=div_err):
idx / 1
-
- div_err = div_err.replace(" __", " __r")
+ div_err = rmsg.replace("*", "/")
with pytest.raises(TypeError, match=div_err):
1 / idx
- with pytest.raises(TypeError, match="cannot perform __floordiv__"):
+
+ floordiv_err = lmsg.replace("*", "//")
+ with pytest.raises(TypeError, match=floordiv_err):
idx // 1
- with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
+ floordiv_err = rmsg.replace("*", "//")
+ with pytest.raises(TypeError, match=floordiv_err):
1 // idx
def test_logical_compat(self):
@@ -250,6 +270,20 @@ def test_copy_name(self, index):
s3 = s1 * s2
assert s3.index.name == "mario"
+ def test_name2(self, index):
+ # gh-35592
+ if isinstance(index, MultiIndex):
+ return
+
+ assert index.copy(name="mario").name == "mario"
+
+ with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
+ index.copy(name=["mario", "luigi"])
+
+ msg = f"{type(index).__name__}.name must be a hashable type"
+ with pytest.raises(TypeError, match=msg):
+ index.copy(name=[["mario"]])
+
def test_ensure_copied_data(self, index):
# Check the "copy" argument of each Index.__new__ is honoured
# GH12309
@@ -417,7 +451,7 @@ def test_set_ops_error_cases(self, case, method, index):
with pytest.raises(TypeError, match=msg):
getattr(index, method)(case)
- def test_intersection_base(self, index):
+ def test_intersection_base(self, index, request):
if isinstance(index, CategoricalIndex):
return
@@ -434,6 +468,15 @@ def test_intersection_base(self, index):
# GH 10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
+ # https://github.com/pandas-dev/pandas/issues/35481
+ if (
+ _is_numpy_dev
+ and isinstance(case, Series)
+ and isinstance(index, UInt64Index)
+ ):
+ mark = pytest.mark.xfail(reason="gh-35481")
+ request.node.add_marker(mark)
+
result = first.intersection(case)
assert tm.equalContents(result, second)
@@ -632,6 +675,12 @@ def test_equals_op(self):
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))
+ def test_format(self):
+ # GH35439
+ idx = self.create_index()
+ expected = [str(x) for x in idx]
+ assert idx.format() == expected
+
def test_hasnans_isnans(self, index):
# GH 11343, added tests for hasnans / isnans
if isinstance(index, MultiIndex):
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index c150e7901c86a..9a855a1624520 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -787,6 +787,65 @@ def test_construction_with_nat_and_tzlocal(self):
expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
tm.assert_index_equal(result, expected)
+ def test_constructor_with_ambiguous_keyword_arg(self):
+ # GH 35297
+
+ expected = DatetimeIndex(
+ ["2020-11-01 01:00:00", "2020-11-02 01:00:00"],
+ dtype="datetime64[ns, America/New_York]",
+ freq="D",
+ ambiguous=False,
+ )
+
+ # ambiguous keyword in start
+ timezone = "America/New_York"
+ start = pd.Timestamp(year=2020, month=11, day=1, hour=1).tz_localize(
+ timezone, ambiguous=False
+ )
+ result = pd.date_range(start=start, periods=2, ambiguous=False)
+ tm.assert_index_equal(result, expected)
+
+ # ambiguous keyword in end
+ timezone = "America/New_York"
+ end = pd.Timestamp(year=2020, month=11, day=2, hour=1).tz_localize(
+ timezone, ambiguous=False
+ )
+ result = pd.date_range(end=end, periods=2, ambiguous=False)
+ tm.assert_index_equal(result, expected)
+
+ def test_constructor_with_nonexistent_keyword_arg(self):
+ # GH 35297
+
+ timezone = "Europe/Warsaw"
+
+ # nonexistent keyword in start
+ start = pd.Timestamp("2015-03-29 02:30:00").tz_localize(
+ timezone, nonexistent="shift_forward"
+ )
+ result = pd.date_range(start=start, periods=2, freq="H")
+ expected = DatetimeIndex(
+ [
+ pd.Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
+ pd.Timestamp("2015-03-29 04:00:00+02:00", tz=timezone),
+ ]
+ )
+
+ tm.assert_index_equal(result, expected)
+
+ # nonexistent keyword in end
+ end = pd.Timestamp("2015-03-29 02:30:00").tz_localize(
+ timezone, nonexistent="shift_forward"
+ )
+ result = pd.date_range(end=end, periods=2, freq="H")
+ expected = DatetimeIndex(
+ [
+ pd.Timestamp("2015-03-29 01:00:00+01:00", tz=timezone),
+ pd.Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
+ ]
+ )
+
+ tm.assert_index_equal(result, expected)
+
def test_constructor_no_precision_raises(self):
# GH-24753, GH-24739
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index ec4162f87010f..7bb1d98086a91 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -59,6 +59,7 @@ def test_reindex_with_same_tz(self):
def test_time_loc(self): # GH8667
from datetime import time
+
from pandas._libs.index import _SIZE_CUTOFF
ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)
diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py
index 7345ae3032463..a5abf2946feda 100644
--- a/pandas/tests/indexes/datetimes/test_datetimelike.py
+++ b/pandas/tests/indexes/datetimes/test_datetimelike.py
@@ -20,6 +20,12 @@ def index(self, request):
def create_index(self) -> DatetimeIndex:
return date_range("20130101", periods=5)
+ def test_format(self):
+ # GH35439
+ idx = self.create_index()
+ expected = [f"{x:%Y-%m-%d}" for x in idx]
+ assert idx.format() == expected
+
def test_shift(self):
pass # handled in test_ops
diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py
index d1f66af4a8e83..b2500efef9e03 100644
--- a/pandas/tests/indexes/multi/test_compat.py
+++ b/pandas/tests/indexes/multi/test_compat.py
@@ -84,7 +84,8 @@ def test_inplace_mutation_resets_values():
tm.assert_almost_equal(mi1.values, vals)
# Inplace should kill _tuples
- mi1.set_levels(levels2, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ mi1.set_levels(levels2, inplace=True)
tm.assert_almost_equal(mi1.values, vals2)
# Make sure label setting works too
@@ -103,7 +104,8 @@ def test_inplace_mutation_resets_values():
tm.assert_almost_equal(exp_values, new_values)
# ...and again setting inplace should kill _tuples, etc
- mi2.set_codes(codes2, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ mi2.set_codes(codes2, inplace=True)
tm.assert_almost_equal(mi2.values, new_values)
diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py
index e48731b9c8099..9add4b478da47 100644
--- a/pandas/tests/indexes/multi/test_duplicates.py
+++ b/pandas/tests/indexes/multi/test_duplicates.py
@@ -91,7 +91,8 @@ def test_duplicate_multiindex_codes():
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
with pytest.raises(ValueError, match=msg):
- mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]], inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]], inplace=True)
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py
index 063ede028add7..b48f09457b96c 100644
--- a/pandas/tests/indexes/multi/test_equivalence.py
+++ b/pandas/tests/indexes/multi/test_equivalence.py
@@ -192,10 +192,12 @@ def test_is_():
mi4 = mi3.view()
# GH 17464 - Remove duplicate MultiIndex levels
- mi4.set_levels([list(range(10)), list(range(10))], inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ mi4.set_levels([list(range(10)), list(range(10))], inplace=True)
assert not mi4.is_(mi3)
mi5 = mi.view()
- mi5.set_levels(mi5.levels, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ mi5.set_levels(mi5.levels, inplace=True)
assert not mi5.is_(mi)
diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py
index 8a3deca0236e4..b9132f429905d 100644
--- a/pandas/tests/indexes/multi/test_get_set.py
+++ b/pandas/tests/indexes/multi/test_get_set.py
@@ -93,7 +93,8 @@ def test_set_levels(idx):
# level changing [w/ mutation]
ind2 = idx.copy()
- inplace_return = ind2.set_levels(new_levels, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_levels(new_levels, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, new_levels)
@@ -113,20 +114,23 @@ def test_set_levels(idx):
# level changing specific level [w/ mutation]
ind2 = idx.copy()
- inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, [new_levels[0], levels[1]])
assert_matching(idx.levels, levels)
ind2 = idx.copy()
- inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, [levels[0], new_levels[1]])
assert_matching(idx.levels, levels)
# level changing multiple levels [w/ mutation]
ind2 = idx.copy()
- inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, new_levels)
assert_matching(idx.levels, levels)
@@ -136,19 +140,23 @@ def test_set_levels(idx):
original_index = idx.copy()
for inplace in [True, False]:
with pytest.raises(ValueError, match="^On"):
- idx.set_levels(["c"], level=0, inplace=inplace)
+ with tm.assert_produces_warning(FutureWarning):
+ idx.set_levels(["c"], level=0, inplace=inplace)
assert_matching(idx.levels, original_index.levels, check_dtype=True)
with pytest.raises(ValueError, match="^On"):
- idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace)
+ with tm.assert_produces_warning(FutureWarning):
+ idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace)
assert_matching(idx.codes, original_index.codes, check_dtype=True)
with pytest.raises(TypeError, match="^Levels"):
- idx.set_levels("c", level=0, inplace=inplace)
+ with tm.assert_produces_warning(FutureWarning):
+ idx.set_levels("c", level=0, inplace=inplace)
assert_matching(idx.levels, original_index.levels, check_dtype=True)
with pytest.raises(TypeError, match="^Codes"):
- idx.set_codes(1, level=0, inplace=inplace)
+ with tm.assert_produces_warning(FutureWarning):
+ idx.set_codes(1, level=0, inplace=inplace)
assert_matching(idx.codes, original_index.codes, check_dtype=True)
@@ -168,7 +176,8 @@ def test_set_codes(idx):
# changing label w/ mutation
ind2 = idx.copy()
- inplace_return = ind2.set_codes(new_codes, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_codes(new_codes, inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, new_codes)
@@ -188,20 +197,23 @@ def test_set_codes(idx):
# label changing specific level w/ mutation
ind2 = idx.copy()
- inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, [new_codes[0], codes[1]])
assert_matching(idx.codes, codes)
ind2 = idx.copy()
- inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, [codes[0], new_codes[1]])
assert_matching(idx.codes, codes)
# codes changing multiple levels [w/ mutation]
ind2 = idx.copy()
- inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, new_codes)
assert_matching(idx.codes, codes)
@@ -217,7 +229,8 @@ def test_set_codes(idx):
# [w/ mutation]
result = ind.copy()
- result.set_codes(codes=new_codes, level=1, inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ result.set_codes(codes=new_codes, level=1, inplace=True)
assert result.equals(expected)
@@ -329,3 +342,19 @@ def test_set_levels_with_iterable():
[expected_sizes, colors], names=["size", "color"]
)
tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+def test_set_codes_inplace_deprecated(idx, inplace):
+ new_codes = idx.codes[1][::-1]
+
+ with tm.assert_produces_warning(FutureWarning):
+ idx.set_codes(codes=new_codes, level=1, inplace=inplace)
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+def test_set_levels_inplace_deprecated(idx, inplace):
+ new_level = idx.levels[1].copy()
+
+ with tm.assert_produces_warning(FutureWarning):
+ idx.set_levels(levels=new_level, level=1, inplace=inplace)
diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py
index fd150bb4d57a2..c776a33717ccd 100644
--- a/pandas/tests/indexes/multi/test_integrity.py
+++ b/pandas/tests/indexes/multi/test_integrity.py
@@ -220,7 +220,8 @@ def test_metadata_immutable(idx):
def test_level_setting_resets_attributes():
ind = pd.MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert ind.is_monotonic
- ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True)
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic
diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py
index 479b5ef0211a0..f38da7ad2ae1c 100644
--- a/pandas/tests/indexes/multi/test_names.py
+++ b/pandas/tests/indexes/multi/test_names.py
@@ -75,6 +75,13 @@ def test_copy_names():
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx3.names == ["NewName1", "NewName2"]
+ # gh-35592
+ with pytest.raises(ValueError, match="Length of new names must be 2, got 1"):
+ multi_idx.copy(names=["mario"])
+
+ with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"):
+ multi_idx.copy(names=[["mario"], ["luigi"]])
+
def test_names(idx, index_names):
diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
index 5b6f9cb358b7d..ef4bb9a0869b0 100644
--- a/pandas/tests/indexes/ranges/test_range.py
+++ b/pandas/tests/indexes/ranges/test_range.py
@@ -137,53 +137,58 @@ def test_dtype(self):
index = self.create_index()
assert index.dtype == np.int64
- def test_cached_data(self):
- # GH 26565, GH26617
- # Calling RangeIndex._data caches an int64 array of the same length at
- # self._cached_data. This test checks whether _cached_data has been set
+ def test_cache(self):
+ # GH 26565, GH26617, GH35432
+ # This test checks whether _cache has been set.
+ # Calling RangeIndex._cache["_data"] creates an int64 array of the same length
+ # as the RangeIndex and stores it in _cache.
idx = RangeIndex(0, 100, 10)
- assert idx._cached_data is None
+ assert idx._cache == {}
repr(idx)
- assert idx._cached_data is None
+ assert idx._cache == {}
str(idx)
- assert idx._cached_data is None
+ assert idx._cache == {}
idx.get_loc(20)
- assert idx._cached_data is None
+ assert idx._cache == {}
- 90 in idx
- assert idx._cached_data is None
+ 90 in idx # True
+ assert idx._cache == {}
- 91 in idx
- assert idx._cached_data is None
+ 91 in idx # False
+ assert idx._cache == {}
idx.all()
- assert idx._cached_data is None
+ assert idx._cache == {}
idx.any()
- assert idx._cached_data is None
+ assert idx._cache == {}
df = pd.DataFrame({"a": range(10)}, index=idx)
df.loc[50]
- assert idx._cached_data is None
+ assert idx._cache == {}
with pytest.raises(KeyError, match="51"):
df.loc[51]
- assert idx._cached_data is None
+ assert idx._cache == {}
df.loc[10:50]
- assert idx._cached_data is None
+ assert idx._cache == {}
df.iloc[5:10]
- assert idx._cached_data is None
+ assert idx._cache == {}
- # actually calling idx._data
+ # idx._cache should contain a _data entry after call to idx._data
+ idx._data
assert isinstance(idx._data, np.ndarray)
- assert isinstance(idx._cached_data, np.ndarray)
+ assert idx._data is idx._data # check cached value is reused
+ assert len(idx._cache) == 4
+ expected = np.arange(0, 100, 10, dtype="int64")
+ tm.assert_numpy_array_equal(idx._cache["_data"], expected)
def test_is_monotonic(self):
index = RangeIndex(0, 20, 2)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index eaf48421dc071..70eb9e502f78a 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1171,8 +1171,11 @@ def test_summary_bug(self):
assert "~:{range}:0" in result
assert "{other}%s" in result
- def test_format(self, index):
- self._check_method_works(Index.format, index)
+ def test_format_different_scalar_lengths(self):
+ # GH35439
+ idx = Index(["aaaaaaaaa", "b"])
+ expected = ["aaaaaaaaa", "b"]
+ assert idx.format() == expected
def test_format_bug(self):
# GH 14626
@@ -1511,23 +1514,24 @@ def test_slice_locs_na_raises(self):
@pytest.mark.parametrize(
"in_slice,expected",
[
+ # error: Slice index must be an integer or None
(pd.IndexSlice[::-1], "yxdcb"),
- (pd.IndexSlice["b":"y":-1], ""), # type: ignore
- (pd.IndexSlice["b"::-1], "b"), # type: ignore
- (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore
- (pd.IndexSlice[:"y":-1], "y"), # type: ignore
- (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore
- (pd.IndexSlice["y"::-4], "yb"), # type: ignore
+ (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
+ (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
+ (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
+ (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
+ (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
+ (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
# absent labels
- (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore
- (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore
- (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore
- (pd.IndexSlice["z"::-3], "yc"), # type: ignore
- (pd.IndexSlice["m"::-1], "dcb"), # type: ignore
- (pd.IndexSlice[:"m":-1], "yx"), # type: ignore
- (pd.IndexSlice["a":"a":-1], ""), # type: ignore
- (pd.IndexSlice["z":"z":-1], ""), # type: ignore
- (pd.IndexSlice["m":"m":-1], ""), # type: ignore
+ (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
+ (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
+ (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
+ (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
+ (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
+ (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
+ (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
+ (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
+ (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
],
)
def test_slice_locs_negative_step(self, in_slice, expected):
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
index a7c5734ef9b02..bfcac5d433d2c 100644
--- a/pandas/tests/indexes/test_numeric.py
+++ b/pandas/tests/indexes/test_numeric.py
@@ -21,6 +21,13 @@ def test_can_hold_identifiers(self):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is False
+ def test_format(self):
+ # GH35439
+ idx = self.create_index()
+ max_width = max(len(str(x)) for x in idx)
+ expected = [str(x).ljust(max_width) for x in idx]
+ assert idx.format() == expected
+
def test_numeric_compat(self):
pass # override Base method
diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py
index be193e0854d8d..d8e56661b7d61 100644
--- a/pandas/tests/indexing/multiindex/test_indexing_slow.py
+++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -15,7 +15,7 @@ def test_multiindex_get_loc(): # GH7724, GH2646
with warnings.catch_warnings(record=True):
# test indexing into a multi-index before & past the lexsort depth
- from numpy.random import randint, choice, randn
+ from numpy.random import choice, randint, randn
cols = ["jim", "joe", "jolie", "joline", "jolia"]
diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py
index 572cb9da405d1..bafe5068e1418 100644
--- a/pandas/tests/indexing/multiindex/test_sorted.py
+++ b/pandas/tests/indexing/multiindex/test_sorted.py
@@ -43,9 +43,13 @@ def test_frame_getitem_not_sorted2(self, key):
df2 = df.set_index(["col1", "col2"])
df2_original = df2.copy()
- return_value = df2.index.set_levels(["b", "d", "a"], level="col1", inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ return_value = df2.index.set_levels(
+ ["b", "d", "a"], level="col1", inplace=True
+ )
assert return_value is None
- return_value = df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True)
+ with tm.assert_produces_warning(FutureWarning):
+ return_value = df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True)
assert return_value is None
assert not df2.index.is_lexsorted()
assert not df2.index.is_monotonic
diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py
index b807795b9c309..91be1d913001b 100644
--- a/pandas/tests/indexing/multiindex/test_xs.py
+++ b/pandas/tests/indexing/multiindex/test_xs.py
@@ -1,7 +1,7 @@
import numpy as np
import pytest
-from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
+from pandas import DataFrame, Index, IndexSlice, MultiIndex, Series, concat, date_range
import pandas._testing as tm
import pandas.core.common as com
@@ -220,6 +220,27 @@ def test_xs_level_series_slice_not_implemented(
s[2000, 3:4]
+def test_xs_IndexSlice_argument_not_implemented():
+ # GH 35301
+
+ index = MultiIndex(
+ levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
+ codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
+ )
+
+ series = Series(np.random.randn(6), index=index)
+ frame = DataFrame(np.random.randn(6, 4), index=index)
+
+ msg = (
+ "Expected label or tuple of labels, got "
+ r"\(\('foo', 'qux', 0\), slice\(None, None, None\)\)"
+ )
+ with pytest.raises(TypeError, match=msg):
+ frame.xs(IndexSlice[("foo", "qux", 0), :])
+ with pytest.raises(TypeError, match=msg):
+ series.xs(IndexSlice[("foo", "qux", 0), :])
+
+
def test_series_getitem_multiindex_xs():
# GH6258
dt = list(date_range("20130903", periods=3))
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 30b13b6ea9fce..193800fae751f 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -5,6 +5,8 @@
import numpy as np
import pytest
+from pandas.compat.numpy import _is_numpy_dev
+
import pandas as pd
from pandas import DataFrame, Series, Timestamp, date_range
import pandas._testing as tm
@@ -945,6 +947,7 @@ def test_loc_setitem_empty_append(self):
df.loc[0, "x"] = expected.loc[0, "x"]
tm.assert_frame_equal(df, expected)
+ @pytest.mark.xfail(_is_numpy_dev, reason="gh-35481")
def test_loc_setitem_empty_append_raises(self):
# GH6173, various appends to an empty dataframe
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index e236b3da73c69..84805d06df4a8 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -2141,6 +2141,15 @@ def test_dict_entries(self):
assert "'a': 1" in val
assert "'b': 2" in val
+ def test_categorical_columns(self):
+ # GH35439
+ data = [[4, 2], [3, 2], [4, 3]]
+ cols = ["aaaaaaaaa", "b"]
+ df = pd.DataFrame(data, columns=cols)
+ df_cat_cols = pd.DataFrame(data, columns=pd.CategoricalIndex(cols))
+
+ assert df.to_string() == df_cat_cols.to_string()
+
def test_period(self):
# GH 12615
df = pd.DataFrame(
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
index 9c6910637fa7e..3ef5157655e78 100644
--- a/pandas/tests/io/formats/test_style.py
+++ b/pandas/tests/io/formats/test_style.py
@@ -1682,6 +1682,12 @@ def f(a, b, styler):
result = styler.pipe((f, "styler"), a=1, b=2)
assert result == (1, 2, styler)
+ def test_no_cell_ids(self):
+ # GH 35588
+ df = pd.DataFrame(data=[[0]])
+ s = Styler(df, uuid="_", cell_ids=False).render()
+ assert s.find('<td  class="data row0 col0" >') != -1
+
@td.skip_if_no_mpl
class TestStylerMatplotlibDep:
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 4c86e3a16b135..753b8b6eda9c5 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -607,3 +607,39 @@ def test_to_csv_errors(self, errors):
ser.to_csv(path, errors=errors)
# No use in reading back the data as it is not the same anymore
# due to the error handling
+
+ def test_to_csv_binary_handle(self):
+ """
+ Binary file objects should work if 'mode' contains a 'b'.
+
+ GH 35058 and GH 19827
+ """
+ df = tm.makeDataFrame()
+ with tm.ensure_clean() as path:
+ with open(path, mode="w+b") as handle:
+ df.to_csv(handle, mode="w+b")
+ tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
+
+ def test_to_csv_encoding_binary_handle(self):
+ """
+ Binary file objects should honor a specified encoding.
+
+ GH 23854 and GH 13068 with binary handles
+ """
+ # example from GH 23854
+ content = "a, b, 🐟".encode("utf-8-sig")
+ buffer = io.BytesIO(content)
+ df = pd.read_csv(buffer, encoding="utf-8-sig")
+
+ buffer = io.BytesIO()
+ df.to_csv(buffer, mode="w+b", encoding="utf-8-sig", index=False)
+ buffer.seek(0) # tests whether file handle wasn't closed
+ assert buffer.getvalue().startswith(content)
+
+ # example from GH 13068
+ with tm.ensure_clean() as path:
+ with open(path, "w+b") as handle:
+ pd.DataFrame().to_csv(handle, mode="w+b", encoding="utf-8-sig")
+
+ handle.seek(0)
+ assert handle.read().startswith(b'\xef\xbb\xbf""')
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index 509e5bcb33304..93ad3739e59c7 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -573,6 +573,54 @@ def test_to_latex_longtable_caption_label(self):
"""
assert result_cl == expected_cl
+ def test_to_latex_position(self):
+ the_position = "h"
+
+ df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
+
+ # test when only the position is provided
+ result_p = df.to_latex(position=the_position)
+
+ expected_p = r"""\begin{table}[h]
+\centering
+\begin{tabular}{lrl}
+\toprule
+{} & a & b \\
+\midrule
+0 & 1 & b1 \\
+1 & 2 & b2 \\
+\bottomrule
+\end{tabular}
+\end{table}
+"""
+ assert result_p == expected_p
+
+ def test_to_latex_longtable_position(self):
+ the_position = "t"
+
+ df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
+
+ # test when only the position is provided
+ result_p = df.to_latex(longtable=True, position=the_position)
+
+ expected_p = r"""\begin{longtable}[t]{lrl}
+\toprule
+{} & a & b \\
+\midrule
+\endhead
+\midrule
+\multicolumn{3}{r}{{Continued on next page}} \\
+\midrule
+\endfoot
+
+\bottomrule
+\endlastfoot
+0 & 1 & b1 \\
+1 & 2 & b2 \\
+\end{longtable}
+"""
+ assert result_p == expected_p
+
def test_to_latex_escape_special_chars(self):
special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"]
df = DataFrame(data=special_characters)
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 12e73bae40eac..5154a9ba6fdf0 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -18,7 +18,7 @@
from pandas.errors import DtypeWarning, EmptyDataError, ParserError
import pandas.util._test_decorators as td
-from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
+from pandas import DataFrame, Index, MultiIndex, Series, compat, concat, option_context
import pandas._testing as tm
from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser
@@ -2179,3 +2179,13 @@ def test_read_csv_names_not_accepting_sets(all_parsers):
parser = all_parsers
with pytest.raises(ValueError, match="Names should be an ordered collection."):
parser.read_csv(StringIO(data), names=set("QAZ"))
+
+
+def test_read_csv_with_use_inf_as_na(all_parsers):
+ # https://github.com/pandas-dev/pandas/issues/35493
+ parser = all_parsers
+ data = "1.0\nNaN\n3.0"
+ with option_context("use_inf_as_na", True):
+ result = parser.read_csv(StringIO(data), header=None)
+ expected = DataFrame([1.0, np.nan, 3.0])
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index df014171be817..0942c79837e7c 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -1751,9 +1751,9 @@ def col(t, column):
# try to index a col which isn't a data_column
msg = (
- f"column string2 is not a data_column.\n"
- f"In order to read column string2 you must reload the dataframe \n"
- f"into HDFStore and include string2 with the data_columns argument."
+ "column string2 is not a data_column.\n"
+ "In order to read column string2 you must reload the dataframe \n"
+ "into HDFStore and include string2 with the data_columns argument."
)
with pytest.raises(AttributeError, match=msg):
store.create_table_index("f", columns=["string2"])
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index dde38eb55ea7f..5ce2233bc0cd0 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -378,6 +378,17 @@ def test_unknown_engine(self):
with pytest.raises(ValueError, match="Unknown engine"):
pd.read_csv(path, engine="pyt")
+ def test_binary_mode(self):
+ """
+ 'encoding' shouldn't be passed to 'open' in binary mode.
+
+ GH 35058
+ """
+ with tm.ensure_clean() as path:
+ df = tm.makeDataFrame()
+ df.to_csv(path, mode="w+b")
+ tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
+
def test_is_fsspec_url():
assert icom.is_fsspec_url("gcs://pandas/somethingelse.com")
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 59c9bd0a36d3d..902a3d5d2a397 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -114,6 +114,22 @@ def test_compression_warning(compression_only):
df.to_csv(f, compression=compression_only)
+def test_compression_binary(compression_only):
+ """
+ Binary file handles support compression.
+
+ GH22555
+ """
+ df = tm.makeDataFrame()
+ with tm.ensure_clean() as path:
+ with open(path, mode="wb") as file:
+ df.to_csv(file, mode="wb", compression=compression_only)
+ file.seek(0) # file shouldn't be closed
+ tm.assert_frame_equal(
+ df, pd.read_csv(path, index_col=0, compression=compression_only)
+ )
+
+
def test_with_missing_lzma():
"""Tests if import pandas works when lzma is not present."""
# https://github.com/pandas-dev/pandas/issues/27575
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index c397a61616c1c..a0723452ccb70 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -15,7 +15,8 @@
)
# the ignore on the following line accounts for to_csv returning Optional(str)
# in general, but always str in the case we give no filename
-text = df1.to_csv(index=False).encode() # type: ignore
+# error: Item "None" of "Optional[str]" has no attribute "encode"
+text = df1.to_csv(index=False).encode() # type: ignore[union-attr]
@pytest.fixture
@@ -37,8 +38,8 @@ def test_read_csv(cleared_fs):
def test_reasonable_error(monkeypatch, cleared_fs):
- from fsspec.registry import known_implementations
from fsspec import registry
+ from fsspec.registry import known_implementations
registry.target.clear()
with pytest.raises(ValueError) as e:
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 4d93119ffa3f5..eacf4fa08545d 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -11,8 +11,7 @@
@td.skip_if_no("gcsfs")
def test_read_csv_gcs(monkeypatch):
- from fsspec import AbstractFileSystem
- from fsspec import registry
+ from fsspec import AbstractFileSystem, registry
registry.target.clear() # noqa # remove state
@@ -37,8 +36,7 @@ def open(*args, **kwargs):
@td.skip_if_no("gcsfs")
def test_to_csv_gcs(monkeypatch):
- from fsspec import AbstractFileSystem
- from fsspec import registry
+ from fsspec import AbstractFileSystem, registry
registry.target.clear() # noqa # remove state
df1 = DataFrame(
@@ -76,8 +74,7 @@ def mock_get_filepath_or_buffer(*args, **kwargs):
@td.skip_if_no("gcsfs")
def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
- from fsspec import AbstractFileSystem
- from fsspec import registry
+ from fsspec import AbstractFileSystem, registry
registry.target.clear() # noqa # remove state
df1 = DataFrame(
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 0991fae39138e..29b787d39c09d 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -48,10 +48,10 @@
try:
import sqlalchemy
- import sqlalchemy.schema
- import sqlalchemy.sql.sqltypes as sqltypes
from sqlalchemy.ext import declarative
from sqlalchemy.orm import session as sa_session
+ import sqlalchemy.schema
+ import sqlalchemy.sql.sqltypes as sqltypes
SQLALCHEMY_INSTALLED = True
except ImportError:
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index 896d3278cdde1..3b1ff233c5ec1 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -13,7 +13,6 @@
from pandas import DataFrame, Series
import pandas._testing as tm
-
"""
This is a common base class used for various plotting tests
"""
@@ -24,6 +23,7 @@ class TestPlotBase:
def setup_method(self, method):
import matplotlib as mpl
+
from pandas.plotting._matplotlib import compat
mpl.rcdefaults()
@@ -187,8 +187,8 @@ def _check_colors(
Series used for color grouping key
used for andrew_curves, parallel_coordinates, radviz test
"""
+ from matplotlib.collections import Collection, LineCollection, PolyCollection
from matplotlib.lines import Line2D
- from matplotlib.collections import Collection, PolyCollection, LineCollection
conv = self.colorconverter
if linecolors is not None:
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 317a994bd9a32..ee43e5d7072fe 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -2408,8 +2408,8 @@ def test_specified_props_kwd_plot_box(self, props, expected):
assert result[expected][0].get_color() == "C1"
def test_default_color_cycle(self):
- import matplotlib.pyplot as plt
import cycler
+ import matplotlib.pyplot as plt
colors = list("rgbk")
plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors)
@@ -2953,8 +2953,8 @@ def _check(axes):
@td.skip_if_no_scipy
def test_memory_leak(self):
""" Check that every plot type gets properly collected. """
- import weakref
import gc
+ import weakref
results = {}
for kind in plotting.PlotAccessor._all_kinds:
@@ -3032,8 +3032,8 @@ def test_df_subplots_patterns_minorticks(self):
@pytest.mark.slow
def test_df_gridspec_patterns(self):
# GH 10819
- import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
+ import matplotlib.pyplot as plt
ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10))
@@ -3422,9 +3422,9 @@ def test_xlabel_ylabel_dataframe_subplots(
def _generate_4_axes_via_gridspec():
- import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.gridspec # noqa
+ import matplotlib.pyplot as plt
gs = mpl.gridspec.GridSpec(2, 2)
ax_tl = plt.subplot(gs[0, 0])
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
index b6a6c326c3df3..34c881855d16a 100644
--- a/pandas/tests/plotting/test_hist_method.py
+++ b/pandas/tests/plotting/test_hist_method.py
@@ -101,7 +101,7 @@ def test_hist_layout_with_by(self):
@pytest.mark.slow
def test_hist_no_overlap(self):
- from matplotlib.pyplot import subplot, gcf
+ from matplotlib.pyplot import gcf, subplot
x = Series(randn(2))
y = Series(randn(2))
@@ -352,6 +352,7 @@ class TestDataFrameGroupByPlots(TestPlotBase):
@pytest.mark.slow
def test_grouped_hist_legacy(self):
from matplotlib.patches import Rectangle
+
from pandas.plotting._matplotlib.hist import _grouped_hist
df = DataFrame(randn(500, 2), columns=["A", "B"])
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index 75eeede472fe9..f5c1c58f3f7ed 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -131,9 +131,10 @@ def test_scatter_matrix_axis(self):
@pytest.mark.slow
def test_andrews_curves(self, iris):
- from pandas.plotting import andrews_curves
from matplotlib import cm
+ from pandas.plotting import andrews_curves
+
df = iris
_check_plot_works(andrews_curves, frame=df, class_column="Name")
@@ -206,9 +207,10 @@ def test_andrews_curves(self, iris):
@pytest.mark.slow
def test_parallel_coordinates(self, iris):
- from pandas.plotting import parallel_coordinates
from matplotlib import cm
+ from pandas.plotting import parallel_coordinates
+
df = iris
ax = _check_plot_works(parallel_coordinates, frame=df, class_column="Name")
@@ -279,9 +281,10 @@ def test_parallel_coordinates_with_sorted_labels(self):
@pytest.mark.slow
def test_radviz(self, iris):
- from pandas.plotting import radviz
from matplotlib import cm
+ from pandas.plotting import radviz
+
df = iris
_check_plot_works(radviz, frame=df, class_column="Name")
@@ -397,6 +400,7 @@ def test_get_standard_colors_no_appending(self):
# Make sure not to add more colors so that matplotlib can cycle
# correctly.
from matplotlib import cm
+
from pandas.plotting._matplotlib.style import _get_standard_colors
color_before = cm.gnuplot(range(5))
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
index 151bb3bed7207..cc00626e992f3 100644
--- a/pandas/tests/plotting/test_series.py
+++ b/pandas/tests/plotting/test_series.py
@@ -452,7 +452,7 @@ def test_hist_layout_with_by(self):
@pytest.mark.slow
def test_hist_no_overlap(self):
- from matplotlib.pyplot import subplot, gcf
+ from matplotlib.pyplot import gcf, subplot
x = Series(randn(2))
y = Series(randn(2))
@@ -827,6 +827,7 @@ def test_standard_colors(self):
@pytest.mark.slow
def test_standard_colors_all(self):
import matplotlib.colors as colors
+
from pandas.plotting._matplotlib.style import _get_standard_colors
# multiple colors like mediumaquamarine
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 0159fabd04d59..38cf2cc2402a1 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1279,6 +1279,43 @@ def test_concat_ignore_index(self, sort):
tm.assert_frame_equal(v1, expected)
+ @pytest.mark.parametrize(
+ "name_in1,name_in2,name_in3,name_out",
+ [
+ ("idx", "idx", "idx", "idx"),
+ ("idx", "idx", None, "idx"),
+ ("idx", None, None, "idx"),
+ ("idx1", "idx2", None, None),
+ ("idx1", "idx1", "idx2", None),
+ ("idx1", "idx2", "idx3", None),
+ (None, None, None, None),
+ ],
+ )
+ def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out):
+ # GH13475
+ indices = [
+ pd.Index(["a", "b", "c"], name=name_in1),
+ pd.Index(["b", "c", "d"], name=name_in2),
+ pd.Index(["c", "d", "e"], name=name_in3),
+ ]
+ frames = [
+ pd.DataFrame({c: [0, 1, 2]}, index=i)
+ for i, c in zip(indices, ["x", "y", "z"])
+ ]
+ result = pd.concat(frames, axis=1)
+
+ exp_ind = pd.Index(["a", "b", "c", "d", "e"], name=name_out)
+ expected = pd.DataFrame(
+ {
+ "x": [0, 1, 2, np.nan, np.nan],
+ "y": [np.nan, 0, 1, 2, np.nan],
+ "z": [np.nan, np.nan, 0, 1, 2],
+ },
+ index=exp_ind,
+ )
+
+ tm.assert_frame_equal(result, expected)
+
def test_concat_multiindex_with_keys(self):
index = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index 2b75a1ec6ca6e..79879ef346f53 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -799,7 +799,7 @@ def test_invalid_separator(self):
expected = expected.set_index(["id", "year"])[
["X", "A2010", "A2011", "B2010", "A", "B"]
]
- expected.index.set_levels([0, 1], level=0, inplace=True)
+ expected.index = expected.index.set_levels([0, 1], level=0)
result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=sep)
tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
@@ -861,7 +861,7 @@ def test_invalid_suffixtype(self):
expected = pd.DataFrame(exp_data).astype({"year": "int"})
expected = expected.set_index(["id", "year"])
- expected.index.set_levels([0, 1], level=0, inplace=True)
+ expected.index = expected.index.set_levels([0, 1], level=0)
result = wide_to_long(df, ["A", "B"], i="id", j="year")
tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index c07a5673fe503..67b3151b0ff9c 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1817,7 +1817,7 @@ def test_categorical_aggfunc(self, observed):
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
)
expected_columns = pd.Index(["a", "b"], name="C2")
- expected_data = np.array([[1.0, np.nan], [1.0, np.nan], [np.nan, 2.0]])
+ expected_data = np.array([[1, 0], [1, 0], [0, 2]], dtype=np.int64)
expected = pd.DataFrame(
expected_data, index=expected_index, columns=expected_columns
)
@@ -1851,18 +1851,19 @@ def test_categorical_pivot_index_ordering(self, observed):
values="Sales",
index="Month",
columns="Year",
- dropna=observed,
+ observed=observed,
aggfunc="sum",
)
expected_columns = pd.Int64Index([2013, 2014], name="Year")
expected_index = pd.CategoricalIndex(
- ["January"], categories=months, ordered=False, name="Month"
+ months, categories=months, ordered=False, name="Month"
)
+ expected_data = [[320, 120]] + [[0, 0]] * 11
expected = pd.DataFrame(
- [[320, 120]], index=expected_index, columns=expected_columns
+ expected_data, index=expected_index, columns=expected_columns
)
- if not observed:
- result = result.dropna().astype(np.int64)
+ if observed:
+ expected = expected.loc[["January"]]
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py
index e1e2ea1a5cec8..03830019affa1 100644
--- a/pandas/tests/scalar/test_nat.py
+++ b/pandas/tests/scalar/test_nat.py
@@ -513,11 +513,67 @@ def test_to_numpy_alias():
assert isna(expected) and isna(result)
-@pytest.mark.parametrize("other", [Timedelta(0), Timestamp(0)])
+@pytest.mark.parametrize(
+ "other",
+ [
+ Timedelta(0),
+ Timedelta(0).to_pytimedelta(),
+ pytest.param(
+ Timedelta(0).to_timedelta64(),
+ marks=pytest.mark.xfail(
+ reason="td64 doesnt return NotImplemented, see numpy#17017"
+ ),
+ ),
+ Timestamp(0),
+ Timestamp(0).to_pydatetime(),
+ pytest.param(
+ Timestamp(0).to_datetime64(),
+ marks=pytest.mark.xfail(
+ reason="dt64 doesnt return NotImplemented, see numpy#17017"
+ ),
+ ),
+ Timestamp(0).tz_localize("UTC"),
+ NaT,
+ ],
+)
def test_nat_comparisons(compare_operators_no_eq_ne, other):
# GH 26039
- assert getattr(NaT, compare_operators_no_eq_ne)(other) is False
- assert getattr(other, compare_operators_no_eq_ne)(NaT) is False
+ opname = compare_operators_no_eq_ne
+
+ assert getattr(NaT, opname)(other) is False
+
+ op = getattr(operator, opname.strip("_"))
+ assert op(NaT, other) is False
+ assert op(other, NaT) is False
+
+
+@pytest.mark.parametrize("other", [np.timedelta64(0, "ns"), np.datetime64("now", "ns")])
+def test_nat_comparisons_numpy(other):
+ # Once numpy#17017 is fixed and the xfailed cases in test_nat_comparisons
+ # pass, this test can be removed
+ assert not NaT == other
+ assert NaT != other
+ assert not NaT < other
+ assert not NaT > other
+ assert not NaT <= other
+ assert not NaT >= other
+
+
+@pytest.mark.parametrize("other", ["foo", 2, 2.0])
+@pytest.mark.parametrize("op", [operator.le, operator.lt, operator.ge, operator.gt])
+def test_nat_comparisons_invalid(other, op):
+ # GH#35585
+ assert not NaT == other
+ assert not other == NaT
+
+ assert NaT != other
+ assert other != NaT
+
+ with pytest.raises(TypeError):
+ op(NaT, other)
+
+ with pytest.raises(TypeError):
+ op(other, NaT)
@pytest.mark.parametrize(
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index 0b34fab7b80b1..088f8681feb99 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -11,7 +11,6 @@
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
import pandas._testing as tm
-
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
@@ -166,6 +165,7 @@ def test_getitem_setitem_datetime_tz_pytz():
def test_getitem_setitem_datetime_tz_dateutil():
from dateutil.tz import tzutc
+
from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
tz = (
diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py
index 19caf4eccf748..4b4ef5ea046be 100644
--- a/pandas/tests/series/methods/test_asof.py
+++ b/pandas/tests/series/methods/test_asof.py
@@ -90,7 +90,7 @@ def test_with_nan(self):
tm.assert_series_equal(result, expected)
def test_periodindex(self):
- from pandas import period_range, PeriodIndex
+ from pandas import PeriodIndex, period_range
# array or list or dates
N = 50
diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py
index 7c82edbaec177..45592f8d99b93 100644
--- a/pandas/tests/series/methods/test_truncate.py
+++ b/pandas/tests/series/methods/test_truncate.py
@@ -141,3 +141,14 @@ def test_truncate_multiindex(self):
expected = df.col
tm.assert_series_equal(result, expected)
+
+ def test_truncate_one_element_series(self):
+ # GH 35544
+ series = pd.Series([0.1], index=pd.DatetimeIndex(["2020-08-04"]))
+ before = pd.Timestamp("2020-08-02")
+ after = pd.Timestamp("2020-08-04")
+
+ result = series.truncate(before=before, after=after)
+
+ # the input Series and the expected Series are the same
+ tm.assert_series_equal(result, series)
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 5c8a0d224c4f9..ef2bafd4ea2ad 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -195,8 +195,8 @@ def test_add_with_duplicate_index(self):
tm.assert_series_equal(result, expected)
def test_add_na_handling(self):
- from decimal import Decimal
from datetime import date
+ from decimal import Decimal
s = Series(
[Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)]
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index e718a6b759963..b32c5e91af295 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -90,7 +90,7 @@ def test_statsmodels():
def test_scikit_learn(df):
sklearn = import_module("sklearn") # noqa
- from sklearn import svm, datasets
+ from sklearn import datasets, svm
digits = datasets.load_digits()
clf = svm.SVC(gamma=0.001, C=100.0)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 1ba73292dc0b4..724558bd49ea2 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -63,8 +63,8 @@ def setup_method(self, method):
).sum()
# use Int64Index, to make sure things work
- self.ymd.index.set_levels(
- [lev.astype("i8") for lev in self.ymd.index.levels], inplace=True
+ self.ymd.index = self.ymd.index.set_levels(
+ [lev.astype("i8") for lev in self.ymd.index.levels]
)
self.ymd.index.set_names(["year", "month", "day"], inplace=True)
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
index 1e193f22a6698..f68d83f7f4d58 100644
--- a/pandas/tests/tools/test_to_timedelta.py
+++ b/pandas/tests/tools/test_to_timedelta.py
@@ -166,3 +166,16 @@ def test_to_timedelta_ignore_strings_unit(self):
arr = np.array([1, 2, "error"], dtype=object)
result = pd.to_timedelta(arr, unit="ns", errors="ignore")
tm.assert_numpy_array_equal(result, arr)
+
+ def test_to_timedelta_nullable_int64_dtype(self):
+ # GH 35574
+ expected = Series([timedelta(days=1), timedelta(days=2)])
+ result = to_timedelta(Series([1, 2], dtype="Int64"), unit="days")
+
+ tm.assert_series_equal(result, expected)
+
+ # IntegerArray Series with nulls
+ expected = Series([timedelta(days=1), None])
+ result = to_timedelta(Series([1, None], dtype="Int64"), unit="days")
+
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py
index 4f184b78f34a1..87cd97f853f4d 100644
--- a/pandas/tests/tslibs/test_conversion.py
+++ b/pandas/tests/tslibs/test_conversion.py
@@ -78,6 +78,14 @@ def test_tz_convert_corner(arr):
tm.assert_numpy_array_equal(result, arr)
+def test_tz_convert_readonly():
+ # GH#35530
+ arr = np.array([0], dtype=np.int64)
+ arr.setflags(write=False)
+ result = tzconversion.tz_convert_from_utc(arr, UTC)
+ tm.assert_numpy_array_equal(result, arr)
+
+
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("dtype", ["M8[ns]", "M8[s]"])
def test_length_zero_copy(dtype, copy):
diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
index 1284cc9d4f49b..a7b5aeac560e4 100644
--- a/pandas/tests/util/test_assert_series_equal.py
+++ b/pandas/tests/util/test_assert_series_equal.py
@@ -281,3 +281,18 @@ class MySeries(Series):
with pytest.raises(AssertionError, match="Series classes are different"):
tm.assert_series_equal(s3, s1, check_series_type=True)
+
+
+def test_series_equal_exact_for_nonnumeric():
+ # https://github.com/pandas-dev/pandas/issues/35446
+ s1 = Series(["a", "b"])
+ s2 = Series(["a", "b"])
+ s3 = Series(["b", "a"])
+
+ tm.assert_series_equal(s1, s2, check_exact=True)
+ tm.assert_series_equal(s2, s1, check_exact=True)
+
+ with pytest.raises(AssertionError):
+ tm.assert_series_equal(s1, s3, check_exact=True)
+ with pytest.raises(AssertionError):
+ tm.assert_series_equal(s3, s1, check_exact=True)
diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py
index 744ca264e91d9..e1dcac06c39cc 100644
--- a/pandas/tests/window/test_grouper.py
+++ b/pandas/tests/window/test_grouper.py
@@ -214,3 +214,144 @@ def foo(x):
name="value",
)
tm.assert_series_equal(result, expected)
+
+ def test_groupby_rolling_center_center(self):
+ # GH 35552
+ series = Series(range(1, 6))
+ result = series.groupby(series).rolling(center=True, window=3).mean()
+ expected = Series(
+ [np.nan] * 5,
+ index=pd.MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))),
+ )
+ tm.assert_series_equal(result, expected)
+
+ series = Series(range(1, 5))
+ result = series.groupby(series).rolling(center=True, window=3).mean()
+ expected = Series(
+ [np.nan] * 4,
+ index=pd.MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))),
+ )
+ tm.assert_series_equal(result, expected)
+
+ df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)})
+ result = df.groupby("a").rolling(center=True, window=3).mean()
+ expected = pd.DataFrame(
+ [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan],
+ index=pd.MultiIndex.from_tuples(
+ (
+ ("a", 0),
+ ("a", 1),
+ ("a", 2),
+ ("a", 3),
+ ("a", 4),
+ ("b", 5),
+ ("b", 6),
+ ("b", 7),
+ ("b", 8),
+ ("b", 9),
+ ("b", 10),
+ ),
+ names=["a", None],
+ ),
+ columns=["b"],
+ )
+ tm.assert_frame_equal(result, expected)
+
+ df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)})
+ result = df.groupby("a").rolling(center=True, window=3).mean()
+ expected = pd.DataFrame(
+ [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan],
+ index=pd.MultiIndex.from_tuples(
+ (
+ ("a", 0),
+ ("a", 1),
+ ("a", 2),
+ ("a", 3),
+ ("a", 4),
+ ("b", 5),
+ ("b", 6),
+ ("b", 7),
+ ("b", 8),
+ ("b", 9),
+ ),
+ names=["a", None],
+ ),
+ columns=["b"],
+ )
+ tm.assert_frame_equal(result, expected)
+
+ def test_groupby_subselect_rolling(self):
+ # GH 35486
+ df = DataFrame(
+ {"a": [1, 2, 3, 2], "b": [4.0, 2.0, 3.0, 1.0], "c": [10, 20, 30, 20]}
+ )
+ result = df.groupby("a")[["b"]].rolling(2).max()
+ expected = DataFrame(
+ [np.nan, np.nan, 2.0, np.nan],
+ columns=["b"],
+ index=pd.MultiIndex.from_tuples(
+ ((1, 0), (2, 1), (2, 3), (3, 2)), names=["a", None]
+ ),
+ )
+ tm.assert_frame_equal(result, expected)
+
+ result = df.groupby("a")["b"].rolling(2).max()
+ expected = Series(
+ [np.nan, np.nan, 2.0, np.nan],
+ index=pd.MultiIndex.from_tuples(
+ ((1, 0), (2, 1), (2, 3), (3, 2)), names=["a", None]
+ ),
+ name="b",
+ )
+ tm.assert_series_equal(result, expected)
+
+ def test_groupby_rolling_subset_with_closed(self):
+ # GH 35549
+ df = pd.DataFrame(
+ {
+ "column1": range(6),
+ "column2": range(6),
+ "group": 3 * ["A", "B"],
+ "date": [pd.Timestamp("2019-01-01")] * 6,
+ }
+ )
+ result = (
+ df.groupby("group").rolling("1D", on="date", closed="left")["column1"].sum()
+ )
+ expected = Series(
+ [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
+ index=pd.MultiIndex.from_tuples(
+ [("A", pd.Timestamp("2019-01-01"))] * 3
+ + [("B", pd.Timestamp("2019-01-01"))] * 3,
+ names=["group", "date"],
+ ),
+ name="column1",
+ )
+ tm.assert_series_equal(result, expected)
+
+ def test_groupby_subset_rolling_subset_with_closed(self):
+ # GH 35549
+ df = pd.DataFrame(
+ {
+ "column1": range(6),
+ "column2": range(6),
+ "group": 3 * ["A", "B"],
+ "date": [pd.Timestamp("2019-01-01")] * 6,
+ }
+ )
+
+ result = (
+ df.groupby("group")[["column1", "date"]]
+ .rolling("1D", on="date", closed="left")["column1"]
+ .sum()
+ )
+ expected = Series(
+ [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
+ index=pd.MultiIndex.from_tuples(
+ [("A", pd.Timestamp("2019-01-01"))] * 3
+ + [("B", pd.Timestamp("2019-01-01"))] * 3,
+ names=["group", "date"],
+ ),
+ name="column1",
+ )
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index 6135ccba1573d..f81bca7e85156 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -323,7 +323,8 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]:
sig = inspect.Signature(params)
# https://github.com/python/typing/issues/598
- func.__signature__ = sig # type: ignore
+ # error: "F" has no attribute "__signature__"
+ func.__signature__ = sig # type: ignore[attr-defined]
return cast(F, wrapper)
return decorate
@@ -357,8 +358,12 @@ def decorator(decorated: F) -> F:
for docstring in docstrings:
if hasattr(docstring, "_docstring_components"):
+ # error: Item "str" of "Union[str, Callable[..., Any]]" has no
+ # attribute "_docstring_components" [union-attr]
+ # error: Item "function" of "Union[str, Callable[..., Any]]"
+ # has no attribute "_docstring_components" [union-attr]
docstring_components.extend(
- docstring._docstring_components # type: ignore
+ docstring._docstring_components # type: ignore[union-attr]
)
elif isinstance(docstring, str) or docstring.__doc__:
docstring_components.append(docstring)
@@ -373,7 +378,10 @@ def decorator(decorated: F) -> F:
]
)
- decorated._docstring_components = docstring_components # type: ignore
+ # error: "F" has no attribute "_docstring_components"
+ decorated._docstring_components = ( # type: ignore[attr-defined]
+ docstring_components
+ )
return decorated
return decorator
diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py
index f413490764124..3a8a1a3144269 100644
--- a/pandas/util/_doctools.py
+++ b/pandas/util/_doctools.py
@@ -53,8 +53,8 @@ def plot(self, left, right, labels=None, vertical: bool = True):
vertical : bool, default True
If True, use vertical layout. If False, use horizontal layout.
"""
- import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
+ import matplotlib.pyplot as plt
if not isinstance(left, list):
left = [left]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 7bf3df176b378..6a87b0a99a4f8 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -11,7 +11,7 @@ cpplint
flake8<3.8.0
flake8-comprehensions>=3.1.0
flake8-rst>=0.6.0,<=0.7.0
-isort==4.3.21
+isort>=5.2.1
mypy==0.730
pycodestyle
gitpython
@@ -73,4 +73,5 @@ cftime
pyreadstat
tabulate>=0.8.3
git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master
-git+https://github.com/numpy/numpydoc
\ No newline at end of file
+git+https://github.com/numpy/numpydoc
+pyflakes>=2.2.0
\ No newline at end of file
diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py
index 5de2a07381ae5..62ec6b9ef07af 100755
--- a/scripts/validate_rst_title_capitalization.py
+++ b/scripts/validate_rst_title_capitalization.py
@@ -138,6 +138,9 @@
"CategoricalDtype",
"UTC",
"Panel",
+ "False",
+ "Styler",
+ "os",
}
CAP_EXCEPTIONS_DICT = {word.lower(): word for word in CAPITALIZATION_EXCEPTIONS}
diff --git a/setup.cfg b/setup.cfg
index ee5725e36d193..e4c0b3dcf37ef 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -122,6 +122,7 @@ check_untyped_defs=True
strict_equality=True
warn_redundant_casts = True
warn_unused_ignores = True
+show_error_codes = True
[mypy-pandas.tests.*]
check_untyped_defs=False
@@ -174,9 +175,6 @@ check_untyped_defs=False
[mypy-pandas.core.groupby.ops]
check_untyped_defs=False
-[mypy-pandas.core.indexes.base]
-check_untyped_defs=False
-
[mypy-pandas.core.indexes.datetimes]
check_untyped_defs=False
@@ -213,9 +211,6 @@ check_untyped_defs=False
[mypy-pandas.core.window.common]
check_untyped_defs=False
-[mypy-pandas.core.window.ewm]
-check_untyped_defs=False
-
[mypy-pandas.core.window.expanding]
check_untyped_defs=False
diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md
index be109ea53eb7d..515d23afb93ec 100644
--- a/web/pandas/community/ecosystem.md
+++ b/web/pandas/community/ecosystem.md
@@ -42,6 +42,13 @@ datasets into feature matrices for machine learning using reusable
feature engineering "primitives". Users can contribute their own
primitives in Python and share them with the rest of the community.
+### [Compose](https://github.com/FeatureLabs/compose)
+
+Compose is a machine learning tool for labeling data and prediction engineering.
+It allows you to structure the labeling process by parameterizing
+prediction problems and transforming time-driven relational data into
+target values with cutoff times that can be used for supervised learning.
+
## Visualization
### [Altair](https://altair-viz.github.io/)
@@ -372,3 +379,4 @@ authors to coordinate on the namespace.
| [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` |
| [pandas_path](https://github.com/drivendataorg/pandas-path/) | `path` | `Index`, `Series` |
| [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` |
+| [composeml](https://github.com/FeatureLabs/compose) | `slice` | `DataFrame` |
diff --git a/web/pandas/index.html b/web/pandas/index.html
index 83d0f48197033..75c797d6dd93d 100644
--- a/web/pandas/index.html
+++ b/web/pandas/index.html
@@ -63,7 +63,7 @@ With the support of:
{% if releases %}
Latest version: {{ releases[0].name }}
|