Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: set_index copy kwd #48043

Merged
merged 3 commits into from
Aug 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ Other enhancements
- :meth:`RangeIndex.union` can now return a :class:`RangeIndex` instead of an :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
- :meth:`DataFrame.compare` now accepts an argument ``result_names`` that lets the user specify the names used in the result for the left and right DataFrames being compared. These default to ``'self'`` and ``'other'`` (:issue:`44354`)
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)

.. ---------------------------------------------------------------------------
.. _whatsnew_150.notable_bug_fixes:
Expand Down
17 changes: 16 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5824,6 +5824,7 @@ def set_index(
append: bool = ...,
inplace: Literal[False] = ...,
verify_integrity: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> DataFrame:
...

Expand All @@ -5836,6 +5837,7 @@ def set_index(
append: bool = ...,
inplace: Literal[True],
verify_integrity: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> None:
...

Expand All @@ -5847,6 +5849,7 @@ def set_index(
append: bool = False,
inplace: bool = False,
verify_integrity: bool = False,
copy: bool | lib.NoDefault = lib.no_default,
) -> DataFrame | None:
"""
Set the DataFrame index using existing columns.
Expand All @@ -5873,6 +5876,11 @@ def set_index(
Check the new index for duplicates. Otherwise defer the check until
necessary. Setting to False will improve the performance of this
method.
copy : bool, default True
Whether to make a copy of the underlying data when returning a new
DataFrame.

.. versionadded:: 1.5.0

Returns
-------
Expand Down Expand Up @@ -5938,6 +5946,13 @@ def set_index(
4 16 10 2014 31
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if inplace:
if copy is not lib.no_default:
raise ValueError("Cannot specify copy when inplace=True")
copy = False
elif copy is lib.no_default:
copy = True

self._check_inplace_and_allows_duplicate_labels(inplace)
if not isinstance(keys, list):
keys = [keys]
Expand Down Expand Up @@ -5973,7 +5988,7 @@ def set_index(
if inplace:
frame = self
else:
frame = self.copy()
frame = self.copy(deep=copy)

arrays = []
names: list[Hashable] = []
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,25 @@


class TestSetIndex:
def test_set_index_copy(self):
    """``set_index`` honors ``copy`` and rejects it together with ``inplace=True``."""
    # GH#48043
    source = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
    wanted = DataFrame(
        {"B": [3, 4], "C": [5, 6]},
        index=Index([1, 2], name="A"),
    )

    # copy=True: no column of the result may share memory with the source.
    copied = source.set_index("A", copy=True)
    tm.assert_frame_equal(copied, wanted)
    for col in copied.columns:
        assert not tm.shares_memory(source[col], copied[col])

    # copy=False: every column of the result must share memory with the source.
    viewed = source.set_index("A", copy=False)
    tm.assert_frame_equal(viewed, wanted)
    for col in viewed.columns:
        assert tm.shares_memory(source[col], viewed[col])

    # Passing any explicit ``copy`` value alongside inplace=True is an error.
    msg = "Cannot specify copy when inplace=True"
    for flag in (True, False):
        with pytest.raises(ValueError, match=msg):
            source.set_index("A", inplace=True, copy=flag)

def test_set_index_multiindex(self):
# segfault in GH#3308
d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}
Expand Down