Skip to content

Commit

Permalink
ENH: Add ignore_index for df.drop_duplicates (pandas-dev#30405)
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesdong1991 authored and AlexKirko committed Dec 29, 2019
1 parent b195dd6 commit b35a5f4
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ Other enhancements
- DataFrame constructor preserves ``ExtensionArray`` dtype with ``ExtensionArray`` input (:issue:`11363`)
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)

- :meth:`DataFrame.drop_duplicates` has gained the ``ignore_index`` keyword to reset the index of the result after dropping duplicates (:issue:`30114`)

Build Changes
^^^^^^^^^^^^^
Expand Down
14 changes: 13 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4614,6 +4614,7 @@ def drop_duplicates(
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
keep: Union[str, bool] = "first",
inplace: bool = False,
ignore_index: bool = False,
) -> Optional["DataFrame"]:
"""
Return DataFrame with duplicate rows removed.
Expand All @@ -4633,6 +4634,10 @@ def drop_duplicates(
- False : Drop all duplicates.
inplace : bool, default False
Whether to drop duplicates in place or to return a copy.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, …, n - 1.
.. versionadded:: 1.0.0
Returns
-------
Expand All @@ -4648,9 +4653,16 @@ def drop_duplicates(
if inplace:
(inds,) = (-duplicated)._ndarray_values.nonzero()
new_data = self._data.take(inds)

if ignore_index:
new_data.axes[1] = ibase.default_index(len(inds))
self._update_inplace(new_data)
else:
return self[-duplicated]
result = self[-duplicated]

if ignore_index:
result.index = ibase.default_index(len(result))
return result

return None

Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/frame/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,36 @@ def test_drop_duplicates_inplace():
expected = orig2.drop_duplicates(["A", "B"], keep=False)
result = df2
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "origin_dict, output_dict, ignore_index, output_index",
    [
        ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]),
        ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]),
        ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]),
        ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]),
    ],
)
def test_drop_duplicates_ignore_index(
    origin_dict, output_dict, ignore_index, output_index
):
    """
    Check ``drop_duplicates(ignore_index=...)`` for both ``inplace`` modes.

    Each case builds a frame from ``origin_dict``, drops its duplicate rows
    and compares against a frame built from ``output_dict`` indexed by
    ``output_index``. The source frame must compare equal to a freshly built
    one after every call.
    """
    # GH 30114
    source = DataFrame(origin_dict)
    pristine = DataFrame(origin_dict)
    wanted = DataFrame(output_dict, index=output_index)

    # inplace=False: a new frame comes back and the source stays as built
    deduped = source.drop_duplicates(ignore_index=ignore_index)
    tm.assert_frame_equal(deduped, wanted)
    tm.assert_frame_equal(source, pristine)

    # inplace=True: operate on a copy so the source can be re-checked after
    scratch = source.copy()
    scratch.drop_duplicates(ignore_index=ignore_index, inplace=True)
    tm.assert_frame_equal(scratch, wanted)
    tm.assert_frame_equal(source, pristine)

0 comments on commit b35a5f4

Please sign in to comment.