Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clean: simplify lite-rule-alias and dont-uppercase #59240

Merged
merged 1 commit into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,8 @@ cdef dict c_PERIOD_AND_OFFSET_DEPR_FREQSTR = {
"b": "B",
"c": "C",
"MIN": "min",
"US": "us",
"NS": "ns",
}

cdef str INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
Expand Down
91 changes: 40 additions & 51 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4697,13 +4697,9 @@ _lite_rule_alias = {
"BYS": "BYS-JAN", # BYearBegin(month=1),

"Min": "min",
"min": "min",
"ms": "ms",
"us": "us",
"ns": "ns",
}

_dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s"}
_dont_uppercase = {"min", "h", "bh", "cbh", "s", "ms", "us", "ns"}
Comment on lines -4706 to +4702
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It never made much sense to me that 'MS' was in this list. It's been there since forever, but it looks odd - 'MS' is already uppercase 😄

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree, it was always confusing for me.



INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
Expand All @@ -4713,6 +4709,37 @@ INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
_offset_map = {}


def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
if name in _lite_rule_alias:
return name
if name in c_PERIOD_AND_OFFSET_DEPR_FREQSTR:
warnings.warn(
f"\'{name}\' is deprecated and will be removed "
f"in a future version, please use "
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
f" instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return c_PERIOD_AND_OFFSET_DEPR_FREQSTR[name]

for _name in (name.lower(), name.upper()):
if name == _name:
continue
if _name in c_PERIOD_AND_OFFSET_DEPR_FREQSTR.values():
warnings.warn(
f"\'{name}\' is deprecated and will be removed "
f"in a future version, please use "
f"\'{_name}\' "
f" instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return _name

return name


def _validate_to_offset_alias(alias: str, is_period: bool) -> None:
if not is_period:
if alias.upper() in c_OFFSET_RENAMED_FREQSTR:
Expand Down Expand Up @@ -4750,35 +4777,6 @@ def _get_offset(name: str) -> BaseOffset:
--------
_get_offset('EOM') --> BMonthEnd(1)
"""
if (
name not in _lite_rule_alias
and (name.upper() in _lite_rule_alias)
and name != "ms"
):
warnings.warn(
f"\'{name}\' is deprecated and will be removed "
f"in a future version, please use \'{name.upper()}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif (
name not in _lite_rule_alias
and (name.lower() in _lite_rule_alias)
and name != "MS"
):
warnings.warn(
f"\'{name}\' is deprecated and will be removed "
f"in a future version, please use \'{name.lower()}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if name not in _dont_uppercase:
name = name.upper()
name = _lite_rule_alias.get(name, name)
name = _lite_rule_alias.get(name.lower(), name)
else:
name = _lite_rule_alias.get(name, name)

if name not in _offset_map:
try:
split = name.split("-")
Expand Down Expand Up @@ -4880,6 +4878,7 @@ cpdef to_offset(freq, bint is_period=False):

tups = zip(split[0::4], split[1::4], split[2::4])
for n, (sep, stride, name) in enumerate(tups):
name = _warn_about_deprecated_aliases(name, is_period)
_validate_to_offset_alias(name, is_period)
if is_period:
if name.upper() in c_PERIOD_TO_OFFSET_FREQSTR:
Expand All @@ -4888,31 +4887,21 @@ cpdef to_offset(freq, bint is_period=False):
f"\'{name}\' is no longer supported, "
f"please use \'{name.upper()}\' instead.",
)
name = c_PERIOD_TO_OFFSET_FREQSTR.get(name.upper())

if name in c_PERIOD_AND_OFFSET_DEPR_FREQSTR:
warnings.warn(
f"\'{name}\' is deprecated and will be removed "
f"in a future version, please use "
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
f"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
name = c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)
name = c_PERIOD_TO_OFFSET_FREQSTR[name.upper()]
name = _lite_rule_alias.get(name, name)

if sep != "" and not sep.isspace():
raise ValueError("separator must be spaces")
prefix = _lite_rule_alias.get(name) or name
if stride_sign is None:
stride_sign = -1 if stride.startswith("-") else 1
if not stride:
stride = 1

if prefix in {"D", "h", "min", "s", "ms", "us", "ns"}:
if name in {"D", "h", "min", "s", "ms", "us", "ns"}:
# For these prefixes, we have something like "3h" or
# "2.5min", so we can construct a Timedelta with the
# matching unit and get our offset from delta_to_tick
td = Timedelta(1, unit=prefix)
td = Timedelta(1, unit=name)
off = delta_to_tick(td)
offset = off * float(stride)
if n != 0:
Expand All @@ -4921,7 +4910,7 @@ cpdef to_offset(freq, bint is_period=False):
offset *= stride_sign
else:
stride = int(stride)
offset = _get_offset(prefix)
offset = _get_offset(name)
offset = offset * int(np.fabs(stride) * stride_sign)

if result is None:
Expand All @@ -4931,7 +4920,7 @@ cpdef to_offset(freq, bint is_period=False):
except (ValueError, TypeError) as err:
raise ValueError(INVALID_FREQ_ERR_MSG.format(
f"{freq}, failed to parse with error message: {repr(err)}")
)
) from err
else:
result = None

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ def test_date_range_frequency_M_Q_Y_raises(self, freq):
with pytest.raises(ValueError, match=msg):
pd.date_range("1/1/2000", periods=4, freq=freq)

@pytest.mark.parametrize("freq_depr", ["2MIN", "2mS", "2Us"])
@pytest.mark.parametrize("freq_depr", ["2MIN", "2nS", "2Us"])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this PR, "2mS" would just raise a hard error. I think that's OK tbh, anyone using 'ms' case-insentively was already playing with fire

def test_date_range_uppercase_frequency_deprecated(self, freq_depr):
# GH#9586, GH#54939
depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,9 +788,7 @@ def test_get_offset():

pairs = [
("B", BDay()),
("b", BDay()),
("bme", BMonthEnd()),
("Bme", BMonthEnd()),
("BME", BMonthEnd()),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'b' raises a deprecation warning (and it's already tested, e.g. test_construction_bday and test_construction_bday

'bme' would raise a hard error, but I think that's fine too, as 'me' already went through a deprecation cycle

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, seems reasonable to me

("W-MON", Week(weekday=0)),
("W-TUE", Week(weekday=1)),
("W-WED", Week(weekday=2)),
Expand Down
Loading