-
Notifications
You must be signed in to change notification settings - Fork 651
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF-#4929: Compute `dtype` when using `Series.dt` accessor
#4930
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1440,32 +1440,34 @@ def searchsorted(df): | |
|
||
# Dt map partitions operations | ||
|
||
dt_date = Map.register(_dt_prop_map("date")) | ||
dt_time = Map.register(_dt_prop_map("time")) | ||
dt_timetz = Map.register(_dt_prop_map("timetz")) | ||
dt_year = Map.register(_dt_prop_map("year")) | ||
dt_month = Map.register(_dt_prop_map("month")) | ||
dt_day = Map.register(_dt_prop_map("day")) | ||
dt_hour = Map.register(_dt_prop_map("hour")) | ||
dt_minute = Map.register(_dt_prop_map("minute")) | ||
dt_second = Map.register(_dt_prop_map("second")) | ||
dt_microsecond = Map.register(_dt_prop_map("microsecond")) | ||
dt_nanosecond = Map.register(_dt_prop_map("nanosecond")) | ||
dt_week = Map.register(_dt_prop_map("week")) | ||
dt_weekofyear = Map.register(_dt_prop_map("weekofyear")) | ||
dt_dayofweek = Map.register(_dt_prop_map("dayofweek")) | ||
dt_weekday = Map.register(_dt_prop_map("weekday")) | ||
dt_dayofyear = Map.register(_dt_prop_map("dayofyear")) | ||
dt_quarter = Map.register(_dt_prop_map("quarter")) | ||
dt_is_month_start = Map.register(_dt_prop_map("is_month_start")) | ||
dt_is_month_end = Map.register(_dt_prop_map("is_month_end")) | ||
dt_is_quarter_start = Map.register(_dt_prop_map("is_quarter_start")) | ||
dt_is_quarter_end = Map.register(_dt_prop_map("is_quarter_end")) | ||
dt_is_year_start = Map.register(_dt_prop_map("is_year_start")) | ||
dt_is_year_end = Map.register(_dt_prop_map("is_year_end")) | ||
dt_is_leap_year = Map.register(_dt_prop_map("is_leap_year")) | ||
dt_daysinmonth = Map.register(_dt_prop_map("daysinmonth")) | ||
dt_days_in_month = Map.register(_dt_prop_map("days_in_month")) | ||
dt_date = Map.register(_dt_prop_map("date"), dtypes=np.object_) | ||
dt_time = Map.register(_dt_prop_map("time"), dtypes=np.object_) | ||
dt_timetz = Map.register(_dt_prop_map("timetz"), dtypes=np.object_) | ||
dt_year = Map.register(_dt_prop_map("year"), dtypes=np.int64) | ||
dt_month = Map.register(_dt_prop_map("month"), dtypes=np.int64) | ||
dt_day = Map.register(_dt_prop_map("day"), dtypes=np.int64) | ||
dt_hour = Map.register(_dt_prop_map("hour"), dtypes=np.int64) | ||
dt_minute = Map.register(_dt_prop_map("minute"), dtypes=np.int64) | ||
dt_second = Map.register(_dt_prop_map("second"), dtypes=np.int64) | ||
Comment on lines
+1446
to
+1451
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you please check that this is correct? I mean, using int64 to store numbers from 0 to 59 feels a little wasteful... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think those are correct. See: https://pandas.pydata.org/docs/reference/api/pandas.Series.dt.minute.html There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @vnlitvinov I agree that it does seem a bit wasteful. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could probably be improved upstream: the lower-level calls return int32, and the result gets wrapped in int64 somewhere. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I checked using the following example, and everything is as described. Do I need to do anything else, or can we merge? pd.Series(pd.to_timedelta(np.arange(5), unit='d')).dt |
||
dt_microsecond = Map.register(_dt_prop_map("microsecond"), dtypes=np.int64) | ||
dt_nanosecond = Map.register(_dt_prop_map("nanosecond"), dtypes=np.int64) | ||
dt_week = Map.register(_dt_prop_map("week"), dtypes=np.int64) | ||
dt_weekofyear = Map.register(_dt_prop_map("weekofyear"), dtypes=np.int64) | ||
dt_dayofweek = Map.register(_dt_prop_map("dayofweek"), dtypes=np.int64) | ||
dt_weekday = Map.register(_dt_prop_map("weekday"), dtypes=np.int64) | ||
dt_dayofyear = Map.register(_dt_prop_map("dayofyear"), dtypes=np.int64) | ||
dt_quarter = Map.register(_dt_prop_map("quarter"), dtypes=np.int64) | ||
dt_is_month_start = Map.register(_dt_prop_map("is_month_start"), dtypes=np.bool_) | ||
dt_is_month_end = Map.register(_dt_prop_map("is_month_end"), dtypes=np.bool_) | ||
dt_is_quarter_start = Map.register( | ||
_dt_prop_map("is_quarter_start"), dtypes=np.bool_ | ||
) | ||
dt_is_quarter_end = Map.register(_dt_prop_map("is_quarter_end"), dtypes=np.bool_) | ||
dt_is_year_start = Map.register(_dt_prop_map("is_year_start"), dtypes=np.bool_) | ||
dt_is_year_end = Map.register(_dt_prop_map("is_year_end"), dtypes=np.bool_) | ||
dt_is_leap_year = Map.register(_dt_prop_map("is_leap_year"), dtypes=np.bool_) | ||
dt_daysinmonth = Map.register(_dt_prop_map("daysinmonth"), dtypes=np.int64) | ||
dt_days_in_month = Map.register(_dt_prop_map("days_in_month"), dtypes=np.int64) | ||
|
||
def dt_tz(self): | ||
def datetime_tz(df): | ||
|
@@ -1480,23 +1482,23 @@ def datetime_freq(df): | |
return self.default_to_pandas(datetime_freq) | ||
|
||
dt_to_period = Map.register(_dt_func_map("to_period")) | ||
dt_to_pydatetime = Map.register(_dt_func_map("to_pydatetime")) | ||
dt_to_pydatetime = Map.register(_dt_func_map("to_pydatetime"), dtypes=np.object_) | ||
dt_tz_localize = Map.register(_dt_func_map("tz_localize")) | ||
dt_tz_convert = Map.register(_dt_func_map("tz_convert")) | ||
dt_normalize = Map.register(_dt_func_map("normalize")) | ||
dt_strftime = Map.register(_dt_func_map("strftime")) | ||
dt_strftime = Map.register(_dt_func_map("strftime"), dtypes=np.object_) | ||
dt_round = Map.register(_dt_func_map("round")) | ||
dt_floor = Map.register(_dt_func_map("floor")) | ||
dt_ceil = Map.register(_dt_func_map("ceil")) | ||
dt_month_name = Map.register(_dt_func_map("month_name")) | ||
dt_day_name = Map.register(_dt_func_map("day_name")) | ||
dt_to_pytimedelta = Map.register(_dt_func_map("to_pytimedelta")) | ||
dt_total_seconds = Map.register(_dt_func_map("total_seconds")) | ||
dt_seconds = Map.register(_dt_prop_map("seconds")) | ||
dt_days = Map.register(_dt_prop_map("days")) | ||
dt_microseconds = Map.register(_dt_prop_map("microseconds")) | ||
dt_nanoseconds = Map.register(_dt_prop_map("nanoseconds")) | ||
dt_qyear = Map.register(_dt_prop_map("qyear")) | ||
dt_month_name = Map.register(_dt_func_map("month_name"), dtypes=np.object_) | ||
dt_day_name = Map.register(_dt_func_map("day_name"), dtypes=np.object_) | ||
dt_to_pytimedelta = Map.register(_dt_func_map("to_pytimedelta"), dtypes=np.object_) | ||
dt_total_seconds = Map.register(_dt_func_map("total_seconds"), dtypes=np.float64) | ||
dt_seconds = Map.register(_dt_prop_map("seconds"), dtypes=np.int64) | ||
dt_days = Map.register(_dt_prop_map("days"), dtypes=np.int64) | ||
dt_microseconds = Map.register(_dt_prop_map("microseconds"), dtypes=np.int64) | ||
dt_nanoseconds = Map.register(_dt_prop_map("nanoseconds"), dtypes=np.int64) | ||
dt_qyear = Map.register(_dt_prop_map("qyear"), dtypes=np.int64) | ||
dt_start_time = Map.register(_dt_prop_map("start_time")) | ||
dt_end_time = Map.register(_dt_prop_map("end_time")) | ||
dt_to_timestamp = Map.register(_dt_func_map("to_timestamp")) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This feels weird: doesn't pandas have a specific dtype for datetimes? I thought it did.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ser.dt is available on Series with datetime64 dtypes; the ser.dt.foo methods annotated here return results with the dtypes specified.