From fdc81b31e2a619b065982451944d76dd9dd09d99 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 17 Jan 2020 13:08:28 +0000 Subject: [PATCH] CLN: Add additional typing information --- pandas/io/stata.py | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 43af5eab92d74d..3743726971b874 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -167,7 +167,7 @@ stata_epoch = datetime.datetime(1960, 1, 1) -def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: +def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series: """ Convert from SIF to datetime. http://www.stata.com/help.cgi?datetime @@ -219,7 +219,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000 MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000 - def convert_year_month_safe(year: Series, month: Series) -> Series: + def convert_year_month_safe(year, month) -> Series: """ Convert year and month to datetimes, using pandas vectorized versions when the date range falls within the range supported by pandas. @@ -234,7 +234,7 @@ def convert_year_month_safe(year: Series, month: Series) -> Series: [datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index ) - def convert_year_days_safe(year: Series, days: Series) -> Series: + def convert_year_days_safe(year, days) -> Series: """ Converts year (e.g. 1999) and days since the start of the year to a datetime or datetime64 Series @@ -249,9 +249,7 @@ def convert_year_days_safe(year: Series, days: Series) -> Series: ] return Series(value, index=index) - def convert_delta_safe( - base: datetime.datetime, deltas: Series, unit: str - ) -> Series: + def convert_delta_safe(base, deltas, unit) -> Series: """ Convert base dates and deltas to datetimes, using pandas vectorized versions if the deltas satisfy restrictions required to be expressed @@ -298,21 +296,21 @@ def convert_delta_safe( # Delta days relative to base elif fmt.startswith(("%td", "td", "%d", "d")): base = stata_epoch - days: Series = dates + days = dates conv_dates = convert_delta_safe(base, days, "d") # does not count leap days - 7 days is a week. # 52nd week may have more than 7 days elif fmt.startswith(("%tw", "tw")): - year: Series = stata_epoch.year + dates // 52 + year = stata_epoch.year + dates // 52 days = (dates % 52) * 7 conv_dates = convert_year_days_safe(year, days) elif fmt.startswith(("%tm", "tm")): # Delta months relative to base year = stata_epoch.year + dates // 12 - month: Series = (dates % 12) + 1 + month = (dates % 12) + 1 conv_dates = convert_year_month_safe(year, month) elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base year = stata_epoch.year + dates // 4 - quarter_month: Series = (dates % 4) * 3 + 1 + quarter_month = (dates % 4) * 3 + 1 conv_dates = convert_year_month_safe(year, quarter_month) elif fmt.startswith(("%th", "th")): # Delta half-years relative to base year = stata_epoch.year + dates // 2 @@ -347,19 +345,19 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000 US_PER_DAY = NS_PER_DAY / 1000 - def parse_dates_safe( - dates: Series, delta: bool = False, year: bool = False, days: bool = False - ): - d: Dict[str, Any] = {} + def parse_dates_safe(dates, delta=False, year=False, days=False): + d = {} if is_datetime64_dtype(dates.values): if delta: - time_delta: Series = dates - stata_epoch + time_delta = dates - stata_epoch d["delta"] = time_delta.values.astype(np.int64) // 1000 # microseconds if days or year: + # ignore since mypy reports that DatetimeIndex has no year/month date_index = DatetimeIndex(dates) - d["year"], d["month"] = date_index.year, date_index.month + d["year"] = date_index.year # type: ignore + d["month"] = date_index.month # type: ignore if days: - days_in_ns: Series = dates.astype(np.int64) - to_datetime( + days_in_ns = dates.astype(np.int64) - to_datetime( d["year"], format="%Y" ).astype(np.int64) d["days"] = days_in_ns // NS_PER_DAY @@ -588,10 +586,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"): categories = catarray.cat.categories self.value_labels = list(zip(np.arange(len(categories)), categories)) self.value_labels.sort(key=lambda x: x[0]) - self.text_len = np.int32(0) - self.off = [] - self.val = [] - self.txt = [] + self.text_len = 0 + self.off: List[int] = [] + self.val: List[int] = [] + self.txt: List[bytes] = [] self.n = 0 # Compute lengths and setup lists of offsets and labels @@ -2131,7 +2129,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: is_cat = [is_categorical_dtype(data[col]) for col in data] self._is_col_cat = is_cat - self._value_labels = [] + self._value_labels: List[StataValueLabel] = [] if not any(is_cat): return data @@ -2290,8 +2288,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame: return data def _set_formats_and_types(self, dtypes: Series) -> None: - self.typlist = [] - self.fmtlist = [] + self.fmtlist: List[str] = [] + self.typlist: List[int] = [] for col, dtype in dtypes.items(): self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col])) self.typlist.append(_dtype_to_stata_type(dtype, self.data[col]))