diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d836ef3441e89..ec5b496278184 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -239,6 +239,7 @@ Other Enhancements - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) - :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. +- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 66e996075f1ed..215391bbf217a 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2558,6 +2558,8 @@ def generate_table(self): for o, (idx, row) in enumerate(selected.iterrows()): for j, (col, v) in enumerate(col_index): val = row[col] + # Allow columns with mixed str and None (GH 23633) + val = '' if val is None else val key = gso_table.get(val, None) if key is None: # Stata prefers human numbers diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c6b6f6cab9ddd..47293e8765d26 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1505,3 +1505,20 @@ def test_unicode_dta_118(self): expected = pd.DataFrame(values, columns=columns) tm.assert_frame_equal(unicode_df, expected) + + def test_mixed_string_strl(self): + # GH 23633 + output = [ + {'mixed': 'string' * 500, + 'number': 0}, + {'mixed': None, + 'number': 1} + ] + + output = pd.DataFrame(output) + with tm.ensure_clean() as path: + output.to_stata(path, write_index=False, version=117) + reread = read_stata(path) + expected = 
output.fillna('') + expected.number = expected.number.astype('int32') + tm.assert_frame_equal(reread, expected)