From 776240b36d434ed516df1c7f4093d83620fc0d0d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 9 Sep 2018 13:42:19 -0700 Subject: [PATCH 01/25] ENH: Write timezone columns to SQL --- pandas/io/sql.py | 10 ++++------ pandas/tests/io/test_sql.py | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index a582d32741ae9..2fdbff93e97c9 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -841,14 +841,12 @@ def _sqlalchemy_type(self, col): from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean, - DateTime, Date, Time) + DateTime, Date, Time, TIMESTAMP) if col_type == 'datetime64' or col_type == 'datetime': - try: - tz = col.tzinfo # noqa - return DateTime(timezone=True) - except: - return DateTime + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) + return DateTime if col_type == 'timedelta64': warnings.warn("the 'timedelta' type is not supported, and will be " "written as integer values (ns frequency) to the " diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 824e5a2b23df3..21261571e63ca 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1354,9 +1354,29 @@ def check(col): df = sql.read_sql_table("types_test_data", self.conn) check(df.DateColWithTz) + @pytest.mark.skipif(flavor != 'postgresql', + reason="postgresql only supports timezones") + def test_datetime_with_timezone_writing(self): + # GH 9086 + df = DataFrame({'A': date_range( + '2013-01-01 09:00:00', periods=3, tz='US/Pacific')}) + df.to_sql('test_datetime_tz', self.conn) + + # with read_table -> type information from schema used + result = sql.read_sql_table('test_datetime_tz', self.conn) + result = result.drop('index', axis=1) + tm.assert_frame_equal(result, df) + + # with read_sql -> no type information -> sqlite has no native + result = sql.read_sql_query( + 'SELECT * FROM test_datetime_tz', self.conn + ) + result = result.drop('index', axis=1) + tm.assert_frame_equal(result, df) + + def test_date_parsing(self): # No Parsing - df = sql.read_sql_table("types_test_data", self.conn) df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol']) From befd2000f1a1a3b1c9b06aa348011ef2c23bf0e4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 9 Sep 2018 19:01:13 -0700 Subject: [PATCH 02/25] add tests and change type to Timestamp --- pandas/io/sql.py | 12 ++++++++---- pandas/tests/io/test_sql.py | 6 ++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2fdbff93e97c9..1ca5b68fea2ec 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -741,8 +741,9 @@ def _get_column_names_and_types(self, dtype_mapper): def _create_table_setup(self): from sqlalchemy import Table, Column, PrimaryKeyConstraint - column_names_and_types = \ - self._get_column_names_and_types(self._sqlalchemy_type) + column_names_and_types = self._get_column_names_and_types( + self._sqlalchemy_type + ) columns = [Column(name, typ, index=is_index) for name, typ, is_index in column_names_and_types] @@ -844,6 +845,8 @@ def _sqlalchemy_type(self, col): DateTime, Date, Time, TIMESTAMP) if col_type == 'datetime64' or col_type == 'datetime': + # GH 9086: TIMESTAMP is the suggested type if the column contains + # timezone information if col.dt.tz is not None: return TIMESTAMP(timezone=True) return DateTime @@ -1273,8 +1276,9 @@ def _create_table_setup(self): structure of a DataFrame. 
The first entry will be a CREATE TABLE statement while the rest will be CREATE INDEX statements. """ - column_names_and_types = \ - self._get_column_names_and_types(self._sql_type_name) + column_names_and_types = self._get_column_names_and_types( + self._sql_type_name + ) pat = re.compile(r'\s+') column_names = [col_name for col_name, _, _ in column_names_and_types] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 21261571e63ca..8eab4646f65cc 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -954,7 +954,8 @@ def test_sqlalchemy_type_mapping(self): utc=True)}) db = sql.SQLDatabase(self.conn) table = sql.SQLTable("test_type", db, frame=df) - assert isinstance(table.table.c['time'].type, sqltypes.DateTime) + # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones + assert isinstance(table.table.c['time'].type, sqltypes.TIMESTAMP) def test_database_uri_string(self): @@ -1359,7 +1360,8 @@ def check(col): def test_datetime_with_timezone_writing(self): # GH 9086 df = DataFrame({'A': date_range( - '2013-01-01 09:00:00', periods=3, tz='US/Pacific')}) + '2013-01-01 09:00:00', periods=3, tz='US/Pacific' + )}) df.to_sql('test_datetime_tz', self.conn) # with read_table -> type information from schema used From e9f122fca9018f8289cdf63279ed1b807c112e97 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 10 Sep 2018 10:38:39 -0700 Subject: [PATCH 03/25] Lint error and comment our skipif --- pandas/tests/io/test_sql.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 8eab4646f65cc..7eddf425786fc 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1355,8 +1355,8 @@ def check(col): df = sql.read_sql_table("types_test_data", self.conn) check(df.DateColWithTz) - @pytest.mark.skipif(flavor != 'postgresql', - reason="postgresql only supports timezones") + # @pytest.mark.skipif(flavor != 'postgresql', + # reason="postgresql only supports timezones") def test_datetime_with_timezone_writing(self): # GH 9086 df = DataFrame({'A': date_range( @@ -1376,7 +1376,6 @@ def test_datetime_with_timezone_writing(self): result = result.drop('index', axis=1) tm.assert_frame_equal(result, df) - def test_date_parsing(self): # No Parsing From 969d2dacd509b0e9868f63bcbac27559bce10689 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 10 Sep 2018 13:38:13 -0700 Subject: [PATCH 04/25] Handle DatetimeTZ block --- pandas/io/sql.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1ca5b68fea2ec..cb2d82bddf9e7 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -597,7 +597,11 @@ def insert_data(self): if b.is_datetime: # convert to microsecond resolution so this yields # datetime.datetime - d = b.values.astype('M8[us]').astype(object) + if b.is_datetimetz: + # GH 9086: Ensure we return datetimes with timezone info + d = b.values.to_pydatetime() + else: + d = b.values.astype('M8[us]').astype(object) else: d = np.array(b.get_values(), dtype=object) From cc79b907a46d474a7ec8848922bda2c43b92604a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 10 Sep 2018 21:32:05 -0700 Subject: [PATCH 05/25] Ensure the datetimetz data is 2D first --- pandas/io/sql.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index cb2d82bddf9e7..4af0730cbd3a0 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -592,15 +592,16 @@ def 
insert_data(self): data_list = [None] * ncols blocks = temp._data.blocks - for i in range(len(blocks)): - b = blocks[i] + for b in blocks: if b.is_datetime: - # convert to microsecond resolution so this yields - # datetime.datetime + # return datetime.datetime objects if b.is_datetimetz: # GH 9086: Ensure we return datetimes with timezone info + # Need to return 2-D data; DatetimeIndex is 1D d = b.values.to_pydatetime() + d = np.expand_dims(d, axis=0) else: + # convert to microsecond resolution for datetime.datetime d = b.values.astype('M8[us]').astype(object) else: d = np.array(b.get_values(), dtype=object) From 6e86d582140dbcf82c842be719fa221860b9dd5a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 11 Sep 2018 13:57:19 -0700 Subject: [PATCH 06/25] Reading timezones returns timezones in UTC --- pandas/tests/io/test_sql.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 7eddf425786fc..1477c34029d6b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1355,26 +1355,30 @@ def check(col): df = sql.read_sql_table("types_test_data", self.conn) check(df.DateColWithTz) - # @pytest.mark.skipif(flavor != 'postgresql', - # reason="postgresql only supports timezones") def test_datetime_with_timezone_writing(self): # GH 9086 + if self.flavor != 'postgresql': + msg = "{} does not support datetime with time zone" + pytest.skip(msg.format(self.flavor)) + df = DataFrame({'A': date_range( '2013-01-01 09:00:00', periods=3, tz='US/Pacific' )}) df.to_sql('test_datetime_tz', self.conn) + expected = df.copy() + expected['A'] = expected['A'].dt.tz_convert('UTC') # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query( 'SELECT * FROM test_datetime_tz', self.conn ) result = result.drop('index', axis=1) - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) def test_date_parsing(self): # No Parsing From c7c4a7a28ca1e807c3acddc6d90ba15fa4b4fd26 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 11 Sep 2018 19:10:03 -0700 Subject: [PATCH 07/25] Add whatsnew and some touchups --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/sql.py | 2 +- pandas/tests/io/test_sql.py | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3660c1e843f6c..32fc09164de0a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -179,6 +179,7 @@ Other Enhancements - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). 
+- :func:`to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` columns (:issue:`9086`) - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 4af0730cbd3a0..a6708324053d0 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -617,7 +617,7 @@ def insert_data(self): return column_names, data_list def _execute_insert(self, conn, keys, data_iter): - data = [{k: v for k, v in zip(keys, row)} for row in data_iter] + data = [dict(zip(keys, row)) for row in data_iter] conn.execute(self.insert_statement(), data) def insert(self, chunksize=None): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 1477c34029d6b..b7033f8ff8f9f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1368,12 +1368,11 @@ def test_datetime_with_timezone_writing(self): expected = df.copy() expected['A'] = expected['A'].dt.tz_convert('UTC') - # with read_table -> type information from schema used + result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) tm.assert_frame_equal(result, expected) - # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query( 'SELECT * FROM test_datetime_tz', self.conn ) From 513bbc8cedabc72a2ba3eaabdf0d4e5b290522ca Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 13 Sep 2018 22:08:06 -0700 Subject: [PATCH 08/25] Test other dbs --- pandas/tests/io/test_sql.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a09e226db6c80..f16d08464f35f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1363,19 +1363,20 @@ def check(col): df = sql.read_sql_table("types_test_data", self.conn) check(df.DateColWithTz) - def test_datetime_with_timezone_writing(self): + def test_datetime_with_timezone_roundtrip(self): # GH 9086 - if self.flavor != 'postgresql': - msg = "{} does not support datetime with time zone" - pytest.skip(msg.format(self.flavor)) - - df = DataFrame({'A': date_range( + # Write datetimetz data to a db and read it back + # For dbs that support timestamps with timezones, should get back UTC + # otherwise naive data should be returned + expected = DataFrame({'A': date_range( '2013-01-01 09:00:00', periods=3, tz='US/Pacific' )}) - df.to_sql('test_datetime_tz', self.conn) + expected.to_sql('test_datetime_tz', self.conn) - expected = df.copy() - expected['A'] = expected['A'].dt.tz_convert('UTC') + if self.flavor == 'postgresql': + expected['A'] = expected['A'].dt.tz_convert('UTC') + else: + expected['A'] = expected['A'].dt.tz_convert(None) result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) @@ -1389,6 +1390,9 @@ def test_datetime_with_timezone_writing(self): def test_date_parsing(self): # No Parsing + df = sql.read_sql_table("types_test_data", self.conn) + expected_type = object if self.flavor == 'sqlite' else np.datetime64 + assert issubclass(df.DateCol.dtype.type, expected_type) df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol']) From 58772e10fa033b64ba09c70c924c9dd85feaa390 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 14 Sep 2018 11:10:34 
-0700 Subject: [PATCH 09/25] timestamps are actually returned as naive local for myself, sqlite --- pandas/tests/io/test_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f16d08464f35f..77f66ca7ed5e3 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1374,9 +1374,11 @@ def test_datetime_with_timezone_roundtrip(self): expected.to_sql('test_datetime_tz', self.conn) if self.flavor == 'postgresql': + # SQLalchemy "timezones" (i.e. offsets) are coerced to UTC expected['A'] = expected['A'].dt.tz_convert('UTC') else: - expected['A'] = expected['A'].dt.tz_convert(None) + # Otherwise, timestamps are returned as local, naive + expected['A'] = expected['A'].dt.localize(None) result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) From 1a29148c0af9913db62a078cd61646543d0393fc Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 14 Sep 2018 15:43:42 -0700 Subject: [PATCH 10/25] localize -> tz_localize --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 77f66ca7ed5e3..04509af97b6a1 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1378,7 +1378,7 @@ def test_datetime_with_timezone_roundtrip(self): expected['A'] = expected['A'].dt.tz_convert('UTC') else: # Otherwise, timestamps are returned as local, naive - expected['A'] = expected['A'].dt.localize(None) + expected['A'] = expected['A'].dt.tz_localize(None) result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) From 96e918879d612e606b55fafb94149e1560e2ca7a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 14 Sep 2018 17:05:21 -0700 Subject: [PATCH 11/25] sqlite doesnt support date types --- pandas/tests/io/test_sql.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 04509af97b6a1..cd44dc2e585b4 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1379,6 +1379,9 @@ def test_datetime_with_timezone_roundtrip(self): else: # Otherwise, timestamps are returned as local, naive expected['A'] = expected['A'].dt.tz_localize(None) + if self.flavor = 'sqlite': + # sqlite doesn't support date types + expected['A'] = expected['A'].astype(object) result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) From ded5584275c3bc816b1c426e5d153dbccbaa1d5c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 14 Sep 2018 20:21:02 -0700 Subject: [PATCH 12/25] type --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index cd44dc2e585b4..cdf0d16f79895 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1379,7 +1379,7 @@ def test_datetime_with_timezone_roundtrip(self): else: # Otherwise, timestamps are returned as local, naive expected['A'] = expected['A'].dt.tz_localize(None) - if self.flavor = 'sqlite': + if self.flavor == 'sqlite': # sqlite doesn't support date types expected['A'] = expected['A'].astype(object) From a7d1b3efba75c6af05381cbae83a8c0d2276c59a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 15 Sep 2018 15:37:40 -0700 Subject: [PATCH 13/25] retest --- pandas/tests/io/test_sql.py | 3 --- 1 file changed, 3 deletions(-) diff --git 
a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index cdf0d16f79895..04509af97b6a1 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1379,9 +1379,6 @@ def test_datetime_with_timezone_roundtrip(self): else: # Otherwise, timestamps are returned as local, naive expected['A'] = expected['A'].dt.tz_localize(None) - if self.flavor == 'sqlite': - # sqlite doesn't support date types - expected['A'] = expected['A'].astype(object) result = sql.read_sql_table('test_datetime_tz', self.conn) result = result.drop('index', axis=1) From 305759cb7d0f037aa3f5934f310d77b56443f2a6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 15 Sep 2018 18:08:48 -0700 Subject: [PATCH 14/25] read_table vs read_query sqlite difference --- pandas/tests/io/test_sql.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 04509af97b6a1..c2383b4fa4964 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1388,6 +1388,10 @@ def test_datetime_with_timezone_roundtrip(self): 'SELECT * FROM test_datetime_tz', self.conn ) result = result.drop('index', axis=1) + if self.flavor == 'sqlite': + # read_sql_query does not return datetime type like read_sql_table + assert isinstance(result.loc[0, 'A'], string_types) + result['A'] = to_datetime(result['A']) tm.assert_frame_equal(result, expected) def test_date_parsing(self): From 7a795317c1c4fad2b59acbbeef73215d63b4a7d3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 18 Sep 2018 20:37:56 -0700 Subject: [PATCH 15/25] Add note in the to_sql docs --- pandas/core/generic.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdc5b4310bce2..ec8080bc78654 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2306,6 +2306,13 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, -------- pandas.read_sql : read a DataFrame from a table + Notes + ----- + Timezone aware datetime columns will be written as + ``Timestamp with timezone`` type with SQLAlchemy if supported by the + database. Otherwise, the datetimes will be stored as local, naive + timestamps. + References ---------- .. [1] http://docs.sqlalchemy.org From 24823f8f008ac0aa4d36d4a04960ef7a3aa3c77e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 19 Sep 2018 09:42:20 -0700 Subject: [PATCH 16/25] Modify whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fd370ea6a4e4b..23fb5bc3eac6a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -179,7 +179,7 @@ Other Enhancements - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). 
-- :func:`to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` columns (:issue:`9086`) +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` columns (:issue:`9086`) - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) From 978a0d39aa644408864a8a3bbc0df5adea77ae68 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 21 Sep 2018 11:53:38 -0700 Subject: [PATCH 17/25] Address review --- doc/source/io.rst | 26 +++++++++++++++++++++++++- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/generic.py | 4 ++-- pandas/tests/io/test_sql.py | 4 +--- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index cb22bb9198e25..cb1bd7c623c49 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4777,7 +4777,7 @@ writes ``data`` to the database in batches of 1000 rows at a time: data.to_sql('data_chunked', engine, chunksize=1000) SQL data types -++++++++++++++ +'''''''''''''' :func:`~pandas.DataFrame.to_sql` will try to map your data to an appropriate SQL data type based on the dtype of the data. When you have columns of dtype @@ -4809,6 +4809,30 @@ default ``Text`` type for string columns: Because of this, reading the database table back in does **not** generate a categorical. +Datetime data types +''''''''''''''''''' + +Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing +datetime data that is timezone naive or timezone aware. However, the resulting +data stored in the database ultimately depends on the corresponding, +supported data type for datetime data. + +=========== ============================================= =================== +Database SQL Datetime Types Timezone Support +=========== ============================================= =================== +SQLite ``TEXT`` No +MySQL ``TIMESTAMP`` or ``DATETIME`` No +PostgreSQL ``TIMESTAMP`` or ``TIMESTAMP WITH TIME ZONE`` Yes +=========== ============================================= =================== + +When writing timezone aware data to databases that do not support timezones, +the data will be written as timezone naive timestamps that are in local time +with respect to the timezone. + +:func:`~pandas.read_sql_table` is also capable of reading datetime data that is +timezone aware of naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas +will convert the data to UTC. + Reading Tables '''''''''''''' diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9322dc526a75a..d32f7a288c11e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -183,7 +183,7 @@ Other Enhancements - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). 
-- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` columns (:issue:`9086`) +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. See the `io reference `__ for implications (:issue:`9086`). - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f143e55dbb16e..f7398294732f4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2316,8 +2316,8 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, ----- Timezone aware datetime columns will be written as ``Timestamp with timezone`` type with SQLAlchemy if supported by the - database. Otherwise, the datetimes will be stored as local, naive - timestamps. + database. Otherwise, the datetimes will stored in the closest available + type as timezone unaware timestamps local to the original timezone. References ---------- diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 281c8251943ba..aeefc5f5ebe61 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1370,7 +1370,7 @@ def test_datetime_with_timezone_roundtrip(self): expected = DataFrame({'A': date_range( '2013-01-01 09:00:00', periods=3, tz='US/Pacific' )}) - expected.to_sql('test_datetime_tz', self.conn) + expected.to_sql('test_datetime_tz', self.conn, index=False) if self.flavor == 'postgresql': # SQLalchemy "timezones" (i.e. offsets) are coerced to UTC @@ -1380,13 +1380,11 @@ def test_datetime_with_timezone_roundtrip(self): expected['A'] = expected['A'].dt.tz_localize(None) result = sql.read_sql_table('test_datetime_tz', self.conn) - result = result.drop('index', axis=1) tm.assert_frame_equal(result, expected) result = sql.read_sql_query( 'SELECT * FROM test_datetime_tz', self.conn ) - result = result.drop('index', axis=1) if self.flavor == 'sqlite': # read_sql_query does not return datetime type like read_sql_table assert isinstance(result.loc[0, 'A'], string_types) From 8025248b643c070e55d8c515df80b4e49ef5cf0e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 21 Sep 2018 12:59:21 -0700 Subject: [PATCH 18/25] Fix sqlalchemy ref --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index aeefc5f5ebe61..1a91a394f6405 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1373,7 +1373,7 @@ def test_datetime_with_timezone_roundtrip(self): expected.to_sql('test_datetime_tz', self.conn, index=False) if self.flavor == 'postgresql': - # SQLalchemy "timezones" (i.e. offsets) are coerced to UTC + # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC expected['A'] = expected['A'].dt.tz_convert('UTC') else: # Otherwise, timestamps are returned as local, naive From 0e89370dc8911e86f93b581400ba02c97372fb14 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 25 Sep 2018 22:08:35 -0700 Subject: [PATCH 19/25] clarify documentation and whatsnew --- doc/source/io.rst | 14 ++++++++++---- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/generic.py | 6 ++++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index cb1bd7c623c49..aba1dcdae62b5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4777,7 +4777,7 @@ writes ``data`` to the database in batches of 1000 rows at a time: data.to_sql('data_chunked', engine, chunksize=1000) SQL data types -'''''''''''''' +++++++++++++++ :func:`~pandas.DataFrame.to_sql` will try to map your data to an appropriate SQL data type based on the dtype of the data. When you have columns of dtype @@ -4809,13 +4809,19 @@ default ``Text`` type for string columns: Because of this, reading the database table back in does **not** generate a categorical. +.. _io.sql_datetime_data: + Datetime data types ''''''''''''''''''' Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing datetime data that is timezone naive or timezone aware. However, the resulting -data stored in the database ultimately depends on the corresponding, -supported data type for datetime data. +data stored in the database ultimately depends on the supported data type +for datetime data of the database system being used. + +The following table lists supported data types for datetime data for some +common databases. Other database dialects may have different data types for +datetime data. =========== ============================================= =================== Database SQL Datetime Types Timezone Support @@ -4830,7 +4836,7 @@ the data will be written as timezone naive timestamps that are in local time with respect to the timezone. :func:`~pandas.read_sql_table` is also capable of reading datetime data that is -timezone aware of naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas +timezone aware or naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas will convert the data to UTC. Reading Tables diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d32f7a288c11e..ef708beb9c4a8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -183,7 +183,7 @@ Other Enhancements - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). -- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. See the `io reference `__ for implications (:issue:`9086`). +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). 
- :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f7398294732f4..1e25840576ce3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2316,8 +2316,10 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, ----- Timezone aware datetime columns will be written as ``Timestamp with timezone`` type with SQLAlchemy if supported by the - database. Otherwise, the datetimes will stored in the closest available - type as timezone unaware timestamps local to the original timezone. + database. Otherwise, the datetimes will be stored as timezone unaware + timestamps local to the original timezone. + + .. versionadded:: 0.24.0 References ---------- From bab5cfbad3c41782135d6199dc56d9ef4679666a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 26 Sep 2018 17:18:14 -0700 Subject: [PATCH 20/25] Add an api breaking entry change as well --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ef708beb9c4a8..2e2831c1facb0 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -534,6 +534,7 @@ Datetimelike API Changes - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) - :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) +- :meth:`DataFrame.to_sql` now writes timezone aware datetime data (``datetime64[ns, tz]`` dtype) as timezone unaware local timestamps instead of timezone unaware UTC timestamps for database dialects that don't support the ``TIMESTAMP WITH TIME ZONE`` type. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). .. _whatsnew_0240.api.other: From 8c754b54a4fbe954710047d13c45ab314fbdd094 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 24 Oct 2018 17:19:19 -0700 Subject: [PATCH 21/25] Add new section in whatsnew --- doc/source/whatsnew/v0.24.0.txt | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 06c0309146743..c9d212de04d27 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -424,6 +424,35 @@ that the dates have been converted to UTC pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) +.. _whatsnew_0240.api_breaking.timezone_databases: + +Writing Timezone Aware Data to Databases +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`to_sql` can now write timezone aware datetime data (``datetime64[ns, tz]`` dtype) +as ``TIMESTAMP WITH TIME ZONE`` type to databases that support that type. (:issue:`9086`) + +However, databases that do not support timezones will now store timezone aware data +as naive timestamps in local time instead of naive timestamps in UTC. 
+ +Therefore, round-tripping timezone aware data from pandas to a database and back +to pandas has changed behavior. Given timezone aware data that has been +written to a database from pandas, the following table summarizes +the timestamps that will be returned. + ++----------+--------------------------------+-----------------------------------+ +| | Database with timezone support | Database without timezone support | ++==========+================================+===================================+ +| Before | tz-naive in UTC time | tz-naive in UTC time | ++----------+--------------------------------+-----------------------------------+ +| After | tz-aware in UTC time | tz-naive in local time | ++----------+--------------------------------+-----------------------------------+ + +Converting timezones read from ``TIMESTAMP WITH TIME ZONE`` types to UTC is +consistent with prior behavior. + +See the :ref:`io.sql_datetime_data` for more information. + .. _whatsnew_0240.api_breaking.calendarday: CalendarDay Offset @@ -874,7 +903,6 @@ Datetimelike API Changes - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) - :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) -- :meth:`DataFrame.to_sql` now writes timezone aware datetime data (``datetime64[ns, tz]`` dtype) as timezone unaware local timestamps instead of timezone unaware UTC timestamps for database dialects that don't support the ``TIMESTAMP WITH TIME ZONE`` type. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). .. _whatsnew_0240.api.other: From 5af83f7d59dbf6aa375c0bda8fba284426e1e385 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 25 Oct 2018 18:59:29 -0700 Subject: [PATCH 22/25] Fix whatsnew to reflect prior bug --- doc/source/whatsnew/v0.24.0.txt | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b3437a02868cf..6e3ae659c1820 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -433,34 +433,6 @@ that the dates have been converted to UTC pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) -.. _whatsnew_0240.api_breaking.timezone_databases: - -Writing Timezone Aware Data to Databases -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:meth:`to_sql` can now write timezone aware datetime data (``datetime64[ns, tz]`` dtype) -as ``TIMESTAMP WITH TIME ZONE`` type to databases that support that type. (:issue:`9086`) - -However, databases that do not support timezones will now store timezone aware data -as naive timestamps in local time instead of naive timestamps in UTC. - -Therefore, round-tripping timezone aware data from pandas to a database and back -to pandas has changed behavior. Given timezone aware data that has been -written to a database from pandas, the following table summarizes -the timestamps that will be returned. 
- -+----------+--------------------------------+-----------------------------------+ -| | Database with timezone support | Database without timezone support | -+==========+================================+===================================+ -| Before | tz-naive in UTC time | tz-naive in UTC time | -+----------+--------------------------------+-----------------------------------+ -| After | tz-aware in UTC time | tz-naive in local time | -+----------+--------------------------------+-----------------------------------+ - -Converting timezones read from ``TIMESTAMP WITH TIME ZONE`` types to UTC is -consistent with prior behavior. - -See the :ref:`io.sql_datetime_data` for more information. .. _whatsnew_0240.api_breaking.calendarday: @@ -1156,6 +1128,8 @@ MultiIndex I/O ^^^ +- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) + .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine From 1054fdb8f14c81d60ac24de4e9bf5333531db673 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 6 Nov 2018 22:23:27 -0800 Subject: [PATCH 23/25] handle case when column is datetimeindex --- pandas/io/sql.py | 9 +++++++-- pandas/tests/io/test_sql.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1f7393db12ef0..2f411a956dfb8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -852,8 +852,13 @@ def _sqlalchemy_type(self, col): if col_type == 'datetime64' or col_type == 'datetime': # GH 9086: TIMESTAMP is the suggested type if the column contains # timezone information - if col.dt.tz is not None: - return TIMESTAMP(timezone=True) + try: + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) + except AttributeError: + # The column is actually a DatetimeIndex + if col.tz is not None: + return TIMESTAMP(timezone=True) return DateTime if col_type == 'timedelta64': warnings.warn("the 'timedelta' type is not supported, and will be " diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 1a91a394f6405..e5e3600d16732 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1391,6 +1391,16 @@ def test_datetime_with_timezone_roundtrip(self): result['A'] = to_datetime(result['A']) tm.assert_frame_equal(result, expected) + def test_naive_datetimeindex_roundtrip(self): + # GH 23510 + # Ensure that a naive DatetimeIndex isn't converted to UTC + dates = date_range('2018-01-01', periods=5, freq='6H') + expected = DataFrame({'nums': range(5)}, index=dates) + expected.to_sql('foo_table', self.conn, index_label='info_date') + result = sql.read_sql_table('foo_table', self.conn, + index_col='info_date') + tm.assert_frame_equal(result, expected) + def test_date_parsing(self): # No Parsing df = sql.read_sql_table("types_test_data", self.conn) From f21c75502b0222ec33bb35d9c59aed005e1b0e13 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 6 Nov 2018 22:28:57 -0800 Subject: [PATCH 24/25] Add new whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e25d9ca620673..9cd673b4ed314 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -462,7 +462,6 @@ that the dates have been converted to UTC pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) - .. 
_whatsnew_0240.api_breaking.calendarday: CalendarDay Offset @@ -1245,6 +1244,7 @@ I/O ^^^ - Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: From ef3b20fcb1640986278aa8a774eb70e677b06e34 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 7 Nov 2018 10:23:39 -0800 Subject: [PATCH 25/25] don't check name --- pandas/tests/io/test_sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e5e3600d16732..777b04bbae97d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1399,7 +1399,8 @@ def test_naive_datetimeindex_roundtrip(self): expected.to_sql('foo_table', self.conn, index_label='info_date') result = sql.read_sql_table('foo_table', self.conn, index_col='info_date') - tm.assert_frame_equal(result, expected) + # result index with gain a name from a set_index operation; expected + tm.assert_frame_equal(result, expected, check_names=False) def test_date_parsing(self): # No Parsing
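A minimal usage sketch of the round-trip behavior this series implements, assuming a reachable PostgreSQL instance; the connection string and table name below are placeholders, not values taken from the patches::

    import pandas as pd
    from sqlalchemy import create_engine

    # Placeholder connection string; any SQLAlchemy-supported database works.
    engine = create_engine("postgresql://user:password@localhost:5432/pandas_test")

    df = pd.DataFrame(
        {"A": pd.date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")}
    )
    df.to_sql("test_datetime_tz", engine, index=False)

    # PostgreSQL stores the column as TIMESTAMP WITH TIME ZONE, so the values
    # come back timezone aware, converted to UTC.
    result = pd.read_sql_table("test_datetime_tz", engine)
    print(result["A"].dt.tz)  # UTC

    # A database without timezone support (e.g. SQLite or MySQL) would instead
    # return timezone-naive timestamps in the original local wall time.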
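The new type mapping can also be inspected directly through the internal ``pandas.io.sql`` helpers exercised in the tests above; a sketch, assuming an in-memory SQLite engine (the internal ``SQLDatabase``/``SQLTable`` API may differ across pandas versions)::

    import pandas as pd
    import sqlalchemy.types as sqltypes
    from pandas.io import sql
    from sqlalchemy import create_engine

    engine = create_engine("sqlite://")  # in-memory engine, used only to build the schema

    df = pd.DataFrame(
        {"time": pd.to_datetime(["2018-01-01 00:00:00+00:00"], utc=True)}
    )
    db = sql.SQLDatabase(engine)
    table = sql.SQLTable("test_type", db, frame=df)

    # A timezone-aware column now maps to TIMESTAMP(timezone=True) instead of
    # plain DateTime; naive datetime columns still map to DateTime.
    assert isinstance(table.table.c["time"].type, sqltypes.TIMESTAMP)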
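The late fix for naive DatetimeIndex values (GH 23510) can be exercised with a sketch like the following; the engine and table name are again placeholders, and PostgreSQL is where the original bug (a naive index written as ``TIMESTAMP WITH TIME ZONE``) showed up::

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine("sqlite://")  # placeholder; use a PostgreSQL URL to see the fix

    dates = pd.date_range("2018-01-01", periods=5, freq="6H")  # timezone-naive
    df = pd.DataFrame({"nums": range(5)}, index=dates)
    df.to_sql("foo_table", engine, index_label="info_date")

    result = pd.read_sql_table("foo_table", engine, index_col="info_date")
    print(result.index.tz)  # None -- the naive index is not coerced to UTC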