diff --git a/doc/source/io.rst b/doc/source/io.rst index 0b9a610b50d7d..93f5c5bea53b4 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4711,6 +4711,12 @@ writes ``data`` to the database in batches of 1000 rows at a time: data.to_sql('data_chunked', engine, chunksize=1000) +.. note:: + + The function :func:`~pandas.DataFrame.to_sql` will perform a multivalue + insert if the engine dialect ``supports_multivalues_insert``. This will + greatly speed up the insert in some cases. + SQL data types ++++++++++++++ diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index feca90aae6237..233816600ec0f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -341,6 +341,8 @@ Other Enhancements - :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) - :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) - :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) +- :meth:`DataFrame.to_sql` now performs a multivalue insert if the underlying connection supports it rather than inserting row by row. + ``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) .. 
_whatsnew_0230.api_breaking: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index a582d32741ae9..ccb8d2d99d734 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -572,8 +572,29 @@ def create(self): else: self._execute_create() - def insert_statement(self): - return self.table.insert() + def insert_statement(self, data, conn): + """ + Generate tuple of SQLAlchemy insert statement and any arguments + to be executed by connection (via `_execute_insert`). + + Parameters + ---------- + conn : SQLAlchemy connectable(engine/connection) + Connection to receive the data + data : list of dict + The data to be inserted + + Returns + ------- + SQLAlchemy statement + insert statement + *, optional + Additional parameters to be passed when executing insert statement + """ + dialect = getattr(conn, 'dialect', None) + if dialect and getattr(dialect, 'supports_multivalues_insert', False): + return self.table.insert(data), + return self.table.insert(), data def insert_data(self): if self.index is not None: @@ -612,8 +633,9 @@ def insert_data(self): return column_names, data_list def _execute_insert(self, conn, keys, data_iter): + """Insert data into this table with database connection""" data = [{k: v for k, v in zip(keys, row)} for row in data_iter] - conn.execute(self.insert_statement(), data) + conn.execute(*self.insert_statement(data, conn)) def insert(self, chunksize=None): keys, data_list = self.insert_data() diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f3ab74d37a2bc..4530cc9d2fba9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1665,6 +1665,29 @@ class Temporary(Base): tm.assert_frame_equal(df, expected) + def test_insert_multivalues(self): + # issues addressed + # https://github.com/pandas-dev/pandas/issues/14315 + # https://github.com/pandas-dev/pandas/issues/8953 + + db = sql.SQLDatabase(self.conn) + df = DataFrame({'A': [1, 0, 0], 'B': [1.1, 0.2, 4.3]}) + table = sql.SQLTable("test_table", db, frame=df) 
+ data = [ + {'A': 1, 'B': 0.46}, + {'A': 0, 'B': -2.06} + ] + statement = table.insert_statement(data, conn=self.conn)[0] + + if self.supports_multivalues_insert: + assert statement.parameters == data, ( + 'insert statement should be multivalues' + ) + else: + assert statement.parameters is None, ( + 'insert statement should not be multivalues' + ) + class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): @@ -1679,6 +1702,7 @@ class _TestSQLiteAlchemy(object): """ flavor = 'sqlite' + supports_multivalues_insert = True @classmethod def connect(cls): @@ -1727,6 +1751,7 @@ class _TestMySQLAlchemy(object): """ flavor = 'mysql' + supports_multivalues_insert = True @classmethod def connect(cls): @@ -1796,6 +1821,7 @@ class _TestPostgreSQLAlchemy(object): """ flavor = 'postgresql' + supports_multivalues_insert = True @classmethod def connect(cls):