diff --git a/doc/source/install.rst b/doc/source/install.rst index 89f7b580303f5..9a62c6a89457a 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -286,7 +286,9 @@ Optional Dependencies `xsel `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* `pandas-gbq `__: for Google BigQuery I/O. +* `pandas-gbq + `__: + for Google BigQuery I/O. (pandas-gbq >= 0.8.0) * `Backports.lzma `__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library. diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 44c467795d1ed..fce1b2c2c5d4b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -218,9 +218,12 @@ Other Enhancements - :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`) - Added support for reading from/writing to Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`, :issue:`23094`) - :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to - reflect changes from the `Pandas-GBQ library version 0.6.0 - `__. - (:issue:`21627`, :issue:`22557`) + reflect changes from the `Pandas-GBQ library version 0.8.0 + `__. + Adds a ``credentials`` argument, which enables the use of any kind of + `google-auth credentials + `__. 
(:issue:`21627`, + :issue:`22557`, :issue:`23662`) - New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) - :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`) - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) @@ -271,17 +274,19 @@ If installed, we now require: +-----------------+-----------------+----------+ | bottleneck | 1.2.0 | | +-----------------+-----------------+----------+ +| fastparquet | 0.1.2 | | ++-----------------+-----------------+----------+ | matplotlib | 2.0.0 | | +-----------------+-----------------+----------+ | numexpr | 2.6.1 | | +-----------------+-----------------+----------+ -| pytables | 3.4.2 | | -+-----------------+-----------------+----------+ -| scipy | 0.18.1 | | +| pandas-gbq | 0.8.0 | | +-----------------+-----------------+----------+ | pyarrow | 0.7.0 | | +-----------------+-----------------+----------+ -| fastparquet | 0.1.2 | | +| pytables | 3.4.2 | | ++-----------------+-----------------+----------+ +| scipy | 0.18.1 | | +-----------------+-----------------+----------+ Additionally we no longer depend on `feather-format` for feather based storage diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 511604517a84e..a54598166a608 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1241,9 +1241,9 @@ def to_dict(self, orient='dict', into=dict): raise ValueError("orient '{o}' not understood".format(o=orient)) def to_gbq(self, destination_table, project_id=None, chunksize=None, - reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None, location=None, - progress_bar=True, verbose=None): + 
reauth=False, if_exists='fail', auth_local_webserver=False, + table_schema=None, location=None, progress_bar=True, + credentials=None, verbose=None, private_key=None): """ Write a DataFrame to a Google BigQuery table. @@ -1311,10 +1311,31 @@ def to_gbq(self, destination_table, project_id=None, chunksize=None, chunk by chunk. *New in version 0.5.0 of pandas-gbq*. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to + override default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service + Account :class:`google.oauth2.service_account.Credentials` + directly. + + *New in version 0.8.0 of pandas-gbq*. + + .. versionadded:: 0.24.0 verbose : bool, deprecated - Deprecated in Pandas-GBQ 0.4.0. Use the `logging module + Deprecated in pandas-gbq version 0.4.0. Use the `logging module to adjust verbosity instead `__. + private_key : str, deprecated + Deprecated in pandas-gbq version 0.8.0. Use the ``credentials`` + parameter and + :func:`google.oauth2.service_account.Credentials.from_service_account_info` + or + :func:`google.oauth2.service_account.Credentials.from_service_account_file` + instead. + + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (e.g. Jupyter/IPython notebook on remote host). 
See Also -------- @@ -1324,11 +1345,11 @@ def to_gbq(self, destination_table, project_id=None, chunksize=None, from pandas.io import gbq return gbq.to_gbq( self, destination_table, project_id=project_id, - chunksize=chunksize, reauth=reauth, - if_exists=if_exists, private_key=private_key, + chunksize=chunksize, reauth=reauth, if_exists=if_exists, auth_local_webserver=auth_local_webserver, table_schema=table_schema, location=location, - progress_bar=progress_bar, verbose=verbose) + progress_bar=progress_bar, credentials=credentials, + verbose=verbose, private_key=private_key) @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 46e1b13631f07..4d5b2fda7cd10 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -24,9 +24,9 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, private_key=None, auth_local_webserver=False, - dialect=None, location=None, configuration=None, - verbose=None): + reauth=False, auth_local_webserver=False, dialect=None, + location=None, configuration=None, credentials=None, + private_key=None, verbose=None): """ Load data from Google BigQuery. @@ -98,10 +98,30 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, For more information see `BigQuery REST API Reference `__. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service Account + :class:`google.oauth2.service_account.Credentials` directly. + + *New in version 0.8.0 of pandas-gbq*. + + .. versionadded:: 0.24.0 verbose : None, deprecated - Deprecated in Pandas-GBQ 0.4.0. Use the `logging module - to adjust verbosity instead + Deprecated in pandas-gbq version 0.4.0. Use the `logging module to + adjust verbosity instead `__. 
+ private_key : str, deprecated + Deprecated in pandas-gbq version 0.8.0. Use the ``credentials`` + parameter and + :func:`google.oauth2.service_account.Credentials.from_service_account_info` + or + :func:`google.oauth2.service_account.Credentials.from_service_account_file` + instead. + + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (e.g. Jupyter/IPython notebook on remote host). Returns ------- @@ -127,20 +147,20 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, return pandas_gbq.read_gbq( query, project_id=project_id, index_col=index_col, - col_order=col_order, reauth=reauth, verbose=verbose, - private_key=private_key, auth_local_webserver=auth_local_webserver, - dialect=dialect, location=location, configuration=configuration) + col_order=col_order, reauth=reauth, + auth_local_webserver=auth_local_webserver, dialect=dialect, + location=location, configuration=configuration, + credentials=credentials, verbose=verbose, private_key=private_key) def to_gbq(dataframe, destination_table, project_id=None, chunksize=None, - verbose=None, reauth=False, if_exists='fail', private_key=None, - auth_local_webserver=False, table_schema=None, location=None, - progress_bar=True): + reauth=False, if_exists='fail', auth_local_webserver=False, + table_schema=None, location=None, progress_bar=True, + credentials=None, verbose=None, private_key=None): pandas_gbq = _try_import() return pandas_gbq.to_gbq( dataframe, destination_table, project_id=project_id, - chunksize=chunksize, verbose=verbose, reauth=reauth, - if_exists=if_exists, private_key=private_key, - auth_local_webserver=auth_local_webserver, - table_schema=table_schema, location=location, - progress_bar=progress_bar) + chunksize=chunksize, reauth=reauth, if_exists=if_exists, + auth_local_webserver=auth_local_webserver, table_schema=table_schema, + location=location, progress_bar=progress_bar, + credentials=credentials, 
verbose=verbose, private_key=private_key) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 345af7c8f056a..6dd16107bc7d7 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -19,7 +19,7 @@ api_exceptions = pytest.importorskip("google.api_core.exceptions") bigquery = pytest.importorskip("google.cloud.bigquery") service_account = pytest.importorskip("google.oauth2.service_account") -pandas_gbq = pytest.importorskip('pandas_gbq') +pandas_gbq = pytest.importorskip("pandas_gbq") PROJECT_ID = None PRIVATE_KEY_JSON_PATH = None @@ -70,15 +70,16 @@ def _get_private_key_path(): return private_key_path -def _get_client(): - project_id = _get_project_id() - credentials = None - +def _get_credentials(): private_key_path = _get_private_key_path() if private_key_path: - credentials = service_account.Credentials.from_service_account_file( + return service_account.Credentials.from_service_account_file( private_key_path) + +def _get_client(): + project_id = _get_project_id() + credentials = _get_credentials() return bigquery.Client(project=project_id, credentials=credentials) @@ -144,11 +145,11 @@ def test_roundtrip(self): df = make_mixed_dataframe_v2(test_size) df.to_gbq(destination_table, _get_project_id(), chunksize=None, - private_key=_get_private_key_path()) + credentials=_get_credentials()) result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), - private_key=_get_private_key_path(), + credentials=_get_credentials(), dialect="standard") assert result['num_rows'][0] == test_size