Skip to content

Commit e0a038d

Browse files
dakl authored and tswast committed
ENH: show progress bar when downloading data (#292)
* add progress bar
* update changelog
* default to use the progress bar
1 parent 351ac7b commit e0a038d

File tree

3 files changed

+58
-5
lines changed

3 files changed

+58
-5
lines changed

docs/source/changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Changelog
1111
``max_results`` to 0 to ignore query outputs, such as for DML or DDL
1212
queries. (:issue:`102`)
1313

14+
- Add ``progress_bar_type`` argument to :func:`~pandas_gbq.read_gbq()`. Use this
15+
argument to display a progress bar when downloading data. (:issue:`182`)
16+
1417
Documentation
1518
~~~~~~~~~~~~~
1619

pandas_gbq/gbq.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,9 @@ def process_http_error(ex):
436436

437437
raise GenericGBQException("Reason: {0}".format(ex))
438438

439-
def run_query(self, query, max_results=None, **kwargs):
439+
def run_query(
440+
self, query, max_results=None, progress_bar_type=None, **kwargs
441+
):
440442
from concurrent.futures import TimeoutError
441443
from google.auth.exceptions import RefreshError
442444
from google.cloud import bigquery
@@ -526,9 +528,15 @@ def run_query(self, query, max_results=None, **kwargs):
526528
)
527529
)
528530

529-
return self._download_results(query_reply, max_results=max_results)
531+
return self._download_results(
532+
query_reply,
533+
max_results=max_results,
534+
progress_bar_type=progress_bar_type,
535+
)
530536

531-
def _download_results(self, query_job, max_results=None):
537+
def _download_results(
538+
self, query_job, max_results=None, progress_bar_type=None
539+
):
532540
# No results are desired, so don't bother downloading anything.
533541
if max_results == 0:
534542
return None
@@ -552,7 +560,9 @@ def _download_results(self, query_job, max_results=None):
552560
schema_fields = [field.to_api_repr() for field in rows_iter.schema]
553561
nullsafe_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields)
554562
df = rows_iter.to_dataframe(
555-
dtypes=nullsafe_dtypes, bqstorage_client=bqstorage_client
563+
dtypes=nullsafe_dtypes,
564+
bqstorage_client=bqstorage_client,
565+
progress_bar_type=progress_bar_type,
556566
)
557567

558568
if df.empty:
@@ -833,6 +843,7 @@ def read_gbq(
833843
max_results=None,
834844
verbose=None,
835845
private_key=None,
846+
progress_bar_type="tqdm",
836847
):
837848
r"""Load data from Google BigQuery using google-cloud-python
838849
@@ -952,6 +963,23 @@ def read_gbq(
952963
or string contents. This is useful for remote server
953964
authentication (eg. Jupyter/IPython notebook on remote host).
954965
966+
progress_bar_type (Optional[str]):
967+
If set, use the `tqdm <https://tqdm.github.io/>`_ library to
968+
display a progress bar while the data downloads. Install the
969+
``tqdm`` package to use this feature.
970+
Possible values of ``progress_bar_type`` include:
971+
``None``
972+
No progress bar.
973+
``'tqdm'``
974+
Use the :func:`tqdm.tqdm` function to print a progress bar
975+
to :data:`sys.stderr`.
976+
``'tqdm_notebook'``
977+
Use the :func:`tqdm.tqdm_notebook` function to display a
978+
progress bar as a Jupyter notebook widget.
979+
``'tqdm_gui'``
980+
Use the :func:`tqdm.tqdm_gui` function to display a
981+
progress bar as a graphical dialog box.
982+
955983
Returns
956984
-------
957985
df: DataFrame
@@ -996,7 +1024,10 @@ def read_gbq(
9961024
)
9971025

9981026
final_df = connector.run_query(
999-
query, configuration=configuration, max_results=max_results
1027+
query,
1028+
configuration=configuration,
1029+
max_results=max_results,
1030+
progress_bar_type=progress_bar_type,
10001031
)
10011032

10021033
# Reindex the DataFrame on the provided column

tests/unit/test_gbq.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,3 +552,22 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client):
552552
if_exists="append",
553553
)
554554
assert original_schema == original_schema_cp
555+
556+
557+
def test_read_gbq_calls_tqdm(
558+
mock_bigquery_client, mock_service_account_credentials
559+
):
560+
mock_service_account_credentials.project_id = "service_account_project_id"
561+
df = gbq.read_gbq(
562+
"SELECT 1",
563+
dialect="standard",
564+
credentials=mock_service_account_credentials,
565+
progress_bar_type="foobar",
566+
)
567+
assert df is not None
568+
569+
mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100)
570+
571+
mock_list_rows.to_dataframe.assert_called_once_with(
572+
dtypes=mock.ANY, bqstorage_client=mock.ANY, progress_bar_type="foobar"
573+
)

0 commit comments

Comments
 (0)