
Commit 333d3cb

ENH: Add user-agent string when constructing BigQuery and BigQuery Storage API clients. (#284)
* ENH: Add user-agent string when constructing BigQuery and BigQuery Storage API clients.
  Since this was a relatively new addition to the BigQuery client library, only populate
  the user-agent for BigQuery when recent-enough versions of google-cloud-bigquery and
  google-api-core are installed.
* Add google-cloud-bigquery-storage to conda tests.
* Skip the BigQuery Storage API test when the package is not available. Use a smaller
  query to test the API.
* Make the BQ Storage API test query slightly smaller.
* Add fastavro to conda deps.
1 parent 1a68b40 commit 333d3cb
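For orientation before the file-by-file diffs below, here is a minimal sketch (not part of
the commit itself) of the pattern the change introduces, assuming google-cloud-bigquery >=
1.12.0 and google-api-core >= 1.11.0 are installed; the project id is hypothetical and
credentials are discovered from the environment:

    # Sketch only: attach a pandas-specific user-agent to a BigQuery client
    # via the client_info argument.
    import pandas
    import google.api_core.client_info
    from google.cloud import bigquery

    client_info = google.api_core.client_info.ClientInfo(
        user_agent="pandas-{}".format(pandas.__version__)
    )
    client = bigquery.Client(
        project="my-project",  # hypothetical project id
        client_info=client_info,
    )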

9 files changed: +138 -37 lines changed

ci/requirements-3.6-0.20.1.conda
Lines changed: 6 additions & 4 deletions

@@ -1,7 +1,9 @@
-pydata-google-auth
-google-cloud-bigquery==1.9.0
-pytest
-pytest-cov
 codecov
 coverage
+fastavro
 flake8
+google-cloud-bigquery==1.9.0
+google-cloud-bigquery-storage==0.5.0
+pydata-google-auth
+pytest
+pytest-cov

ci/requirements-3.7-NIGHTLY.conda
Lines changed: 2 additions & 1 deletion

@@ -1,5 +1,6 @@
 pydata-google-auth
-google-cloud-bigquery==1.10.0
+google-cloud-bigquery
+google-cloud-bigquery-storage
 pytest
 pytest-cov
 codecov

ci/requirements-3.7.pip
Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
 pandas==0.24.0
-google-cloud-bigquery==1.9.0
+google-cloud-bigquery==1.12.0
 pydata-google-auth==0.1.2

docs/source/changelog.rst
Lines changed: 5 additions & 0 deletions

@@ -16,6 +16,11 @@ Implementation changes
 - Use object dtype for ``STRING``, ``ARRAY``, and ``STRUCT`` columns when
   there are zero rows. (:issue:`285`)
 
+Internal changes
+~~~~~~~~~~~~~~~~
+
+- Populate ``user-agent`` with ``pandas`` version information. (:issue:`281`)
+
 .. _changelog-0.10.0:
 
 0.10.0 / 2019-04-05

pandas_gbq/gbq.py
Lines changed: 46 additions & 8 deletions

@@ -8,16 +8,18 @@
 try:
     # The BigQuery Storage API client is an optional dependency. It is only
     # required when use_bqstorage_api=True.
-    from google.cloud import bigquery_storage_v1beta1
+    from google.cloud import bigquery_storage
 except ImportError:  # pragma: NO COVER
-    bigquery_storage_v1beta1 = None
+    bigquery_storage = None
 
 from pandas_gbq.exceptions import AccessDenied
 import pandas_gbq.schema
 
 logger = logging.getLogger(__name__)
 
 BIGQUERY_INSTALLED_VERSION = None
+BIGQUERY_CLIENT_INFO_VERSION = "1.12.0"
+HAS_CLIENT_INFO = False
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
@@ -34,7 +36,7 @@
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
+    global BIGQUERY_INSTALLED_VERSION, HAS_CLIENT_INFO, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
 
     try:
         import pkg_resources
@@ -44,10 +46,17 @@ def _check_google_client_version():
 
     # https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md
     bigquery_minimum_version = pkg_resources.parse_version("1.9.0")
+    bigquery_client_info_version = pkg_resources.parse_version(
+        BIGQUERY_CLIENT_INFO_VERSION
+    )
     BIGQUERY_INSTALLED_VERSION = pkg_resources.get_distribution(
         "google-cloud-bigquery"
     ).parsed_version
 
+    HAS_CLIENT_INFO = (
+        BIGQUERY_INSTALLED_VERSION >= bigquery_client_info_version
+    )
+
     if BIGQUERY_INSTALLED_VERSION < bigquery_minimum_version:
         raise ImportError(
             "pandas-gbq requires google-cloud-bigquery >= {0}, "
@@ -392,6 +401,29 @@ def sizeof_fmt(num, suffix="B"):
 
     def get_client(self):
         from google.cloud import bigquery
+        import pandas
+
+        try:
+            # This module was added in google-api-core 1.11.0.
+            # We don't have a hard requirement on that version, so only
+            # populate the client_info if available.
+            import google.api_core.client_info
+
+            client_info = google.api_core.client_info.ClientInfo(
+                user_agent="pandas-{}".format(pandas.__version__)
+            )
+        except ImportError:
+            client_info = None
+
+        # In addition to new enough version of google-api-core, a new enough
+        # version of google-cloud-bigquery is required to populate the
+        # client_info.
+        if HAS_CLIENT_INFO:
+            return bigquery.Client(
+                project=self.project_id,
+                credentials=self.credentials,
+                client_info=client_info,
+            )
 
         return bigquery.Client(
             project=self.project_id, credentials=self.credentials
@@ -751,14 +783,20 @@ def _make_bqstorage_client(use_bqstorage_api, credentials):
     if not use_bqstorage_api:
         return None
 
-    if bigquery_storage_v1beta1 is None:
+    if bigquery_storage is None:
         raise ImportError(
-            "Install the google-cloud-bigquery-storage and fastavro packages "
-            "to use the BigQuery Storage API."
+            "Install the google-cloud-bigquery-storage and fastavro/pyarrow "
+            "packages to use the BigQuery Storage API."
         )
 
-    return bigquery_storage_v1beta1.BigQueryStorageClient(
-        credentials=credentials
+    import google.api_core.gapic_v1.client_info
+    import pandas
+
+    client_info = google.api_core.gapic_v1.client_info.ClientInfo(
+        user_agent="pandas-{}".format(pandas.__version__)
+    )
+    return bigquery_storage.BigQueryStorageClient(
+        credentials=credentials, client_info=client_info
     )
 
 
tests/system/test_gbq.py
Lines changed: 38 additions & 18 deletions

@@ -70,6 +70,16 @@ def random_dataset_id(bigquery_client):
         pass  # Not all tests actually create a dataset
 
 
+@pytest.fixture()
+def random_dataset(bigquery_client, random_dataset_id):
+    from google.cloud import bigquery
+
+    dataset_ref = bigquery_client.dataset(random_dataset_id)
+    dataset = bigquery.Dataset(dataset_ref)
+    bigquery_client.create_dataset(dataset)
+    return dataset
+
+
 @pytest.fixture()
 def tokyo_dataset(bigquery_client, random_dataset_id):
     from google.cloud import bigquery
@@ -894,31 +904,41 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, project_id):
         assert df["max_year"][0] >= 2000
 
 
-@pytest.mark.skip(reason="large query for BQ Storage API tests")
-def test_read_gbq_w_bqstorage_api(credentials):
+@pytest.mark.slow(reason="Large query for BQ Storage API tests.")
+def test_read_gbq_w_bqstorage_api(credentials, random_dataset):
+    pytest.importorskip("google.cloud.bigquery_storage")
     df = gbq.read_gbq(
         """
         SELECT
-            dependency_name,
-            dependency_platform,
-            project_name,
-            project_id,
-            version_number,
-            version_id,
-            dependency_kind,
-            optional_dependency,
-            dependency_requirements,
-            dependency_project_id
-        FROM
-            `bigquery-public-data.libraries_io.dependencies`
-        WHERE
-            LOWER(dependency_platform) = 'npm'
-        LIMIT 2500000
+            total_amount,
+            passenger_count,
+            trip_distance
+        FROM `bigquery-public-data.new_york_taxi_trips.tlc_green_trips_2014`
+        -- Select non-null rows for no-copy conversion from Arrow to pandas.
+        WHERE total_amount IS NOT NULL
+            AND passenger_count IS NOT NULL
+            AND trip_distance IS NOT NULL
+        LIMIT 10000000
         """,
         use_bqstorage_api=True,
        credentials=credentials,
+        configuration={
+            "query": {
+                "destinationTable": {
+                    "projectId": random_dataset.project,
+                    "datasetId": random_dataset.dataset_id,
+                    "tableId": "".join(
+                        [
+                            "test_read_gbq_w_bqstorage_api_",
+                            str(uuid.uuid4()).replace("-", "_"),
+                        ]
+                    ),
+                },
+                "writeDisposition": "WRITE_TRUNCATE",
+            }
+        },
     )
-    assert len(df) == 2500000
+    assert len(df) == 10000000
 
 
 class TestToGBQIntegration(object):
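For context (not part of the diff), the code path this test exercises is what a user opts
into by passing use_bqstorage_api=True to read_gbq. A rough sketch, assuming
google-cloud-bigquery-storage plus fastavro or pyarrow are installed and default
credentials are available; the table and project names are hypothetical:

    # Sketch only: download query results through the BigQuery Storage API.
    import pandas_gbq

    df = pandas_gbq.read_gbq(
        "SELECT name, total FROM `my-project.my_dataset.my_table`",
        project_id="my-project",
        # Raises ImportError if the optional packages are missing
        # (see _make_bqstorage_client in pandas_gbq/gbq.py above).
        use_bqstorage_api=True,
    )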

tests/unit/conftest.py
Lines changed: 5 additions & 2 deletions

@@ -15,12 +15,14 @@ def reset_context():
 
 @pytest.fixture(autouse=True)
 def mock_bigquery_client(monkeypatch):
-    from pandas_gbq import gbq
     from google.api_core.exceptions import NotFound
     import google.cloud.bigquery
     import google.cloud.bigquery.table
 
     mock_client = mock.create_autospec(google.cloud.bigquery.Client)
+    # Constructor returns the mock itself, so this mock can be treated as the
+    # constructor or the instance.
+    mock_client.return_value = mock_client
     mock_schema = [google.cloud.bigquery.SchemaField("_f0", "INTEGER")]
     # Mock out SELECT 1 query results.
     mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
@@ -34,5 +36,6 @@ def mock_bigquery_client(monkeypatch):
     mock_client.query.return_value = mock_query
     # Mock table creation.
     mock_client.get_table.side_effect = NotFound("nope")
-    monkeypatch.setattr(gbq.GbqConnector, "get_client", lambda _: mock_client)
+    monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client)
+    mock_client.reset_mock()
     return mock_client
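A standalone sketch (independent of the fixture above) of why "constructor returns the
mock itself" is convenient: one object records the constructor call and also acts as the
client instance, so a test can assert on the keyword arguments passed to the constructor.
The project id below is hypothetical, and the test assumes pytest and
google-cloud-bigquery are importable:

    # Sketch only: the constructor-returns-itself mocking pattern under pytest.
    from unittest import mock

    import google.cloud.bigquery


    def test_constructor_kwargs_are_recorded(monkeypatch):
        mock_client = mock.create_autospec(google.cloud.bigquery.Client)
        # Calling the patched "constructor" yields the same mock object.
        mock_client.return_value = mock_client
        monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client)

        client = google.cloud.bigquery.Client(project="my-project")

        assert client is mock_client
        mock_client.assert_called_with(project="my-project")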

tests/unit/test_auth.py
Lines changed: 0 additions & 2 deletions

@@ -72,8 +72,6 @@ def mock_default_credentials(scopes=None, request=None):
         )
 
     monkeypatch.setattr(google.auth, "default", mock_default_credentials)
-    mock_client = mock.create_autospec(google.cloud.bigquery.Client)
-    monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client)
 
     credentials, project = auth.get_credentials()
     assert project == "default-project"

tests/unit/test_gbq.py
Lines changed: 35 additions & 1 deletion

@@ -5,6 +5,7 @@
 from unittest import mock
 
 import numpy
+import pandas
 from pandas import DataFrame
 import pandas.util.testing as tm
 import pkg_resources
@@ -22,6 +23,10 @@
 ).parsed_version
 
 
+def _make_connector(project_id="some-project", **kwargs):
+    return gbq.GbqConnector(project_id, **kwargs)
+
+
 @pytest.fixture
 def min_bq_version():
     import pkg_resources
@@ -99,7 +104,7 @@ def no_auth(monkeypatch):
         ("DATETIME", "datetime64[ns]"),
     ],
 )
-def test_should_return_bigquery_correctly_typed(type_, expected):
+def test__bqschema_to_nullsafe_dtypes(type_, expected):
     result = gbq._bqschema_to_nullsafe_dtypes(
         [dict(name="x", type=type_, mode="NULLABLE")]
     )
@@ -109,6 +114,35 @@ def test_should_return_bigquery_correctly_typed(type_, expected):
     assert result == {"x": expected}
 
 
+def test_GbqConnector_get_client_w_old_bq(monkeypatch, mock_bigquery_client):
+    gbq._test_google_api_imports()
+    connector = _make_connector()
+    monkeypatch.setattr(gbq, "HAS_CLIENT_INFO", False)
+
+    connector.get_client()
+
+    # No client_info argument.
+    mock_bigquery_client.assert_called_with(
+        credentials=mock.ANY, project=mock.ANY
+    )
+
+
+def test_GbqConnector_get_client_w_new_bq(mock_bigquery_client):
+    gbq._test_google_api_imports()
+    pytest.importorskip(
+        "google.cloud.bigquery", minversion=gbq.BIGQUERY_CLIENT_INFO_VERSION
+    )
+    pytest.importorskip("google.api_core.client_info")
+
+    connector = _make_connector()
+    connector.get_client()
+
+    _, kwargs = mock_bigquery_client.call_args
+    assert kwargs["client_info"].user_agent == "pandas-{}".format(
+        pandas.__version__
+    )
+
+
 def test_to_gbq_should_fail_if_invalid_table_name_passed():
     with pytest.raises(gbq.NotFoundException):
         gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234")
