Skip to content

Commit 9065368

Browse files
authored
feat(pandas-gbq): add Ruff format session and implement mypy (#17041)
This PR adds a Ruff-based `format` session to `pandas-gbq` and implements the `mypy` session based on the `gapic-generator` template. It also adds `# type: ignore[import-untyped]` pragmas, each with a descriptive comment, for untyped imports in source files, and applies formatting fixes.
1 parent fbcebd8 commit 9065368

20 files changed

Lines changed: 141 additions & 76 deletions

packages/pandas-gbq/noxfile.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@
1818

1919
from __future__ import absolute_import
2020

21-
from functools import wraps
2221
import os
2322
import pathlib
2423
import re
2524
import shutil
2625
import time
2726
import warnings
27+
from functools import wraps
2828

2929
import nox
3030

3131
BLACK_VERSION = "black==23.7.0"
3232
ISORT_VERSION = "isort==5.10.1"
33+
RUFF_VERSION = "ruff==0.14.14"
3334
LINT_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]
3435

3536
DEFAULT_PYTHON_VERSION = "3.14"
@@ -147,19 +148,29 @@ def blacken(session):
147148
@_calculate_duration
148149
def format(session):
149150
"""
150-
Run isort to sort imports. Then run black
151-
to format code to uniform standard.
151+
Run ruff to sort imports and format code.
152152
"""
153-
session.install(BLACK_VERSION, ISORT_VERSION)
154-
# Use the --fss option to sort imports using strict alphabetical order.
155-
# See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections
153+
# 1. Install ruff (skipped automatically if you run with --no-venv)
154+
session.install(RUFF_VERSION)
155+
156+
# 2. Run Ruff to fix imports
156157
session.run(
157-
"isort",
158-
"--fss",
158+
"ruff",
159+
"check",
160+
"--select",
161+
"I",
162+
"--fix",
163+
f"--target-version=py{UNIT_TEST_PYTHON_VERSIONS[0].replace('.', '')}",
164+
"--line-length=88",
159165
*LINT_PATHS,
160166
)
167+
168+
# 3. Run Ruff to format code
161169
session.run(
162-
"black",
170+
"ruff",
171+
"format",
172+
f"--target-version=py{UNIT_TEST_PYTHON_VERSIONS[0].replace('.', '')}",
173+
"--line-length=88",
163174
*LINT_PATHS,
164175
)
165176

@@ -519,11 +530,24 @@ def docfx(session):
519530

520531

521532
@nox.session(python=DEFAULT_PYTHON_VERSION)
533+
@_calculate_duration
522534
def mypy(session):
523535
"""Run the type checker."""
524-
# TODO(https://github.com/googleapis/google-cloud-python/issues/16014):
525-
# Add mypy tests
526-
session.skip("mypy tests are not yet supported")
536+
session.install(
537+
"mypy<1.16.0",
538+
"types-requests",
539+
"types-protobuf",
540+
"pandas-stubs",
541+
"types-tqdm",
542+
"types-psutil",
543+
)
544+
session.install(".")
545+
session.run(
546+
"mypy",
547+
"pandas_gbq",
548+
"--check-untyped-defs",
549+
*session.posargs,
550+
)
527551

528552

529553
@nox.session(python=DEFAULT_PYTHON_VERSION)

packages/pandas-gbq/pandas_gbq/auth.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ def get_credentials(
2323
client_id=None,
2424
client_secret=None,
2525
):
26-
import pydata_google_auth
26+
# pydata-google-auth provides neither type hints nor stubs that mypy can use for type checking.
27+
# Remove this comment and the ignore pragma upon completing:
28+
# https://github.com/googleapis/google-cloud-python/issues/17045
29+
import pydata_google_auth # type: ignore[import-untyped]
2730

2831
if private_key:
2932
raise NotImplementedError(
@@ -48,7 +51,10 @@ def get_credentials(
4851

4952

5053
def get_credentials_cache(reauth):
51-
import pydata_google_auth.cache
54+
# pydata-google-auth provides neither type hints nor stubs that mypy can use for type checking.
55+
# Remove this comment and the ignore pragma upon completing:
56+
# https://github.com/googleapis/google-cloud-python/issues/17045
57+
import pydata_google_auth.cache # type: ignore[import-untyped]
5258

5359
if reauth:
5460
return pydata_google_auth.cache.WriteOnlyCredentialsCache(

packages/pandas-gbq/pandas_gbq/core/pandas.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import itertools
66

77
import pandas
8+
import typing
89

910

1011
def list_columns_and_indexes(dataframe, index=True):
@@ -22,7 +23,9 @@ def list_columns_and_indexes(dataframe, index=True):
2223
if isinstance(dataframe.index, pandas.MultiIndex):
2324
for name in dataframe.index.names:
2425
if name and name not in column_names:
25-
values = dataframe.index.get_level_values(name)
26+
values = dataframe.index.get_level_values(
27+
typing.cast(typing.Union[str, int], name)
28+
)
2629
columns_and_indexes.append((name, values.dtype))
2730
else:
2831
if dataframe.index.name and dataframe.index.name not in column_names:

packages/pandas-gbq/pandas_gbq/core/read.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from __future__ import annotations
66

77
import typing
8-
from typing import Any, Dict, Optional, Sequence
98
import warnings
9+
from typing import Any, Dict, Optional, Sequence
1010

1111
import google.cloud.bigquery
1212
import google.cloud.bigquery.table
@@ -34,7 +34,10 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
3434
See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
3535
#missing-data-casting-rules-and-indexing
3636
"""
37-
import db_dtypes
37+
# db-dtypes provides neither type hints nor stubs that mypy can use for type checking.
38+
# Remove this comment and the ignore pragma upon completing:
39+
# https://github.com/googleapis/google-cloud-python/issues/17045
40+
import db_dtypes # type: ignore[import-untyped]
3841

3942
# If you update this mapping, also update the table at
4043
# `docs/reading.rst`.
@@ -79,7 +82,10 @@ def _finalize_dtypes(
7982
1970. See:
8083
https://github.com/googleapis/python-bigquery-pandas/issues/365
8184
"""
82-
import db_dtypes
85+
# db-dtypes provides neither type hints nor stubs that mypy can use for type checking.
86+
# Remove this comment and the ignore pragma upon completing:
87+
# https://github.com/googleapis/google-cloud-python/issues/17045
88+
import db_dtypes # type: ignore[import-untyped]
8389
import pandas.api.types
8490

8591
# If you update this mapping, also update the table at

packages/pandas-gbq/pandas_gbq/dry_runs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
import copy
88
from typing import Any, List
99

10-
from google.cloud import bigquery
1110
import pandas
11+
from google.cloud import bigquery
1212

1313

1414
def get_query_stats(

packages/pandas-gbq/pandas_gbq/gbq.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,36 @@
33
# license that can be found in the LICENSE file.
44

55
import copy
6-
from datetime import datetime
76
import logging
87
import re
98
import typing
109
import warnings
10+
from datetime import datetime
1111

1212
import pandas
1313

14-
from pandas_gbq.contexts import Context # noqa - backward compatible export
15-
from pandas_gbq.contexts import context
16-
from pandas_gbq.exceptions import ( # noqa - backward compatible export
14+
import pandas_gbq.schema
15+
import pandas_gbq.schema.pandas_to_bigquery
16+
from pandas_gbq.contexts import ( # noqa: F401
17+
Context, # noqa: F401 - imported solely to support a backwards compatible export
18+
context,
19+
)
20+
from pandas_gbq.exceptions import ( # noqa: F401 - imported solely to support a backwards compatible export
1721
DatasetCreationError,
1822
GenericGBQException,
1923
InvalidColumnOrder,
2024
InvalidIndexColumn,
25+
InvalidPageToken, # noqa: F401 - imported solely to support a backwards compatible export
26+
InvalidSchema, # noqa: F401 - imported solely to support a backwards compatible export
2127
NotFoundException,
28+
QueryTimeout, # noqa: F401 - imported solely to support a backwards compatible export
2229
TableCreationError,
2330
)
24-
from pandas_gbq.exceptions import InvalidPageToken # noqa - backward compatible export
25-
from pandas_gbq.exceptions import InvalidSchema # noqa - backward compatible export
26-
from pandas_gbq.exceptions import QueryTimeout # noqa - backward compatible export
2731
from pandas_gbq.features import FEATURES
28-
from pandas_gbq.gbq_connector import GbqConnector # noqa - backward compatible export
29-
from pandas_gbq.gbq_connector import _get_client # noqa - backward compatible export
30-
import pandas_gbq.schema
31-
import pandas_gbq.schema.pandas_to_bigquery
32+
from pandas_gbq.gbq_connector import ( # noqa: F401
33+
GbqConnector, # noqa: F401 - imported solely to support a backwards compatible export
34+
_get_client, # noqa: F401 - imported solely to support a backwards compatible export
35+
)
3236

3337
logger = logging.getLogger(__name__)
3438

@@ -40,17 +44,29 @@ def _test_google_api_imports():
4044
raise ImportError("pandas-gbq requires db-dtypes") from ex
4145

4246
try:
43-
import db_dtypes # noqa
47+
# db-dtypes provides neither type hints nor stubs that mypy can use for type checking.
48+
# This import is solely to test if the package is installed, so we ignore the "unused import" warning.
49+
# Remove this comment and the ignore pragma upon completing:
50+
# https://github.com/googleapis/google-cloud-python/issues/17045
51+
import db_dtypes # type: ignore[import-untyped] # noqa: F401
4452
except ImportError as ex: # pragma: NO COVER
4553
raise ImportError("pandas-gbq requires db-dtypes") from ex
4654

4755
try:
48-
import pydata_google_auth # noqa
56+
# pydata-google-auth provides neither type hints nor stubs that mypy can use for type checking.
57+
# This import is solely to test if the package is installed, so we ignore the "unused import" warning.
58+
# Remove this comment and the ignore pragma upon completing:
59+
# https://github.com/googleapis/google-cloud-python/issues/17045
60+
import pydata_google_auth # type: ignore[import-untyped] # noqa: F401
4961
except ImportError as ex: # pragma: NO COVER
5062
raise ImportError("pandas-gbq requires pydata-google-auth") from ex
5163

5264
try:
53-
from google_auth_oauthlib.flow import InstalledAppFlow # noqa
65+
# google-auth-oauthlib provides neither type hints nor stubs that mypy can use for type checking.
66+
# This import is solely to test if the package is installed, so we ignore the "unused import" warning.
67+
# Remove this comment and the ignore pragma upon completing:
68+
# https://github.com/googleapis/google-cloud-python/issues/17045
69+
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore[import-untyped] # noqa: F401
5470
except ImportError as ex: # pragma: NO COVER
5571
raise ImportError("pandas-gbq requires google-auth-oauthlib") from ex
5672

@@ -686,7 +702,7 @@ def generate_bq_schema(df, default_type="STRING"):
686702
"""
687703
# deprecation TimeSeries, #11121
688704
warnings.warn(
689-
"generate_bq_schema is deprecated and will be removed in " "a future version",
705+
"generate_bq_schema is deprecated and will be removed in a future version",
690706
FutureWarning,
691707
stacklevel=2,
692708
)
@@ -927,9 +943,7 @@ def create(self, dataset_id):
927943
from google.cloud.bigquery import Dataset
928944

929945
if self.exists(dataset_id):
930-
raise DatasetCreationError(
931-
"Dataset {0} already " "exists".format(dataset_id)
932-
)
946+
raise DatasetCreationError("Dataset {0} already exists".format(dataset_id))
933947

934948
dataset = Dataset(self._dataset_ref(dataset_id))
935949

packages/pandas-gbq/pandas_gbq/gbq_connector.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,30 @@
77
import logging
88
import time
99
import typing
10-
from typing import Any, Dict, Optional, Union
1110
import warnings
11+
from typing import Any, Dict, Optional, Union
1212

1313
# Only import at module-level at type checking time to avoid circular
1414
# dependencies in the pandas package, which has an optional dependency on
1515
# pandas-gbq.
1616
if typing.TYPE_CHECKING: # pragma: NO COVER
1717
import pandas
1818

19-
from pandas_gbq import dry_runs
2019
import pandas_gbq.constants
21-
from pandas_gbq.contexts import context
2220
import pandas_gbq.core.read
2321
import pandas_gbq.environment as environment
2422
import pandas_gbq.exceptions
23+
import pandas_gbq.query
24+
from pandas_gbq import dry_runs
25+
from pandas_gbq.contexts import context
2526
from pandas_gbq.exceptions import QueryTimeout
2627
from pandas_gbq.features import FEATURES
27-
import pandas_gbq.query
2828

29+
tqdm: Any = None
2930
try:
3031
import tqdm # noqa
3132
except ImportError:
32-
tqdm = None
33+
pass
3334

3435
logger = logging.getLogger(__name__)
3536

@@ -204,6 +205,8 @@ def run_query(
204205
].get("timeoutMs")
205206

206207
if timeout_ms:
208+
if not isinstance(timeout_ms, (str, int, float)):
209+
raise TypeError(f"Expected str, int or float, got {type(timeout_ms)}")
207210
timeout_ms = int(timeout_ms)
208211
# Having too small a timeout_ms results in individual
209212
# API calls timing out before they can finish.
@@ -220,6 +223,7 @@ def run_query(
220223

221224
self._start_timer()
222225
job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
226+
job_config = typing.cast(bigquery.QueryJobConfig, job_config)
223227
job_config.dry_run = dry_run
224228

225229
if FEATURES.bigquery_has_query_and_wait:

packages/pandas-gbq/pandas_gbq/load/core.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@
88
import io
99
from typing import Any, Callable, Dict, List, Optional
1010

11-
import db_dtypes
12-
from google.cloud import bigquery
11+
# db-dtypes provides neither type hints nor stubs that mypy can use for type checking.
12+
# Remove this comment and the ignore pragma upon completing:
13+
# https://github.com/googleapis/google-cloud-python/issues/17045
14+
import db_dtypes # type: ignore[import-untyped]
1315
import pandas
1416
import pyarrow.lib
17+
from google.cloud import bigquery
1518

16-
from pandas_gbq import exceptions
1719
import pandas_gbq.schema
1820
import pandas_gbq.schema.bigquery
1921
import pandas_gbq.schema.pandas_to_bigquery
22+
from pandas_gbq import exceptions
2023

2124

2225
def encode_chunk(dataframe):
@@ -38,8 +41,8 @@ def encode_chunk(dataframe):
3841
# Convert to a BytesIO buffer so that unicode text is properly handled.
3942
# See: https://github.com/pydata/pandas-gbq/issues/106
4043
body = csv_buffer.getvalue()
41-
body = body.encode("utf-8")
42-
return io.BytesIO(body)
44+
body_bytes = body.encode("utf-8")
45+
return io.BytesIO(body_bytes)
4346

4447

4548
def split_dataframe(dataframe, chunksize=None):
@@ -68,7 +71,7 @@ def cast_dataframe_for_parquet(
6871
See: https://github.com/googleapis/python-bigquery-pandas/issues/421
6972
"""
7073

71-
columns = schema.get("fields", [])
74+
columns = schema.get("fields", []) if schema is not None else []
7275

7376
# Protect against an explicit None in the dictionary.
7477
columns = columns if columns is not None else []
@@ -130,7 +133,7 @@ def cast_dataframe_for_csv(
130133
) -> pandas.DataFrame:
131134
"""Cast columns to needed dtype when writing CSV files."""
132135

133-
columns = schema.get("fields", [])
136+
columns = schema.get("fields", []) if schema is not None else []
134137

135138
# Protect against an explicit None in the dictionary.
136139
columns = columns if columns is not None else []
@@ -280,7 +283,9 @@ def load_chunk(chunk, job_config):
280283
finally:
281284
chunk_buffer.close()
282285

283-
return load_csv(dataframe, write_disposition, chunksize, bq_schema, load_chunk)
286+
return load_csv(
287+
dataframe, write_disposition, chunksize, list(bq_schema), load_chunk
288+
)
284289

285290

286291
def load_chunks(

0 commit comments

Comments
 (0)