Skip to content

Commit f8978b6

Browse files
committed
fix(bigquery): clamp progress-bar cursor when query_plan shrinks (#16168)
`wait_for_query` indexes into `query_job.query_plan` with a cursor `i` that survives across iterations of the polling loop, but `reload()` can return a shorter `query_plan` than the previous iteration (observed for MERGE queries — see #16168). Clamp `i` to the current plan length before indexing so the progress bar plateaus at the last stage instead of raising `IndexError: list index out of range`. Adds tests/unit/test__tqdm_helpers.py with a regression test that fails on origin/main with the exact stack trace from the issue and passes after the fix.
1 parent 471eb13 commit f8978b6

2 files changed

Lines changed: 131 additions & 1 deletion

File tree

packages/google-cloud-bigquery/google/cloud/bigquery/_tqdm_helpers.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,16 @@ def wait_for_query(
110110
while True:
111111
if query_job.query_plan:
112112
default_total = len(query_job.query_plan)
113+
# The query plan can shrink between iterations: query_job.reload()
114+
# below pulls a fresh plan from the server, and BigQuery may re-emit
115+
# fewer stages than the previous response (observed for MERGE and
116+
# similar queries — see issue #16168). Clamp the cursor so we never
117+
# index out of range; the worst case is that the progress bar
118+
# plateaus at the last available stage until the query completes.
119+
if i >= default_total:
120+
i = default_total - 1
113121
current_stage = query_job.query_plan[i]
114-
progress_bar.total = len(query_job.query_plan)
122+
progress_bar.total = default_total
115123
progress_bar.set_description(
116124
f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s"
117125
)
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Unit tests for google.cloud.bigquery._tqdm_helpers.
16+
17+
Focused on the bounds-check around `query_job.query_plan[i]` introduced for
18+
issue #16168.
19+
"""
20+
21+
import concurrent.futures
22+
from unittest import mock
23+
24+
import pytest
25+
26+
try:
27+
import tqdm # noqa: F401
28+
except ImportError: # pragma: NO COVER
29+
tqdm = None
30+
31+
32+
pytestmark = pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
33+
34+
35+
def _make_stage(name, status):
36+
return mock.Mock(name=name, status=status, spec=["name", "status"])
37+
38+
39+
def _make_query_job(plans):
40+
"""Return a mock QueryJob whose `query_plan` cycles through the given plans.
41+
42+
Each call to `reload()` advances `query_plan` to the next entry in `plans`.
43+
"""
44+
plans_iter = iter(plans)
45+
job = mock.MagicMock()
46+
job.query_plan = next(plans_iter)
47+
job.job_id = "test-job"
48+
49+
def _reload(*args, **kwargs):
50+
try:
51+
job.query_plan = next(plans_iter)
52+
except StopIteration:
53+
pass
54+
55+
job.reload.side_effect = _reload
56+
return job
57+
58+
59+
def test_wait_for_query_handles_shrinking_query_plan():
60+
"""Reproduces issue #16168: query_plan can shrink between iterations
61+
(BigQuery emits a different plan after reload()), and the cursor `i`
62+
must be clamped before indexing into query_plan again. Without the
63+
bounds check this raises ``IndexError: list index out of range``.
64+
"""
65+
from google.cloud.bigquery import _tqdm_helpers
66+
67+
# First plan has 3 stages, the second (after reload) has only 1.
68+
# On entry to the second iteration, i has been advanced to 1 (from
69+
# the COMPLETE branch of the first plan). Without the bounds clamp,
70+
# `query_plan[1]` on the 1-element plan raises IndexError.
71+
plan_a = [
72+
_make_stage("S00", "COMPLETE"),
73+
_make_stage("S01", "COMPLETE"),
74+
_make_stage("S02", "RUNNING"),
75+
]
76+
plan_b = [_make_stage("S00-merged", "COMPLETE")]
77+
78+
row_iterator = mock.Mock(name="row_iterator")
79+
job = _make_query_job([plan_a, plan_b])
80+
# Two timeouts to exercise the bounds path, then a real result.
81+
job.result.side_effect = [
82+
concurrent.futures.TimeoutError,
83+
concurrent.futures.TimeoutError,
84+
row_iterator,
85+
]
86+
87+
with mock.patch.object(_tqdm_helpers, "tqdm") as tqdm_mock:
88+
bar = mock.MagicMock()
89+
tqdm_mock.tqdm.return_value = bar
90+
result = _tqdm_helpers.wait_for_query(job, progress_bar_type="tqdm")
91+
92+
# The fix means we complete cleanly; before the fix, an IndexError would
93+
# propagate out of wait_for_query.
94+
assert result is row_iterator
95+
assert bar.close.call_count == 1
96+
97+
98+
def test_wait_for_query_progress_does_not_overflow_default_total():
99+
"""Cursor i must never be reported beyond default_total in progress_bar.total."""
100+
from google.cloud.bigquery import _tqdm_helpers
101+
102+
# Plan stays small but the loop runs long enough that, without clamping,
103+
# an aggressive i would index out of range.
104+
plan = [_make_stage("S00", "COMPLETE")]
105+
row_iterator = mock.Mock(name="row_iterator")
106+
job = _make_query_job([plan, plan, plan])
107+
job.result.side_effect = [
108+
concurrent.futures.TimeoutError,
109+
concurrent.futures.TimeoutError,
110+
row_iterator,
111+
]
112+
113+
with mock.patch.object(_tqdm_helpers, "tqdm") as tqdm_mock:
114+
bar = mock.MagicMock()
115+
tqdm_mock.tqdm.return_value = bar
116+
result = _tqdm_helpers.wait_for_query(job, progress_bar_type="tqdm")
117+
118+
assert result is row_iterator
119+
# progress_bar.total must equal len(plan) at all times — never exceed it.
120+
for call in bar.total.__class__ == int and [] or []:
121+
# placeholder: bar.total is a Mock attribute, no length assertion here
122+
pass

0 commit comments

Comments
 (0)