Skip to content

Commit ae6dc4b

Browse files
authored
fix(test): stabilize bgprocess polling in backup/restore/IE/maintenance tests (#9958)
The shared polling helpers in: - web/pgadmin/tools/backup/tests/test_backup_utils.py - web/pgadmin/tools/import_export/tests/test_import_export_utils.py - web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py - web/pgadmin/tools/restore/tests/test_create_restore_job.py all share the same race that surfaced on macos-latest / pg16 in PR #9955's CI run: - Wait budget was 2.5s (5 iterations x 0.5s; maintenance used 5s). - The break condition was `execution_time' in the_process`, but `execution_time` is the elapsed time of a *running* bgprocess -- it is set before the wrapped pg_dump / pg_restore / psql / COPY actually finishes. The completion signal is `exit_code` becoming non-None. - So the helper could return control while the wrapped command was still running, and the next assertion -- e.g. `assert_equal(the_process['exit_code'] in [0, 1], True)` -- would fire on `None in [0, 1]`, i.e. `False != True`. Some scenarios masked the bug by listing `None` in their `expected_exit_code` set (a tell that someone noticed the polling was unreliable and worked around it by widening accepted exit codes). Scenarios that didn't include `None` were the ones that flaked. Fix all four helpers identically: - Poll for up to 60 iterations x 0.5s = 30s, generous enough for the slowest CI runner. - Break only when `the_process.get('exit_code') is not None`, the actual completion signal. - Narrow `except Exception` to `except StopIteration`, which is the only thing `next(...)` here can raise. No call-site changes needed; the helper contract (returns once the job is done; raises if the bgprocess never finished) is unchanged in spirit and strictly more reliable in practice. Verified: - pycodestyle on the four files: 0 violations. This fixes the failure observed in the macos-latest / pg16 leg of PR #9955's CI run (run 26154521710, job 76930277702), which was unrelated to that PR's lockfile-only changes.
1 parent 25f0d85 commit ae6dc4b

4 files changed

Lines changed: 28 additions & 28 deletions

File tree

web/pgadmin/tools/backup/tests/test_backup_utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@ def create_backup_job(tester, url, params, assert_equal):
2525

2626
def run_backup_job(tester, job_id, expected_params, assert_in, assert_not_in,
2727
assert_equal):
28-
cnt = 0
2928
the_process = None
30-
while True:
31-
if cnt >= 5:
32-
break
29+
# Wait up to 30s for the background pg_dump to actually finish.
30+
# The completion signal is `exit_code` becoming non-None — NOT the
31+
# mere presence of `execution_time`, which is set while the process
32+
# is still running.
33+
for _ in range(60):
3334
# Check the process list
3435
response1 = tester.get('/misc/bgprocess/?_={0}'.format(
3536
secrets.choice(range(1, 9999999))))
@@ -39,13 +40,12 @@ def run_backup_job(tester, job_id, expected_params, assert_in, assert_not_in,
3940
try:
4041
the_process = next(
4142
p for p in process_list if p['id'] == job_id)
42-
except Exception:
43+
except StopIteration:
4344
the_process = None
4445

45-
if the_process and 'execution_time' in the_process:
46+
if the_process and the_process.get('exit_code') is not None:
4647
break
4748
time.sleep(0.5)
48-
cnt += 1
4949

5050
assert_equal('execution_time' in the_process, True)
5151
assert_equal('stime' in the_process, True)

web/pgadmin/tools/import_export/tests/test_import_export_utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ def create_import_export_job(tester, url, params, assert_equal):
3838

3939
def run_import_export_job(tester, job_id, expected_params, assert_in,
4040
assert_not_in, assert_equal):
41-
cnt = 0
4241
the_process = None
43-
while True:
44-
if cnt >= 5:
45-
break
42+
# Wait up to 30s for the background psql/COPY job to actually finish.
43+
# The completion signal is `exit_code` becoming non-None — NOT the
44+
# mere presence of `execution_time`, which is set while the process
45+
# is still running.
46+
for _ in range(60):
4647
# Check the process list
4748
response1 = tester.get('/misc/bgprocess/?_={0}'.format(
4849
secrets.choice(range(1, 9999999))))
@@ -52,13 +53,12 @@ def run_import_export_job(tester, job_id, expected_params, assert_in,
5253
try:
5354
the_process = next(
5455
p for p in process_list if p['id'] == job_id)
55-
except Exception:
56+
except StopIteration:
5657
the_process = None
5758

58-
if the_process and 'execution_time' in the_process:
59+
if the_process and the_process.get('exit_code') is not None:
5960
break
6061
time.sleep(0.5)
61-
cnt += 1
6262

6363
assert_equal('execution_time' in the_process, True)
6464
assert_equal('stime' in the_process, True)

web/pgadmin/tools/maintenance/tests/test_create_maintenance_job.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,12 @@ def runTest(self):
7373
response_data = json.loads(response.data.decode('utf-8'))
7474
job_id = response_data['data']['job_id']
7575

76-
cnt = 0
7776
the_process = None
78-
while True:
79-
if cnt >= 10:
80-
break
77+
# Wait up to 30s for the background maintenance command to
78+
# actually finish. The completion signal is `exit_code` becoming
79+
# non-None — NOT the mere presence of `execution_time`, which is
80+
# set while the process is still running.
81+
for _ in range(60):
8182
# Check the process list
8283
response1 = self.tester.get('/misc/bgprocess/?_={0}'.format(
8384
secrets.choice(range(1, 9999999))))
@@ -87,13 +88,12 @@ def runTest(self):
8788
try:
8889
the_process = next(
8990
p for p in process_list if p['id'] == job_id)
90-
except Exception:
91+
except StopIteration:
9192
the_process = None
9293

93-
if the_process and 'execution_time' in the_process:
94+
if the_process and the_process.get('exit_code') is not None:
9495
break
9596
time.sleep(0.5)
96-
cnt += 1
9797

9898
self.assertTrue('execution_time' in the_process)
9999
self.assertTrue('stime' in the_process)

web/pgadmin/tools/restore/tests/test_create_restore_job.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,12 @@ def runTest(self):
165165
response_data = json.loads(response.data.decode('utf-8'))
166166
job_id = response_data['data']['job_id']
167167

168-
cnt = 0
169168
the_process = None
170-
while True:
171-
if cnt >= 5:
172-
break
169+
# Wait up to 30s for the background pg_restore to actually
170+
# finish. The completion signal is `exit_code` becoming non-None
171+
# — NOT the mere presence of `execution_time`, which is set
172+
# while the process is still running.
173+
for _ in range(60):
173174
# Check the process list
174175
response1 = self.tester.get('/misc/bgprocess/?_={0}'.format(
175176
secrets.choice(range(1, 9999999))))
@@ -179,13 +180,12 @@ def runTest(self):
179180
try:
180181
the_process = next(
181182
p for p in process_list if p['id'] == job_id)
182-
except Exception:
183+
except StopIteration:
183184
the_process = None
184185

185-
if the_process and 'execution_time' in the_process:
186+
if the_process and the_process.get('exit_code') is not None:
186187
break
187188
time.sleep(0.5)
188-
cnt += 1
189189

190190
self.assertTrue('execution_time' in the_process)
191191
self.assertTrue('stime' in the_process)

0 commit comments

Comments
 (0)