Skip to content

Commit c38c8ab

Browse files
committed
Merge tag '26.9.1' into develop
Hotfix for registrations stuck archiving
2 parents a565b3a + 7025174 commit c38c8ab

5 files changed

Lines changed: 154 additions & 30 deletions

File tree

CHANGELOG

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO.
44

5+
26.9.1 (2026-05-15)
6+
===================
7+
8+
- Hotfix for registrations stuck archiving
9+
510
26.9.0 (2026-05-07)
611
===================
712

osf_tests/test_archiver.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,65 @@ def test_compact_traceback_uses_last_lines(self):
467467
def test_compact_traceback_handles_empty(self):
468468
assert archiver_utils.compact_traceback(None) is None
469469

470+
def test_compact_traceback_uses_last_chars_then_last_lines(self):
471+
traceback_text = '\n'.join(f'line {line_num}' for line_num in range(20))
472+
compact = archiver_utils.compact_traceback(traceback_text, max_lines=3, max_chars=45)
473+
474+
# max_chars keeps only the tail content, then max_lines keeps the tail lines.
475+
assert compact == '\n'.join(['line 17', 'line 18', 'line 19'])
476+
477+
@mock.patch('website.archiver.tasks.sentry.log_message')
478+
def test_archiver_task_load_archive_job_retries_with_context(self, mock_log_message):
479+
task = ArchiverTask()
480+
task.name = 'website.archiver.tasks.stat_addon'
481+
task.max_retries = 3
482+
task.retry = mock.Mock(side_effect=RuntimeError('retry requested'))
483+
request = mock.Mock(retries=1, id='task-123', kwargs={'dst_pk': 'reg123'})
484+
485+
with mock.patch.object(ArchiverTask, 'request', new_callable=mock.PropertyMock, return_value=request):
486+
with mock.patch('website.archiver.tasks.ArchiveJob.load', return_value=None):
487+
with pytest.raises(RuntimeError, match='retry requested'):
488+
task.load_archive_job('abc123')
489+
490+
retry_exception = task.retry.call_args.kwargs['exc']
491+
assert isinstance(retry_exception, ArchiverStateError)
492+
assert retry_exception.info['job_pk'] == 'abc123'
493+
assert retry_exception.info['registration_id'] == 'reg123'
494+
assert retry_exception.info['should_retry'] is True
495+
assert not mock_log_message.called
496+
497+
@mock.patch('website.archiver.tasks.sentry.log_exception')
498+
@mock.patch('website.archiver.tasks.sentry.log_message')
499+
def test_archiver_task_load_archive_job_final_failure_logs_context(self, mock_log_message, mock_log_exception):
500+
task = ArchiverTask()
501+
task.name = 'website.archiver.tasks.stat_addon'
502+
task.max_retries = 3
503+
task.retry = mock.Mock()
504+
request = mock.Mock(retries=3, id='task-456', kwargs={'dst_pk': 'reg456'})
505+
506+
with mock.patch.object(ArchiverTask, 'request', new_callable=mock.PropertyMock, return_value=request):
507+
with mock.patch('website.archiver.tasks.ArchiveJob.load', return_value=None):
508+
with pytest.raises(ArchiverStateError) as exc:
509+
task.load_archive_job('def456')
510+
511+
assert exc.value.info['job_pk'] == 'def456'
512+
assert exc.value.info['registration_id'] == 'reg456'
513+
assert exc.value.info['should_retry'] is False
514+
assert not task.retry.called
515+
mock_log_message.assert_called_once_with(
516+
f'ArchiveJob {exc.value.info['job_pk']} not found during archiver task execution',
517+
extra_data={
518+
'job_pk': 'def456',
519+
'registration_id': 'reg456',
520+
'task_id': 'task-456',
521+
'task_name': task.name,
522+
'retries': 3,
523+
'max_retries': 3,
524+
'should_retry': False,
525+
},
526+
)
527+
mock_log_exception.assert_called_once()
528+
470529
@mock.patch('website.archiver.tasks.archive_addon.delay')
471530
def test_archive_node_pass(self, mock_archive_addon):
472531
settings.MAX_ARCHIVE_SIZE = 1024 ** 3

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "OSF",
3-
"version": "26.9.0",
3+
"version": "26.9.1",
44
"description": "Facilitating Open Science",
55
"repository": "https://github.com/CenterForOpenScience/osf.io",
66
"author": "Center for Open Science",

website/archiver/tasks.py

Lines changed: 82 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,52 @@ class ArchiverTask(celery.Task):
8787
max_retries = 0
8888
ignore_result = False
8989

90+
def load_archive_job(self, job_pk, retry_if_missing=True, task_id=None, kwargs=None):
91+
"""Load an ArchiveJob and optionally retry bound tasks if row is missing."""
92+
job = ArchiveJob.load(job_pk)
93+
if job:
94+
return job
95+
96+
request = getattr(self, 'request', None)
97+
request_kwargs = kwargs or getattr(request, 'kwargs', None) or {}
98+
context = {
99+
'job_pk': job_pk,
100+
'registration_id': request_kwargs.get('dst_pk'),
101+
'task_id': task_id or getattr(request, 'id', None),
102+
'task_name': self.name,
103+
'retries': getattr(request, 'retries', None),
104+
'max_retries': self.max_retries,
105+
}
106+
should_retry = (
107+
retry_if_missing
108+
and context['retries'] is not None
109+
and context['max_retries'] is not None
110+
and context['retries'] < context['max_retries']
111+
)
112+
context['should_retry'] = should_retry
113+
114+
error = ArchiverStateError({
115+
'error': 'ArchiveJob not found',
116+
**context,
117+
})
118+
if should_retry:
119+
raise self.retry(exc=error)
120+
121+
sentry.log_message(
122+
f'ArchiveJob {job_pk} not found during archiver task execution',
123+
extra_data=context,
124+
)
125+
sentry.log_exception(error)
126+
raise error
127+
90128
def on_failure(self, exc, task_id, args, kwargs, einfo):
91-
job = ArchiveJob.load(kwargs.get('job_pk'))
92-
compact_traceback = utils.compact_traceback(einfo)
93-
if not job:
94-
archiver_state_exc = ArchiverStateError({
95-
'exception': exc,
96-
'args': args,
97-
'kwargs': kwargs,
98-
'einfo': compact_traceback,
99-
})
100-
sentry.log_exception(archiver_state_exc)
101-
raise archiver_state_exc
129+
job_pk = kwargs.get('job_pk')
130+
job = self.load_archive_job(job_pk, retry_if_missing=False, task_id=task_id, kwargs=kwargs)
131+
compact_traceback = utils.compact_traceback(
132+
einfo,
133+
max_lines=20,
134+
max_chars=3000,
135+
)
102136

103137
if job.status == ARCHIVER_FAILURE:
104138
# already captured
@@ -161,9 +195,15 @@ def get_addon_from_gv(src_node, addon_name, requesting_user):
161195
)
162196

163197

164-
@celery_app.task(base=ArchiverTask, ignore_result=False)
198+
@celery_app.task(
199+
bind=True,
200+
base=ArchiverTask,
201+
ignore_result=False,
202+
max_retries=3,
203+
default_retry_delay=60,
204+
)
165205
@logged('stat_addon')
166-
def stat_addon(addon_short_name, job_pk):
206+
def stat_addon(self, addon_short_name, job_pk):
167207
"""Collect metadata about the file tree of a given addon
168208
169209
:param addon_short_name: AddonConfig.short_name of the addon to be examined
@@ -178,7 +218,7 @@ def stat_addon(addon_short_name, job_pk):
178218
addon_name = 'dataverse'
179219
version = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
180220
create_app_context()
181-
job = ArchiveJob.load(job_pk)
221+
job = self.load_archive_job(job_pk)
182222
src, dst, user = job.info()
183223

184224
src_addon = None
@@ -206,9 +246,15 @@ def stat_addon(addon_short_name, job_pk):
206246
return result
207247

208248

209-
@celery_app.task(base=ArchiverTask, ignore_result=False)
249+
@celery_app.task(
250+
bind=True,
251+
base=ArchiverTask,
252+
ignore_result=False,
253+
max_retries=3,
254+
default_retry_delay=60,
255+
)
210256
@logged('make_copy_request')
211-
def make_copy_request(job_pk, url, data):
257+
def make_copy_request(self, job_pk, url, data):
212258
"""Make the copy request to the WaterButler API and handle
213259
successful and failed responses
214260
@@ -218,7 +264,7 @@ def make_copy_request(job_pk, url, data):
218264
:return: None
219265
"""
220266
create_app_context()
221-
job = ArchiveJob.load(job_pk)
267+
job = self.load_archive_job(job_pk)
222268
src, dst, user = job.info()
223269
logger.info(f"Sending copy request for addon: {data['provider']} on node: {dst._id}")
224270
cookie = furl(url).query.params.get('cookie')
@@ -235,9 +281,15 @@ def make_waterbutler_payload(dst_id, rename):
235281
'provider': settings.ARCHIVE_PROVIDER,
236282
}
237283

238-
@celery_app.task(base=ArchiverTask, ignore_result=False)
284+
@celery_app.task(
285+
bind=True,
286+
base=ArchiverTask,
287+
ignore_result=False,
288+
max_retries=3,
289+
default_retry_delay=60,
290+
)
239291
@logged('archive_addon')
240-
def archive_addon(addon_short_name, job_pk):
292+
def archive_addon(self, addon_short_name, job_pk):
241293
"""Archive the contents of an addon by making a copy request to the
242294
WaterButler API
243295
@@ -246,7 +298,7 @@ def archive_addon(addon_short_name, job_pk):
246298
:return: None
247299
"""
248300
create_app_context()
249-
job = ArchiveJob.load(job_pk)
301+
job = self.load_archive_job(job_pk)
250302
src, dst, user = job.info()
251303
logger.info(f'Archiving addon: {addon_short_name} on node: {src._id}')
252304

@@ -274,9 +326,15 @@ def archive_addon(addon_short_name, job_pk):
274326
data = make_waterbutler_payload(dst._id, rename)
275327
make_copy_request.delay(job_pk=job_pk, url=url, data=data)
276328

277-
@celery_app.task(base=ArchiverTask, ignore_result=False)
329+
@celery_app.task(
330+
bind=True,
331+
base=ArchiverTask,
332+
ignore_result=False,
333+
max_retries=3,
334+
default_retry_delay=60,
335+
)
278336
@logged('archive_node')
279-
def archive_node(stat_results, job_pk):
337+
def archive_node(self, stat_results, job_pk):
280338
"""First use the results of #stat_node to check disk usage of the
281339
initiated registration, then either fail the registration or
282340
create a celery.group group of subtasks to archive addons
@@ -286,7 +344,7 @@ def archive_node(stat_results, job_pk):
286344
:return: None
287345
"""
288346
create_app_context()
289-
job = ArchiveJob.load(job_pk)
347+
job = self.load_archive_job(job_pk)
290348
src, dst, user = job.info()
291349
logger.info(f'Archiving node: {src._id}')
292350

@@ -381,7 +439,7 @@ def archive_success(self, dst_pk, job_pk):
381439
)
382440
self.retry(exc=err)
383441

384-
job = ArchiveJob.load(job_pk)
442+
job = self.load_archive_job(job_pk)
385443
if not job.sent:
386444
job.sent = True
387445
job.save()

website/archiver/utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,10 @@ def compact_traceback(einfo, max_lines=25, max_chars=4000):
394394
if not traceback_text:
395395
return None
396396

397-
lines = traceback_text.splitlines()
398-
compact = '\n'.join(lines[-max_lines:])
399-
if len(compact) > max_chars:
400-
compact = compact[-max_chars:]
401-
return compact
397+
max_lines = max(1, int(max_lines))
398+
max_chars = max(1, int(max_chars))
399+
400+
# Always compact from the tail to preserve the latest failure context.
401+
tail_text = traceback_text[-max_chars:]
402+
lines = tail_text.splitlines()
403+
return '\n'.join(lines[-max_lines:])

0 commit comments

Comments
 (0)