Skip to content

Commit 502dd5d

Browse files
committed
feat(osf): update migration script to remove unmapped crossref funders
1 parent 52c5dac commit 502dd5d

2 files changed

Lines changed: 14 additions & 17 deletions

File tree

osf/management/commands/migrate_funder_ids_to_ror.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def handle(self, *args, **options):
9797
self.stdout.write(f" Records updated: {stats['updated']}")
9898
self.stdout.write(f" Records re-indexed: {stats['reindexed']}")
9999
self.stdout.write(f" Funders migrated: {stats['funders_migrated']}")
100-
self.stdout.write(f" Funders not in mapping: {stats['not_in_mapping']}")
100+
self.stdout.write(f" Unmapped funders removed: {stats['not_in_mapping']}")
101101
if stats['errors']:
102102
self.stdout.write(self.style.ERROR(f" Errors: {stats['errors']}"))
103103

@@ -307,14 +307,15 @@ def migrate_record(self, record, mapping, dry_run, update_funder_name):
307307
f'{funder_identifier} -> {ror_info["ror_id"]}'
308308
)
309309
else:
310-
# No mapping found, keep original
311-
updated_funding_info.append(funder)
310+
# No mapping found, remove unmapped Crossref funder
311+
record_modified = True
312312
funder_stats['not_found'] += 1
313313
funder_stats['unmapped_ids'].add(funder_identifier)
314314

315315
logger.warning(
316-
f'No ROR mapping found for Crossref Funder ID: {funder_identifier} '
317-
f'in record {record.guid._id}'
316+
f'{"[DRY RUN] " if dry_run else ""}'
317+
f'Removing unmapped Crossref Funder ID: {funder_identifier} '
318+
f'from record {record.guid._id}'
318319
)
319320

320321
# Warn about duplicate ROR IDs that would result from migration

osf_tests/management_commands/test_migrate_funder_ids_to_ror.py

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -201,8 +201,8 @@ def test_migrate_multiple_funders(self, record_with_multiple_funders, csv_mappin
201201
assert funders[2]['funder_identifier'] == 'https://ror.org/021nxhr62'
202202
assert funders[2]['funder_identifier_type'] == 'ROR'
203203

204-
def test_unmapped_funder_preserved(self, record_with_unmapped_funder, csv_mapping_file):
205-
"""Test that funders not in mapping are preserved unchanged."""
204+
def test_unmapped_funder_removed(self, record_with_unmapped_funder, csv_mapping_file):
205+
"""Test that funders not in mapping are removed."""
206206
command = Command()
207207
command.stdout = type('MockStdout', (), {'write': lambda self, x: None})()
208208

@@ -214,17 +214,13 @@ def test_unmapped_funder_preserved(self, record_with_unmapped_funder, csv_mappin
214214
update_funder_name=False
215215
)
216216

217-
assert updated is False
217+
assert updated is True
218218
assert stats['migrated'] == 0
219219
assert stats['not_found'] == 1
220220
assert 'http://dx.doi.org/10.13039/999999999' in stats['unmapped_ids']
221221

222222
record_with_unmapped_funder.refresh_from_db()
223-
funder = record_with_unmapped_funder.funding_info[0]
224-
225-
# Should be unchanged
226-
assert funder['funder_identifier'] == 'http://dx.doi.org/10.13039/999999999'
227-
assert funder['funder_identifier_type'] == 'Crossref Funder ID'
223+
assert record_with_unmapped_funder.funding_info == []
228224

229225
def test_load_mapping_various_id_formats(self, csv_mapping_file):
230226
"""Test that mapping handles various ID formats."""
@@ -347,17 +343,17 @@ def test_reindex_not_triggered_with_skip_flag(self, record_with_crossref_funder,
347343
assert funder['funder_identifier'] == 'https://ror.org/01cwqze88'
348344
assert funder['funder_identifier_type'] == 'ROR'
349345

350-
def test_reindex_not_triggered_for_unmapped_records(self, record_with_unmapped_funder, csv_mapping_file, mock_reindex):
351-
"""Test that re-indexing is NOT triggered for records that weren't updated."""
346+
def test_reindex_triggered_for_unmapped_records(self, record_with_unmapped_funder, csv_mapping_file, mock_reindex):
347+
"""Test that re-indexing IS triggered when unmapped funders are removed."""
352348
mock_update_search, mock_request_identifier_update = mock_reindex
353349

354350
call_command(
355351
'migrate_funder_ids_to_ror',
356352
'--csv-file', csv_mapping_file,
357353
)
358354

359-
mock_update_search.assert_not_called()
360-
mock_request_identifier_update.assert_not_called()
355+
mock_update_search.assert_called()
356+
mock_request_identifier_update.assert_called_with('doi')
361357

362358
def test_end_to_end_call_command(self, record_with_crossref_funder, record_with_multiple_funders, csv_mapping_file, mock_reindex):
363359
"""Test the full management command end-to-end via call_command."""

0 commit comments

Comments
 (0)