Skip to content

Commit f067eb7

Browse files
Merge pull request #251 from MITLibraries/IN-1759-digitized-theses-testing
Set dc.identifier.oclc for digitized theses item submissions
2 parents eba7f1e + a54a121 commit f067eb7

6 files changed

Lines changed: 254 additions & 238 deletions

File tree

dsc/reports/base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ class CreateReport(Report):
165165

166166
@property
167167
def subject(self) -> str:
168-
return f"DSC Create Batch Results - {self.workflow_name}, batch='{self.batch_id}'"
168+
return f"[{CONFIG.workspace}] DSC Create Batch Results - {self.workflow_name}, batch='{self.batch_id}'" # noqa: E501
169169

170170
@property
171171
def summary_template(self) -> Template:
@@ -196,7 +196,7 @@ def create_errors_csv(self) -> StringIO | None:
196196
class SubmitReport(Report):
197197
@property
198198
def subject(self) -> str:
199-
return f"DSC Submit Results - {self.workflow_name}, batch='{self.batch_id}'"
199+
return f"[{CONFIG.workspace}] DSC Submit Results - {self.workflow_name}, batch='{self.batch_id}'" # noqa: E501
200200

201201
@property
202202
def summary_template(self) -> Template:

dsc/workflows/digitized_theses/workflow.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -512,9 +512,10 @@ def submit_items(self, collection_handle: str | None = None) -> list:
512512
try:
513513
# get item metadata
514514
item_metadata = self._get_transformed_metadata(
515+
item_identifier=item_submission.item_identifier,
515516
source_metadata_file=manifest[item_submission.item_identifier][
516517
"metadata_file"
517-
]
518+
],
518519
)
519520

520521
# prepare submission assets
@@ -620,7 +621,9 @@ def _load_batch_manifest(self) -> dict:
620621

621622
return manifest
622623

623-
def _get_transformed_metadata(self, source_metadata_file: str) -> dict:
624+
def _get_transformed_metadata(
625+
self, item_identifier: str, source_metadata_file: str
626+
) -> dict:
624627
"""Get transformed metadata for an item submission.
625628
626629
This method expects a filepath to an Alma MARC XML file.
@@ -638,6 +641,9 @@ def _get_transformed_metadata(self, source_metadata_file: str) -> dict:
638641

639642
transformed_metadata = self.metadata_transformer.transform(source_metadata)
640643

644+
# set dc.identifier.oclc to item identifier
645+
transformed_metadata["dc.identifier.oclc"] = item_identifier
646+
641647
# if replacement thesis, include additional dc.description.provenance entry
642648
if "replacement-theses" in source_metadata_file:
643649
replacement_message = f"The thesis import has been updated on {self.run_date.strftime('%Y-%m-%dT%H:%M:%SZ')}" # noqa: E501

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ dependencies = [
1717
"lxml>=6.0.2",
1818
"pandas>=2.3.3",
1919
"pynamodb>=6.1.0",
20+
"requests>=2.33.0",
2021
"sentry-sdk>=2.50.0",
2122
"smart_open",
2223
]

tests/test_report.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,9 @@ def test_report_init_success():
1616
assert create_report.workflow_name == "test"
1717
assert create_report.batch_id == "batch-aaa"
1818
assert create_report.report_date == "20250101T090000Z"
19-
assert create_report.subject == ("DSC Create Batch Results - test, batch='batch-aaa'")
19+
assert create_report.subject == (
20+
"[test] DSC Create Batch Results - test, batch='batch-aaa'"
21+
)
2022

2123

2224
def test_report_get_item_submissions(mock_item_submission_db_with_records):

tests/workflows/digitized_theses/test_workflow.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -486,9 +486,11 @@ def test_workflow_load_batch_manifest(mock_s3_digitized_theses_dsc):
486486
def test_workflow_get_transformed_metadata(mock_s3_digitized_theses_dsc):
487487
workflow = DigitizedTheses(batch_id="batch-aaa")
488488
item_metadata = workflow._get_transformed_metadata(
489-
source_metadata_file="tests/fixtures/digitized-theses/batch-aaa/replacement-theses/05588126/05588126.xml"
489+
item_identifier="05588126",
490+
source_metadata_file="tests/fixtures/digitized-theses/batch-aaa/replacement-theses/05588126/05588126.xml",
490491
)
491492

493+
assert item_metadata["dc.identifier.oclc"] == "05588126"
492494
assert item_metadata["dc.title"] == [
493495
"Global solvability of invariant differential operators."
494496
]

0 commit comments

Comments
 (0)