Skip to content

Commit a239203

Browse files
[wip]
1 parent 32df1bf commit a239203

2 files changed

Lines changed: 68 additions & 19 deletions

File tree

dsc/workflows/digitized_theses/workflow.py

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -189,20 +189,37 @@ def _get_item_submissions_from_synced_batch(self) -> list[ItemSubmission]:
189189
)
190190
continue
191191

192+
# track identifier as 'seen'
192193
seen_item_identifiers.append(item_identifier)
193-
item_submissions.append(
194-
ItemSubmission(
195-
batch_id=self.batch_id,
196-
item_identifier=item_identifier,
197-
workflow_name=self.workflow_name,
198-
status=(
199-
ItemSubmissionStatus.CREATE_SUCCESS
200-
if theses_subfolder in ["replacement-theses", "new-theses"]
201-
else ItemSubmissionStatus.CREATE_SKIPPED
202-
),
203-
)
194+
195+
# create an instance of ItemSubmission
196+
item_submission = ItemSubmission(
197+
batch_id=self.batch_id,
198+
item_identifier=item_identifier,
199+
workflow_name=self.workflow_name,
204200
)
205201

202+
if theses_subfolder == "replacement-theses":
203+
try:
204+
dspace_item = self._get_item_from_dspace(
205+
item_submission.item_identifier
206+
)
207+
except exceptions.DSpaceClientSearchError as exception:
208+
item_submission.status = ItemSubmissionStatus.CREATE_SKIPPED
209+
item_submission.status_details = str(exception)
210+
else:
211+
if dspace_item:
212+
item_submission.dspace_handle = dspace_item.handle
213+
item_submission.status = ItemSubmissionStatus.CREATE_SUCCESS
214+
item_submission.status_details = "Replacement thesis"
215+
elif theses_subfolder == "new-theses":
216+
item_submission.status = ItemSubmissionStatus.CREATE_SUCCESS
217+
item_submission.status_details = "New thesis"
218+
else:
219+
item_submission.status = ItemSubmissionStatus.CREATE_SKIPPED
220+
item_submission.status_details = "Skipped thesis"
221+
item_submissions.append(item_submission)
222+
206223
return item_submissions
207224

208225
def _create_batch_in_s3(self) -> list[ItemSubmission]:
@@ -273,20 +290,19 @@ def _create_batch_in_s3(self) -> list[ItemSubmission]:
273290
# check if item submission is a 'Replacement thesis'
274291
if dspace_item and not self._is_replacement_thesis(dspace_item):
275292
item_submission.dspace_handle = dspace_item.handle
276-
item_submission.status = "create_skipped"
293+
item_submission.status = ItemSubmissionStatus.CREATE_SKIPPED
277294
item_submission.status_details = "Cannot replace the electronic version submitted by the student author." # noqa: E501
278295
item_submissions.append(item_submission)
279296
continue
280297

281298
if dspace_item and self._is_replacement_thesis(dspace_item):
282299
item_submission.dspace_handle = dspace_item.handle
283-
item_submission.status = "create_success"
300+
item_submission.status = ItemSubmissionStatus.CREATE_SUCCESS
284301
item_submission.status_details = "Replacement thesis"
285-
item_submissions.append(item_submission)
286302
else:
287-
item_submission.status = "create_success"
303+
item_submission.status = ItemSubmissionStatus.CREATE_SUCCESS
288304
item_submission.status_details = "New thesis"
289-
item_submissions.append(item_submission)
305+
item_submissions.append(item_submission)
290306

291307
self._move_batch_files_to_theses_subfolders(
292308
item_submissions, batch_location=tmp_batch_path

tests/workflows/digitized_theses/test_workflow.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from lxml import etree
1212

1313
from dsc import exceptions
14+
from dsc.db.models import ItemSubmissionStatus
1415
from dsc.item_submission import ItemSubmission
1516
from dsc.workflows.digitized_theses import (
1617
DigitizedTheses,
@@ -110,7 +111,8 @@ def alma_sru_response_no_record():
110111

111112

112113
@pytest.fixture
113-
def mock_s3_digitized_theses(mocked_s3, s3_client):
114+
def mock_s3_digitized_theses_dsc(mocked_s3, s3_client):
115+
"""Mock batch for digitized theses in DSC S3 bucket."""
114116
for source_metadata_file in glob.glob(
115117
"tests/fixtures/digitized-theses/batch-aaa/**/*.xml", recursive=True
116118
):
@@ -171,6 +173,37 @@ def test_workflow_update_batch_id():
171173
assert workflow._update_batch_id(batch_id="batch-aaa") == "batch-aaa-20250101T090000Z"
172174

173175

176+
@patch("dsc.workflows.digitized_theses.workflow.DigitizedTheses._get_item_from_dspace")
def test_workflow_get_item_submissions_from_synced_batch(
    mock_workflow_get_item_from_dspace, mock_s3_digitized_theses_dsc
):
    """Verify item submissions are rebuilt from an already-synced batch.

    The mock_s3_digitized_theses_dsc fixture stands in for a batch that was
    previously created in the DSC S3 bucket (i.e., its contents are already
    organized into theses subfolders). The workflow is expected to derive
    ItemSubmissions from the contents of that existing batch.
    """
    # The DSpace lookup is patched out; the stub only needs to expose the
    # handle that the workflow copies onto the item submission.
    mock_workflow_get_item_from_dspace.return_value = MagicMock(
        handle="1721.1/157651"
    )

    expected_item_submission = ItemSubmission(
        batch_id="batch-aaa",
        item_identifier="05588126",
        workflow_name="digitized-theses",
        dspace_handle="1721.1/157651",
        status=ItemSubmissionStatus.CREATE_SUCCESS,
        status_details="Replacement thesis",
    )

    digitized_theses = DigitizedTheses(batch_id="batch-aaa")
    item_submissions = digitized_theses._get_item_submissions_from_synced_batch()

    assert item_submissions == [expected_item_submission]
205+
206+
174207
@patch("dsc.workflows.digitized_theses.workflow.requests")
175208
def test_workflow_download_metadata_from_alma(
176209
mock_requests, alma_sru_response_single_record, tmp_path
@@ -371,7 +404,7 @@ def test_workflow_submit_items_handles_errors(
371404
)
372405

373406

374-
def test_workflow_load_batch_manifest(mock_s3_digitized_theses):
407+
def test_workflow_load_batch_manifest(mock_s3_digitized_theses_dsc):
375408
workflow = DigitizedTheses(batch_id="batch-aaa")
376409
assert workflow._load_batch_manifest() == defaultdict(
377410
dict,
@@ -385,7 +418,7 @@ def test_workflow_load_batch_manifest(mock_s3_digitized_theses):
385418

386419

387420
@freeze_time("2025-01-01 09:00:00")
388-
def test_workflow_get_transformed_metadata(mock_s3_digitized_theses):
421+
def test_workflow_get_transformed_metadata(mock_s3_digitized_theses_dsc):
389422
workflow = DigitizedTheses(batch_id="batch-aaa")
390423
item_metadata = workflow._get_transformed_metadata(
391424
source_metadata_file="tests/fixtures/digitized-theses/batch-aaa/replacement-theses/05588126/05588126.xml"

0 commit comments

Comments
 (0)