Skip to content

Commit a789f92

Browse files
authored
Merge pull request DSpace#11872 from bram-atmire/9622-doi-organiser-dao-pagination
Add DAO-level pagination to DOIOrganiser bulk operations
2 parents 05a4b5c + 68a2f13 commit a789f92

5 files changed

Lines changed: 123 additions & 93 deletions

File tree

dspace-api/src/main/java/org/dspace/identifier/DOIServiceImpl.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,12 @@ public List<DOI> getDOIsByStatus(Context context, List<Integer> statuses) throws
174174
return doiDAO.findByStatus(context, statuses);
175175
}
176176

177+
// Paginated variant: passes limit and offset through to the DAO unchanged and returns its result.
@Override
178+
public List<DOI> getDOIsByStatus(Context context, List<Integer> statuses, int limit, int offset)
179+
throws SQLException {
180+
return doiDAO.findByStatus(context, statuses, limit, offset);
181+
}
182+
177183
@Override
178184
public List<DOI> getSimilarDOIsNotInState(Context context, String doiPattern, List<Integer> statuses,
179185
boolean dsoIsNotNull)

dspace-api/src/main/java/org/dspace/identifier/dao/DOIDAO.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,18 @@ public List<DOI> findSimilarNotInState(Context context, String doi, List<Integer
3333

3434
public List<DOI> findByStatus(Context context, List<Integer> statuses) throws SQLException;
3535

36+
/**
37+
* Find all DOIs with any of the given statuses, with pagination support.
38+
*
39+
* @param context current DSpace session.
40+
* @param statuses desired statuses.
41+
* @param limit maximum number of results to return (-1 for unlimited).
42+
* @param offset number of results to skip (-1 for none).
43+
* @return matching DOIs ordered by ascending ID.
44+
* @throws SQLException passed through.
45+
*/
46+
public List<DOI> findByStatus(Context context, List<Integer> statuses, int limit, int offset)
47+
throws SQLException;
48+
3649
public DOI findDOIByDSpaceObject(Context context, DSpaceObject dso) throws SQLException;
3750
}

dspace-api/src/main/java/org/dspace/identifier/dao/impl/DOIDAOImpl.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@ public DOI findDOIByDSpaceObject(Context context, DSpaceObject dso, List<Integer
7171

7272
// Unpaginated variant: delegates to the paginated overload with limit and offset both set to -1.
@Override
7373
public List<DOI> findByStatus(Context context, List<Integer> statuses) throws SQLException {
74+
return findByStatus(context, statuses, -1, -1);
75+
}
76+
77+
@Override
78+
public List<DOI> findByStatus(Context context, List<Integer> statuses, int limit, int offset)
79+
throws SQLException {
7480
CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context);
7581
CriteriaQuery criteriaQuery = getCriteriaQuery(criteriaBuilder, DOI.class);
7682
Root<DOI> doiRoot = criteriaQuery.from(DOI.class);
@@ -80,7 +86,8 @@ public List<DOI> findByStatus(Context context, List<Integer> statuses) throws SQ
8086
orPredicates.add(criteriaBuilder.equal(doiRoot.get(DOI_.status), status));
8187
}
8288
criteriaQuery.where(criteriaBuilder.or(orPredicates.toArray(new Predicate[] {})));
83-
return list(context, criteriaQuery, false, DOI.class, -1, -1);
89+
criteriaQuery.orderBy(criteriaBuilder.asc(doiRoot.get(DOI_.id)));
90+
return list(context, criteriaQuery, false, DOI.class, limit, offset);
8491
}
8592

8693
@Override

dspace-api/src/main/java/org/dspace/identifier/doi/DOIOrganiser.java

Lines changed: 83 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,25 @@ public class DOIOrganiser {
5656

5757
private static final Logger LOG = LogManager.getLogger(DOIOrganiser.class);
5858

59+
/**
60+
* Number of DOIs to fetch per batch during bulk operations.
61+
*/
62+
private static final int BATCH_SIZE = 100;
63+
64+
/**
65+
* Functional interface for a DOI processing operation.
* Unlike {@code java.util.function.Consumer}, the single method is declared to
* throw a checked {@link Exception}, so operations that throw checked
* exceptions can be passed as lambdas or method references.
66+
*/
67+
@FunctionalInterface
68+
private interface DOIOperation {
69+
/**
70+
* Process a single DOI.
71+
*
72+
* @param doi the DOI to process.
73+
* @throws Exception if processing fails.
74+
*/
75+
void process(DOI doi) throws Exception;
76+
}
77+
5978
private final DOIIdentifierProvider provider;
6079
private final Context context;
6180
private boolean quiet;
@@ -221,107 +240,27 @@ public static void runCLI(Context context, DOIOrganiser organiser, String[] args
221240
}
222241

223242
if (line.hasOption('s')) {
224-
try {
225-
List<DOI> dois = doiService
226-
.getDOIsByStatus(context, Arrays.asList(DOIIdentifierProvider.TO_BE_RESERVED));
227-
if (dois.isEmpty()) {
228-
System.err.println("There are no objects in the database "
229-
+ "that could be reserved.");
230-
}
231-
232-
for (DOI doi : dois) {
233-
doi = context.reloadEntity(doi);
234-
try {
235-
organiser.reserve(doi);
236-
context.commit();
237-
} catch (RuntimeException e) {
238-
System.err.format("DOI %s for object %s reservation failed, skipping: %s%n",
239-
doi.getDSpaceObject().getID().toString(),
240-
doi.getDoi(), e.getMessage());
241-
context.rollback();
242-
}
243-
}
244-
} catch (SQLException ex) {
245-
System.err.println("Error in database connection:" + ex.getMessage());
246-
ex.printStackTrace(System.err);
247-
}
243+
List<Integer> statuses = Arrays.asList(DOIIdentifierProvider.TO_BE_RESERVED);
244+
processBatched(context, doiService, statuses, organiser::reserve, "reservation");
248245
}
249246

250247
if (line.hasOption('r')) {
251-
try {
252-
List<DOI> dois = doiService
253-
.getDOIsByStatus(context, Arrays.asList(DOIIdentifierProvider.TO_BE_REGISTERED));
254-
if (dois.isEmpty()) {
255-
System.err.println("There are no objects in the database "
256-
+ "that could be registered.");
257-
}
258-
for (DOI doi : dois) {
259-
doi = context.reloadEntity(doi);
260-
try {
261-
organiser.register(doi);
262-
context.commit();
263-
} catch (SQLException e) {
264-
System.err.format("DOI %s for object %s registration failed, skipping: %s%n",
265-
doi.getDSpaceObject().getID().toString(),
266-
doi.getDoi(), e.getMessage());
267-
context.rollback();
268-
}
269-
}
270-
} catch (SQLException ex) {
271-
System.err.format("Error in database connection: %s%n", ex.getMessage());
272-
ex.printStackTrace(System.err);
273-
} catch (RuntimeException ex) {
274-
System.err.format("Error registering DOI identifier: %s%n", ex.getMessage());
275-
}
248+
List<Integer> statuses = Arrays.asList(DOIIdentifierProvider.TO_BE_REGISTERED);
249+
processBatched(context, doiService, statuses, organiser::register, "registration");
276250
}
277251

278252
if (line.hasOption('u')) {
279-
try {
280-
List<DOI> dois = doiService.getDOIsByStatus(context, Arrays.asList(
281-
DOIIdentifierProvider.UPDATE_BEFORE_REGISTRATION,
282-
DOIIdentifierProvider.UPDATE_RESERVED,
283-
DOIIdentifierProvider.UPDATE_REGISTERED));
284-
if (dois.isEmpty()) {
285-
System.err.println("There are no objects in the database "
286-
+ "whose metadata needs an update.");
287-
}
288-
289-
for (DOI doi : dois) {
290-
doi = context.reloadEntity(doi);
291-
organiser.update(doi);
292-
context.commit();
293-
}
294-
} catch (SQLException ex) {
295-
System.err.println("Error in database connection:" + ex.getMessage());
296-
ex.printStackTrace(System.err);
297-
}
253+
List<Integer> statuses = Arrays.asList(
254+
DOIIdentifierProvider.UPDATE_BEFORE_REGISTRATION,
255+
DOIIdentifierProvider.UPDATE_RESERVED,
256+
DOIIdentifierProvider.UPDATE_REGISTERED);
257+
processBatched(context, doiService, statuses, organiser::update, "update");
298258
}
299259

300260
if (line.hasOption('d')) {
301-
try {
302-
List<DOI> dois = doiService
303-
.getDOIsByStatus(context, Arrays.asList(DOIIdentifierProvider.TO_BE_DELETED));
304-
if (dois.isEmpty()) {
305-
System.err.println("There are no objects in the database "
306-
+ "that could be deleted.");
307-
}
308-
309-
for (DOI doi : dois) {
310-
doi = context.reloadEntity(doi);
311-
try {
312-
organiser.delete(doi.getDoi());
313-
context.commit();
314-
} catch (SQLException e) {
315-
System.err.format("DOI %s for object %s deletion failed, skipping: %s%n",
316-
doi.getDSpaceObject().getID().toString(),
317-
doi.getDoi(), e.getMessage());
318-
context.rollback();
319-
}
320-
}
321-
} catch (SQLException ex) {
322-
System.err.println("Error in database connection:" + ex.getMessage());
323-
ex.printStackTrace(System.err);
324-
}
261+
List<Integer> statuses = Arrays.asList(DOIIdentifierProvider.TO_BE_DELETED);
262+
processBatched(context, doiService, statuses,
263+
doi -> organiser.delete(doi.getDoi()), "deletion");
325264
}
326265

327266
if (line.hasOption("reserve-doi")) {
@@ -385,6 +324,58 @@ public static void runCLI(Context context, DOIOrganiser organiser, String[] args
385324

386325
}
387326

327+
/**
328+
* Process all DOIs matching the given statuses in batches.
329+
* Each batch queries from offset 0 because successfully processed DOIs change status and
330+
* drop out of subsequent queries. Stops when a batch is empty or an entire batch fails
331+
* (to prevent infinite loops).
332+
*
333+
* @param context current DSpace session.
334+
* @param doiService the DOI service to query.
335+
* @param statuses the statuses to query for.
336+
* @param operation the operation to perform on each DOI.
337+
* @param processName a human-readable name for the operation (for logging).
338+
*/
339+
private static void processBatched(Context context, DOIService doiService,
340+
List<Integer> statuses, DOIOperation operation,
341+
String processName) {
342+
try {
343+
List<DOI> batch;
344+
boolean firstBatch = true;
345+
do {
346+
batch = doiService.getDOIsByStatus(context, statuses, BATCH_SIZE, 0);
347+
if (firstBatch && batch.isEmpty()) {
348+
System.err.println("There are no objects in the database "
349+
+ "that could be processed for " + processName + ".");
350+
}
351+
firstBatch = false;
352+
353+
int succeeded = 0;
354+
for (DOI doi : batch) {
355+
doi = context.reloadEntity(doi);
356+
try {
357+
operation.process(doi);
358+
context.commit();
359+
succeeded++;
360+
} catch (Exception e) {
361+
System.err.format("DOI %s %s failed, skipping: %s%n",
362+
doi.getDoi(), processName, e.getMessage());
363+
context.rollback();
364+
}
365+
}
366+
// If no DOI in this batch succeeded, stop to prevent an infinite loop.
367+
if (!batch.isEmpty() && succeeded == 0) {
368+
System.err.println("Entire batch failed for " + processName
369+
+ ", stopping to prevent infinite loop.");
370+
break;
371+
}
372+
} while (!batch.isEmpty());
373+
} catch (SQLException ex) {
374+
System.err.println("Error in database connection: " + ex.getMessage());
375+
ex.printStackTrace(System.err);
376+
}
377+
}
378+
388379
/**
389380
* list DOIs queued for reservation or registration
390381
* @param processName - process name for display

dspace-api/src/main/java/org/dspace/identifier/service/DOIService.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,19 @@ public String formatIdentifier(String identifier)
141141
*/
142142
public List<DOI> getDOIsByStatus(Context context, List<Integer> statuses) throws SQLException;
143143

144+
/**
145+
* Find DOIs that have one of a given set of statuses, with pagination support.
146+
*
147+
* @param context current DSpace session.
148+
* @param statuses desired statuses.
149+
* @param limit maximum number of results to return (-1 for unlimited).
150+
* @param offset number of results to skip (-1 for none).
151+
* @return matching DOIs ordered by ascending ID.
152+
* @throws SQLException passed through.
153+
*/
154+
public List<DOI> getDOIsByStatus(Context context, List<Integer> statuses, int limit, int offset)
155+
throws SQLException;
156+
144157
/**
145158
* Find all DOIs that are similar to the specified pattern and not in the
146159
* specified states.

0 commit comments

Comments
 (0)