Skip to content

Commit 49a21c4

Browse files
Tnnienn authored and Micheleboychuk committed
Merged in task/dspace-cris-2024_02_x/DSC-2734 (pull request DSpace#5399)
[DSC-2734] added CORE API to LiveImportFramework Approved-by: Mykhaylo Boychuk
2 parents 0483b6c + 663eed9 commit 49a21c4

14 files changed

Lines changed: 1579 additions & 4 deletions

File tree

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* The contents of this file are subject to the license and copyright
3+
* detailed in the LICENSE and NOTICE files at the root of the source
4+
* tree and available online at
5+
*
6+
* http://www.dspace.org/license/
7+
*/
8+
package org.dspace.importer.external.core;
9+
10+
import java.util.Map;
11+
12+
import jakarta.annotation.Resource;
13+
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
14+
15+
/**
16+
* An implementation of {@link AbstractMetadataFieldMapping}
17+
* Responsible for defining the mapping of the CORE metadatum fields on the DSpace metadatum fields
18+
*
19+
* @author Antonio Fasanella (antonio.fasanella@4science.com)
20+
*/
21+
@SuppressWarnings("rawtypes")
22+
public class CoreFieldMapping extends AbstractMetadataFieldMapping {
23+
24+
/**
25+
* Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
26+
* only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
27+
* what metadatafield is generated.
28+
*
29+
* @param metadataFieldMap The map containing the link between retrieve metadata and metadata that will be set to
30+
* the item.
31+
*/
32+
@Override
33+
@SuppressWarnings("unchecked")
34+
@Resource(name = "coreMetadataFieldMap")
35+
public void setMetadataFieldMap(Map metadataFieldMap) {
36+
super.setMetadataFieldMap(metadataFieldMap);
37+
}
38+
}
Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
/**
2+
* The contents of this file are subject to the license and copyright
3+
* detailed in the LICENSE and NOTICE files at the root of the source
4+
* tree and available online at
5+
*
6+
* http://www.dspace.org/license/
7+
*/
8+
package org.dspace.importer.external.core;
9+
10+
import java.net.URLEncoder;
11+
import java.nio.charset.StandardCharsets;
12+
import java.util.ArrayList;
13+
import java.util.Collection;
14+
import java.util.HashMap;
15+
import java.util.List;
16+
import java.util.Map;
17+
import java.util.concurrent.Callable;
18+
19+
import com.fasterxml.jackson.core.JsonProcessingException;
20+
import com.fasterxml.jackson.databind.JsonNode;
21+
import com.fasterxml.jackson.databind.ObjectMapper;
22+
import org.apache.commons.lang3.StringUtils;
23+
import org.apache.logging.log4j.LogManager;
24+
import org.apache.logging.log4j.Logger;
25+
import org.dspace.content.Item;
26+
import org.dspace.importer.external.datamodel.ImportRecord;
27+
import org.dspace.importer.external.datamodel.Query;
28+
import org.dspace.importer.external.exception.MetadataSourceException;
29+
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
30+
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
31+
import org.dspace.importer.external.service.DoiCheck;
32+
import org.dspace.importer.external.service.components.QuerySource;
33+
import org.dspace.services.ConfigurationService;
34+
import org.springframework.beans.factory.annotation.Autowired;
35+
36+
/**
37+
* Live import provider for the CORE aggregator (https://core.ac.uk).
38+
* Uses CORE API v3 to search and retrieve scholarly works.
39+
*/
40+
public class CoreImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<String>
41+
implements QuerySource {
42+
43+
private static final Logger log = LogManager.getLogger(CoreImportMetadataSourceServiceImpl.class);
44+
45+
private static final ObjectMapper objectMapper = new ObjectMapper();
46+
47+
@Autowired
48+
private LiveImportClient liveImportClient;
49+
50+
@Autowired
51+
private ConfigurationService configurationService;
52+
53+
@Override
54+
public String getImportSource() {
55+
return "core";
56+
}
57+
58+
@Override
59+
public void init() throws Exception {
60+
// no-op
61+
}
62+
63+
@Override
64+
public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
65+
return retry(new SearchByQueryCallable(query, start, count));
66+
}
67+
68+
@Override
69+
public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
70+
return retry(new SearchByQueryCallable(query));
71+
}
72+
73+
@Override
74+
public ImportRecord getRecord(String recordId) throws MetadataSourceException {
75+
return retry(new GetByIdCallable(recordId));
76+
}
77+
78+
@Override
79+
public ImportRecord getRecord(Query query) throws MetadataSourceException {
80+
return retry(new GetByIdCallable(query));
81+
}
82+
83+
@Override
84+
public int getRecordsCount(String query) throws MetadataSourceException {
85+
return retry(new CountByQueryCallable(query));
86+
}
87+
88+
@Override
89+
public int getRecordsCount(Query query) throws MetadataSourceException {
90+
return retry(new CountByQueryCallable(query));
91+
}
92+
93+
@Override
94+
public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
95+
return retry(new SearchByQueryCallable(query));
96+
}
97+
98+
@Override
99+
public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
100+
throw new UnsupportedOperationException("This method is not implemented for CORE");
101+
}
102+
103+
/**
104+
* If the query is a DOI, converts it to CORE search format: doi:"10.1234/example".
105+
* Returns empty string if not a DOI.
106+
*
107+
* @param query the search query
108+
* @return DOI in CORE search format, or empty string
109+
*/
110+
private String getID(String query) {
111+
if (StringUtils.isBlank(query)) {
112+
return StringUtils.EMPTY;
113+
}
114+
String q = query;
115+
// Handle double URL-encoded slash: %252F -> /
116+
if (q.contains("%252F")) {
117+
q = q.replace("%252F", "/");
118+
}
119+
if (DoiCheck.isDoi(q)) {
120+
return "doi:\"" + q + "\"";
121+
}
122+
return StringUtils.EMPTY;
123+
}
124+
125+
private int getTimeoutMs() {
126+
return configurationService.getIntProperty("core.timeout", 30000);
127+
}
128+
129+
private int getDefaultPageSize() {
130+
return configurationService.getIntProperty("core.pageSize", 10);
131+
}
132+
133+
private String getBaseUrl() {
134+
return configurationService.getProperty("core.api.url", "https://api.core.ac.uk/v3");
135+
}
136+
137+
private String getApiKey() {
138+
return StringUtils.trimToNull(configurationService.getProperty("core.apiKey"));
139+
}
140+
141+
/**
142+
* Builds the request map expected by LiveImportClient.
143+
* - uriParameters: querystring params
144+
* - headers: HTTP headers (Authorization)
145+
*/
146+
private Map<String, Map<String, String>> buildParams(Map<String, String> uriParameters) {
147+
Map<String, Map<String, String>> params = new HashMap<>();
148+
149+
params.put("uriParameters", uriParameters != null ? uriParameters : new HashMap<>());
150+
151+
Map<String, String> headers = new HashMap<>();
152+
String apiKey = getApiKey();
153+
if (apiKey != null) {
154+
headers.put("Authorization", "Bearer " + apiKey);
155+
} else {
156+
log.warn("CORE api key is not configured (property core.apiKey). Requests may fail with 401/403.");
157+
}
158+
params.put("headers", headers);
159+
160+
return params;
161+
}
162+
163+
private String buildWorksSearchUrl() {
164+
return getBaseUrl() + "/search/works";
165+
}
166+
167+
private String buildWorkByIdUrl(String identifier) {
168+
return getBaseUrl() + "/works/" + encodePathSegment(identifier);
169+
}
170+
171+
private String encodePathSegment(String segment) {
172+
try {
173+
return URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+", "%20");
174+
} catch (Exception e) {
175+
return segment;
176+
}
177+
}
178+
179+
private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
180+
181+
private final Query query;
182+
183+
private SearchByQueryCallable(String queryString, Integer start, Integer count) {
184+
Query q = new Query();
185+
q.addParameter("query", StringUtils.trimToNull(queryString));
186+
q.addParameter("start", start);
187+
q.addParameter("count", count);
188+
this.query = q;
189+
}
190+
191+
private SearchByQueryCallable(Query query) {
192+
this.query = query != null ? query : new Query();
193+
}
194+
195+
@Override
196+
public List<ImportRecord> call() {
197+
List<ImportRecord> records = new ArrayList<>();
198+
199+
String raw = query.getParameterAsClass("query", String.class);
200+
String q = getID(raw);
201+
if (StringUtils.isBlank(q)) {
202+
q = StringUtils.trimToNull(raw);
203+
}
204+
if (q == null) {
205+
return records;
206+
}
207+
208+
Integer start = query.getParameterAsClass("start", Integer.class);
209+
Integer count = query.getParameterAsClass("count", Integer.class);
210+
211+
int offset = (start != null) ? Math.max(start, 0) : 0;
212+
int limit = (count != null && count > 0) ? count : getDefaultPageSize();
213+
214+
Map<String, String> uriParameters = new HashMap<>();
215+
uriParameters.put("q", q);
216+
uriParameters.put("limit", Integer.toString(limit));
217+
uriParameters.put("offset", Integer.toString(offset));
218+
219+
String url = buildWorksSearchUrl();
220+
String response = liveImportClient.executeHttpGetRequest(getTimeoutMs(), url, buildParams(uriParameters));
221+
222+
JsonNode jsonNode = convertStringJsonToJsonNode(response);
223+
if (jsonNode == null) {
224+
log.warn("CORE returned invalid JSON");
225+
return records;
226+
}
227+
228+
JsonNode resultsNode = jsonNode.at("/results");
229+
if (resultsNode != null && resultsNode.isArray()) {
230+
for (JsonNode workNode : resultsNode) {
231+
if (workNode == null || workNode.isMissingNode() || workNode.isNull()) {
232+
continue;
233+
}
234+
records.add(transformSourceRecords(workNode.toString()));
235+
}
236+
} else {
237+
log.debug("CORE: missing /results array in response");
238+
}
239+
240+
return records;
241+
}
242+
}
243+
244+
private class GetByIdCallable implements Callable<ImportRecord> {
245+
246+
private final Query query;
247+
248+
private GetByIdCallable(String recordId) {
249+
Query q = new Query();
250+
q.addParameter("id", StringUtils.trimToNull(recordId));
251+
this.query = q;
252+
}
253+
254+
private GetByIdCallable(Query query) {
255+
this.query = query != null ? query : new Query();
256+
}
257+
258+
@Override
259+
public ImportRecord call() throws Exception {
260+
261+
String id = query.getParameterAsClass("id", String.class);
262+
id = StringUtils.trimToNull(id);
263+
if (id == null) {
264+
return null;
265+
}
266+
267+
String url = buildWorkByIdUrl(id);
268+
269+
String response;
270+
try {
271+
response = liveImportClient.executeHttpGetRequest(getTimeoutMs(), url, buildParams(new HashMap<>()));
272+
} catch (RuntimeException e) {
273+
log.error("CORE getRecord failed for identifier={}", id, e);
274+
throw new MetadataSourceException("CORE getRecord failed for identifier=" + id, e);
275+
}
276+
277+
JsonNode jsonNode = convertStringJsonToJsonNode(response);
278+
if (jsonNode == null || jsonNode.isMissingNode() || jsonNode.isNull()) {
279+
log.warn("CORE /works/{id} returned invalid JSON for identifier={}", id);
280+
return null;
281+
}
282+
283+
return transformSourceRecords(jsonNode.toString());
284+
}
285+
}
286+
287+
private class CountByQueryCallable implements Callable<Integer> {
288+
289+
private final Query query;
290+
291+
private CountByQueryCallable(String queryString) {
292+
Query q = new Query();
293+
q.addParameter("query", StringUtils.trimToNull(queryString));
294+
this.query = q;
295+
}
296+
297+
private CountByQueryCallable(Query query) {
298+
this.query = query != null ? query : new Query();
299+
}
300+
301+
@Override
302+
public Integer call() throws Exception {
303+
String raw = query.getParameterAsClass("query", String.class);
304+
String q = getID(raw);
305+
if (StringUtils.isBlank(q)) {
306+
q = StringUtils.trimToNull(raw);
307+
}
308+
if (q == null) {
309+
return 0;
310+
}
311+
312+
Map<String, String> uriParameters = new HashMap<>();
313+
uriParameters.put("q", q);
314+
uriParameters.put("limit", "1");
315+
uriParameters.put("offset", "0");
316+
317+
String url = buildWorksSearchUrl();
318+
String responseString = liveImportClient.executeHttpGetRequest(getTimeoutMs(), url,
319+
buildParams(uriParameters));
320+
321+
JsonNode jsonNode = convertStringJsonToJsonNode(responseString);
322+
if (jsonNode == null) {
323+
log.warn("CORE returned invalid JSON");
324+
throw new MetadataSourceException("Could not read CORE source");
325+
}
326+
327+
JsonNode totalNode = jsonNode.at("/totalHits");
328+
if (totalNode != null && totalNode.isNumber()) {
329+
return totalNode.asInt();
330+
}
331+
if (totalNode != null && totalNode.isTextual()) {
332+
try {
333+
return Integer.parseInt(totalNode.asText());
334+
} catch (Exception e) {
335+
log.debug("Could not parse totalHits: {}", totalNode.asText(), e);
336+
}
337+
}
338+
return 0;
339+
}
340+
}
341+
342+
private JsonNode convertStringJsonToJsonNode(String json) {
343+
try {
344+
return objectMapper.readTree(json);
345+
} catch (JsonProcessingException e) {
346+
log.error("Unable to process json response.", e);
347+
}
348+
return null;
349+
}
350+
}

0 commit comments

Comments
 (0)