Skip to content

Commit e48f227

Browse files
Tnnienn authored and Micheleboychuk committed
Merged in task/dspace-cris-2023_02_x/DSC-2734 (pull request DSpace#5398)
[DSC-2734] added CORE API to LiveImportFramework Approved-by: Mykhaylo Boychuk
2 parents 38ece69 + 3cc827b commit e48f227

14 files changed

Lines changed: 1585 additions & 5 deletions

File tree

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* The contents of this file are subject to the license and copyright
3+
* detailed in the LICENSE and NOTICE files at the root of the source
4+
* tree and available online at
5+
*
6+
* http://www.dspace.org/license/
7+
*/
8+
package org.dspace.importer.external.core;
9+
10+
import java.util.Map;
11+
import javax.annotation.Resource;
12+
13+
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
14+
15+
/**
16+
* An implementation of {@link AbstractMetadataFieldMapping}
17+
* Responsible for defining the mapping of the CORE metadatum fields on the DSpace metadatum fields
18+
*
19+
* @author Antonio Fasanella (antonio.fasanella@4science.com)
20+
*/
21+
@SuppressWarnings("rawtypes")
22+
public class CoreFieldMapping extends AbstractMetadataFieldMapping {
23+
24+
/**
25+
* Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
26+
* only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
27+
* what metadatafield is generated.
28+
*
29+
* @param metadataFieldMap The map containing the link between retrieve metadata and metadata that will be set to
30+
* the item.
31+
*/
32+
@Override
33+
@SuppressWarnings("unchecked")
34+
@Resource(name = "coreMetadataFieldMap")
35+
public void setMetadataFieldMap(Map metadataFieldMap) {
36+
super.setMetadataFieldMap(metadataFieldMap);
37+
}
38+
}
Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,351 @@
1+
/**
2+
* The contents of this file are subject to the license and copyright
3+
* detailed in the LICENSE and NOTICE files at the root of the source
4+
* tree and available online at
5+
*
6+
* http://www.dspace.org/license/
7+
*/
8+
package org.dspace.importer.external.core;
9+
10+
import java.net.URLEncoder;
11+
import java.nio.charset.StandardCharsets;
12+
import java.util.ArrayList;
13+
import java.util.Collection;
14+
import java.util.HashMap;
15+
import java.util.List;
16+
import java.util.Map;
17+
import java.util.concurrent.Callable;
18+
import javax.el.MethodNotFoundException;
19+
20+
import com.fasterxml.jackson.core.JsonProcessingException;
21+
import com.fasterxml.jackson.databind.JsonNode;
22+
import com.fasterxml.jackson.databind.ObjectMapper;
23+
import org.apache.commons.lang3.StringUtils;
24+
import org.apache.logging.log4j.LogManager;
25+
import org.apache.logging.log4j.Logger;
26+
import org.dspace.content.Item;
27+
import org.dspace.importer.external.datamodel.ImportRecord;
28+
import org.dspace.importer.external.datamodel.Query;
29+
import org.dspace.importer.external.exception.MetadataSourceException;
30+
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
31+
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
32+
import org.dspace.importer.external.service.DoiCheck;
33+
import org.dspace.importer.external.service.components.QuerySource;
34+
import org.dspace.services.ConfigurationService;
35+
import org.springframework.beans.factory.annotation.Autowired;
36+
37+
/**
38+
* Live import provider for the CORE aggregator (https://core.ac.uk).
39+
* Uses CORE API v3 to search and retrieve scholarly works.
40+
*/
41+
public class CoreImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<String>
42+
implements QuerySource {
43+
44+
private static final Logger log = LogManager.getLogger(CoreImportMetadataSourceServiceImpl.class);
45+
46+
private static final ObjectMapper objectMapper = new ObjectMapper();
47+
48+
@Autowired
49+
private LiveImportClient liveImportClient;
50+
51+
@Autowired
52+
private ConfigurationService configurationService;
53+
54+
@Override
55+
public String getImportSource() {
56+
return "core";
57+
}
58+
59+
@Override
60+
public void init() throws Exception {
61+
// no-op
62+
}
63+
64+
@Override
65+
public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
66+
return retry(new SearchByQueryCallable(query, start, count));
67+
}
68+
69+
@Override
70+
public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
71+
return retry(new SearchByQueryCallable(query));
72+
}
73+
74+
@Override
75+
public ImportRecord getRecord(String recordId) throws MetadataSourceException {
76+
return retry(new GetByIdCallable(recordId));
77+
}
78+
79+
@Override
80+
public ImportRecord getRecord(Query query) throws MetadataSourceException {
81+
return retry(new GetByIdCallable(query));
82+
}
83+
84+
@Override
85+
public int getRecordsCount(String query) throws MetadataSourceException {
86+
return retry(new CountByQueryCallable(query));
87+
}
88+
89+
@Override
90+
public int getRecordsCount(Query query) throws MetadataSourceException {
91+
return retry(new CountByQueryCallable(query));
92+
}
93+
94+
@Override
95+
public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
96+
return retry(new SearchByQueryCallable(query));
97+
}
98+
99+
@Override
100+
public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
101+
throw new MethodNotFoundException("This method is not implemented for CORE");
102+
}
103+
104+
/**
105+
* If the query is a DOI, converts it to CORE search format: doi:"10.1234/example".
106+
* Returns empty string if not a DOI.
107+
*
108+
* @param query the search query
109+
* @return DOI in CORE search format, or empty string
110+
*/
111+
private String getID(String query) {
112+
if (StringUtils.isBlank(query)) {
113+
return StringUtils.EMPTY;
114+
}
115+
String q = query;
116+
// Handle double URL-encoded slash: %252F -> /
117+
if (q.contains("%252F")) {
118+
q = q.replace("%252F", "/");
119+
}
120+
if (DoiCheck.isDoi(q)) {
121+
return "doi:\"" + q + "\"";
122+
}
123+
return StringUtils.EMPTY;
124+
}
125+
126+
private int getTimeoutMs() {
127+
return configurationService.getIntProperty("core.timeout", 30000);
128+
}
129+
130+
private int getDefaultPageSize() {
131+
return configurationService.getIntProperty("core.pageSize", 10);
132+
}
133+
134+
private String getBaseUrl() {
135+
return configurationService.getProperty("core.api.url", "https://api.core.ac.uk/v3");
136+
}
137+
138+
private String getApiKey() {
139+
return StringUtils.trimToNull(configurationService.getProperty("core.apiKey"));
140+
}
141+
142+
/**
143+
* Builds the request map expected by LiveImportClient.
144+
* - uriParameters: querystring params
145+
* - headers: HTTP headers (Authorization)
146+
*/
147+
private Map<String, Map<String, String>> buildParams(Map<String, String> uriParameters) {
148+
Map<String, Map<String, String>> params = new HashMap<>();
149+
150+
params.put("uriParameters", uriParameters != null ? uriParameters : new HashMap<>());
151+
152+
Map<String, String> headers = new HashMap<>();
153+
String apiKey = getApiKey();
154+
if (apiKey != null) {
155+
headers.put("Authorization", "Bearer " + apiKey);
156+
} else {
157+
log.warn("CORE api key is not configured (property core.apiKey). Requests may fail with 401/403.");
158+
}
159+
params.put("headers", headers);
160+
161+
return params;
162+
}
163+
164+
private String buildWorksSearchUrl() {
165+
return getBaseUrl() + "/search/works";
166+
}
167+
168+
private String buildWorkByIdUrl(String identifier) {
169+
return getBaseUrl() + "/works/" + encodePathSegment(identifier);
170+
}
171+
172+
private String encodePathSegment(String segment) {
173+
try {
174+
return URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+", "%20");
175+
} catch (Exception e) {
176+
return segment;
177+
}
178+
}
179+
180+
private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
181+
182+
private final Query query;
183+
184+
private SearchByQueryCallable(String queryString, Integer start, Integer count) {
185+
Query q = new Query();
186+
q.addParameter("query", StringUtils.trimToNull(queryString));
187+
q.addParameter("start", start);
188+
q.addParameter("count", count);
189+
this.query = q;
190+
}
191+
192+
private SearchByQueryCallable(Query query) {
193+
this.query = query != null ? query : new Query();
194+
}
195+
196+
@Override
197+
public List<ImportRecord> call() {
198+
List<ImportRecord> records = new ArrayList<>();
199+
200+
String raw = query.getParameterAsClass("query", String.class);
201+
String q = getID(raw);
202+
if (StringUtils.isBlank(q)) {
203+
q = StringUtils.trimToNull(raw);
204+
}
205+
if (q == null) {
206+
return records;
207+
}
208+
209+
Integer start = query.getParameterAsClass("start", Integer.class);
210+
Integer count = query.getParameterAsClass("count", Integer.class);
211+
212+
int offset = (start != null) ? Math.max(start, 0) : 0;
213+
int limit = (count != null && count > 0) ? count : getDefaultPageSize();
214+
215+
Map<String, String> uriParameters = new HashMap<>();
216+
uriParameters.put("q", q);
217+
uriParameters.put("limit", Integer.toString(limit));
218+
uriParameters.put("offset", Integer.toString(offset));
219+
220+
String url = buildWorksSearchUrl();
221+
String response = liveImportClient.executeHttpGetRequest(getTimeoutMs(), url, buildParams(uriParameters));
222+
223+
JsonNode jsonNode = convertStringJsonToJsonNode(response);
224+
if (jsonNode == null) {
225+
log.warn("CORE returned invalid JSON");
226+
return records;
227+
}
228+
229+
JsonNode resultsNode = jsonNode.at("/results");
230+
if (resultsNode != null && resultsNode.isArray()) {
231+
for (JsonNode workNode : resultsNode) {
232+
if (workNode == null || workNode.isMissingNode() || workNode.isNull()) {
233+
continue;
234+
}
235+
records.add(transformSourceRecords(workNode.toString()));
236+
}
237+
} else {
238+
log.debug("CORE: missing /results array in response");
239+
}
240+
241+
return records;
242+
}
243+
}
244+
245+
private class GetByIdCallable implements Callable<ImportRecord> {
246+
247+
private final Query query;
248+
249+
private GetByIdCallable(String recordId) {
250+
Query q = new Query();
251+
q.addParameter("id", StringUtils.trimToNull(recordId));
252+
this.query = q;
253+
}
254+
255+
private GetByIdCallable(Query query) {
256+
this.query = query != null ? query : new Query();
257+
}
258+
259+
@Override
260+
public ImportRecord call() throws Exception {
261+
262+
String id = query.getParameterAsClass("id", String.class);
263+
id = StringUtils.trimToNull(id);
264+
if (id == null) {
265+
return null;
266+
}
267+
268+
String url = buildWorkByIdUrl(id);
269+
270+
String response;
271+
try {
272+
response = liveImportClient.executeHttpGetRequest(getTimeoutMs(), url, buildParams(new HashMap<>()));
273+
} catch (RuntimeException e) {
274+
log.error("CORE getRecord failed for identifier={}", id, e);
275+
throw new MetadataSourceException("CORE getRecord failed for identifier=" + id, e);
276+
}
277+
278+
JsonNode jsonNode = convertStringJsonToJsonNode(response);
279+
if (jsonNode == null || jsonNode.isMissingNode() || jsonNode.isNull()) {
280+
log.warn("CORE /works/{id} returned invalid JSON for identifier={}", id);
281+
return null;
282+
}
283+
284+
return transformSourceRecords(jsonNode.toString());
285+
}
286+
}
287+
288+
private class CountByQueryCallable implements Callable<Integer> {
289+
290+
private final Query query;
291+
292+
private CountByQueryCallable(String queryString) {
293+
Query q = new Query();
294+
q.addParameter("query", StringUtils.trimToNull(queryString));
295+
this.query = q;
296+
}
297+
298+
private CountByQueryCallable(Query query) {
299+
this.query = query != null ? query : new Query();
300+
}
301+
302+
@Override
303+
public Integer call() throws Exception {
304+
String raw = query.getParameterAsClass("query", String.class);
305+
String q = getID(raw);
306+
if (StringUtils.isBlank(q)) {
307+
q = StringUtils.trimToNull(raw);
308+
}
309+
if (q == null) {
310+
return 0;
311+
}
312+
313+
Map<String, String> uriParameters = new HashMap<>();
314+
uriParameters.put("q", q);
315+
uriParameters.put("limit", "1");
316+
uriParameters.put("offset", "0");
317+
318+
String url = buildWorksSearchUrl();
319+
String responseString = liveImportClient.executeHttpGetRequest(getTimeoutMs(), url,
320+
buildParams(uriParameters));
321+
322+
JsonNode jsonNode = convertStringJsonToJsonNode(responseString);
323+
if (jsonNode == null) {
324+
log.warn("CORE returned invalid JSON");
325+
throw new MetadataSourceException("Could not read CORE source");
326+
}
327+
328+
JsonNode totalNode = jsonNode.at("/totalHits");
329+
if (totalNode != null && totalNode.isNumber()) {
330+
return totalNode.asInt();
331+
}
332+
if (totalNode != null && totalNode.isTextual()) {
333+
try {
334+
return Integer.parseInt(totalNode.asText());
335+
} catch (Exception e) {
336+
log.debug("Could not parse totalHits: {}", totalNode.asText(), e);
337+
}
338+
}
339+
return 0;
340+
}
341+
}
342+
343+
private JsonNode convertStringJsonToJsonNode(String json) {
344+
try {
345+
return objectMapper.readTree(json);
346+
} catch (JsonProcessingException e) {
347+
log.error("Unable to process json response.", e);
348+
}
349+
return null;
350+
}
351+
}

0 commit comments

Comments
 (0)