Skip to content

Commit cb46332

Browse files
Convert to a generic way to get BibTeX information
1 parent a0e2b62 commit cb46332

6 files changed

Lines changed: 300 additions & 135 deletions

File tree

csv2bib/src/main/java/jacksonpradolima/csv2bib/Run.java

Lines changed: 46 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -17,134 +17,89 @@
1717
package jacksonpradolima.csv2bib;
1818

1919
import com.beust.jcommander.JCommander;
20+
import jacksonpradolima.csv2bib.utils.BibTex;
2021
import jacksonpradolima.csv2bib.utils.CsvReader;
21-
import java.io.BufferedReader;
22+
import jacksonpradolima.csv2bib.utils.DigitalLibraries;
2223
import java.io.IOException;
2324
import java.io.Reader;
2425
import java.io.UncheckedIOException;
25-
import java.net.MalformedURLException;
26-
import java.net.URL;
27-
import java.net.URLConnection;
2826
import java.nio.charset.Charset;
2927
import java.nio.charset.StandardCharsets;
3028
import java.nio.file.Files;
3129
import java.nio.file.Path;
3230
import java.nio.file.Paths;
33-
import java.util.ArrayList;
34-
import java.util.Collections;
3531
import java.util.List;
36-
import java.util.concurrent.TimeUnit;
37-
import me.tongfei.progressbar.ProgressBar;
38-
import me.tongfei.progressbar.ProgressBarStyle;
32+
import java.util.stream.Collectors;
3933
import org.apache.commons.io.FilenameUtils;
4034
import org.slf4j.Logger;
4135
import org.slf4j.LoggerFactory;
4236

4337
/**
44-
* Class to parse csv file from SpringerLink to bib file.
38+
* Class to parse a CSV file into a bib file, using the DOI as the key to get BibTeX
39+
* information.
4540
*
4641
* @author Jackson A. Prado Lima <jacksonpradolima at gmail.com>
4742
*/
4843
public class Run {
4944

45+
/**
46+
* Logger
47+
*/
5048
private static final Logger logger = LoggerFactory.getLogger(Run.class);
5149

52-
private static final String url_export_citation = "https://citation-needed.springer.com/v2/references/%s?format=bibtex&flavour=citation";
53-
private static final Charset ENCODING = StandardCharsets.UTF_8;
50+
/**
51+
* The output file is generated with the same name as the input file, but
52+
* with the extension .bib
53+
*/
5454
private static String output_file = "";
5555

5656
public static void main(String[] args) throws IOException, InterruptedException {
57-
logger.info("Converting .csv to .bib");
58-
RunCommands jct = new RunCommands();
59-
JCommander jCommander = new JCommander(jct, args);
60-
61-
if (jct.help) {
62-
jCommander.usage();
63-
return;
64-
}
65-
66-
CsvReader csvReader = createCsvReader(jct.fileIn);
67-
List<String> header = csvReader.readHeader();
68-
List<List<String>> records = csvReader.readRecords();
69-
70-
if (records.isEmpty()) {
71-
logger.error("Empty file.");
72-
System.exit(-1);
73-
}
74-
75-
logger.info("Found: " + records.size() + " citations");
76-
77-
List<String> urls = new ArrayList();
78-
79-
int indexDoi = header.indexOf("Item DOI");
57+
try {
58+
RunCommands jct = new RunCommands();
59+
JCommander jCommander = new JCommander(jct, args);
8060

81-
records.stream().forEach(record -> {
82-
try {
83-
String url = String.format(url_export_citation, record.get(indexDoi)).replace("\"", "");
84-
if (!url.isEmpty()) {
85-
urls.add(url);
86-
}
87-
} catch (Exception e) {
88-
logger.error("Error in get citation from url.", e);
61+
if (jct.help) {
62+
jCommander.usage();
63+
return;
8964
}
90-
});
9165

92-
if (urls.isEmpty()) {
93-
logger.error("Urls not found.");
94-
System.exit(-1);
95-
}
66+
// Read the csv file
67+
List<List<String>> records = createCsvReader(jct.fileIn).readRecords(jct.header);
9668

97-
try {
98-
List<String> bibs = getBibText(urls);
99-
logger.info("Generating .bib file");
100-
Path path = Paths.get(output_file, new String[0]);
101-
Files.write(path, bibs, ENCODING, new java.nio.file.OpenOption[0]);
102-
logger.info("The file was generated with successfull!");
103-
} catch (IOException e) {
104-
logger.info("Error in to generate .bib file.", e);
105-
}
106-
}
69+
if (records.isEmpty()) {
70+
throw new Exception("Empty file.");
71+
}
10772

108-
static List<String> getBibText(List<String> urls) {
109-
ProgressBar pb = new ProgressBar("[csv2bib]", urls.size(), 1000, System.out, ProgressBarStyle.UNICODE_BLOCK)
110-
.start()
111-
.maxHint(urls.size());
73+
DigitalLibraries digitalLibray = DigitalLibraries.getEnum(jct.digitalLibrary);
11274

113-
List<String> bibs = new ArrayList();
75+
// Generate the urls
76+
List<String> urls = records.parallelStream()
77+
.map(m -> String.format(digitalLibray.getUrl(), m.get(jct.doiIndex)).replace("\"", ""))
78+
.collect(Collectors.toList());
11479

115-
URL uri;
116-
for (String strUrl : urls) {
80+
if (urls.isEmpty()) {
81+
throw new Exception("Urls not found.");
82+
}
83+
11784
try {
118-
uri = new URL(strUrl);
119-
URLConnection ec = uri.openConnection();
120-
121-
try (BufferedReader in = new BufferedReader(new java.io.InputStreamReader(ec.getInputStream(), ENCODING))) {
122-
123-
StringBuilder a = new StringBuilder();
124-
String inputLine;
125-
while ((inputLine = in.readLine()) != null) {
126-
a.append(inputLine).append("\n");
127-
}
128-
logger.debug(a.toString());
129-
bibs.add(a.toString());
130-
} catch (IOException ex) {
131-
throw ex;
132-
}
133-
134-
pb.step();
135-
pb.setExtraMessage("Getting BibText informations...");
136-
} catch (MalformedURLException ex) {
137-
logger.error("Invalid url: " + strUrl, ex);
138-
} catch (IOException ex) {
139-
logger.error("Error to get file at " + strUrl, ex);
85+
// Get the BibTeX information and generate the bib file
86+
Files.write(Paths.get(output_file), BibTex.getBibTextFromDigitalLibrary(urls), StandardCharsets.UTF_8);
87+
} catch (IOException e) {
88+
throw new Exception("Error in to generate .bib file.", e);
14089
}
90+
}catch(UncheckedIOException e){
91+
logger.error("Error when tried to read the csv file.", e);
92+
} catch (Exception ex) {
93+
logger.error(ex.getMessage());
14194
}
142-
143-
pb.stop();
144-
145-
return bibs;
14695
}
14796

97+
/**
98+
* Wrapper to read a CSV file
99+
*
100+
* @param file
101+
* @return
102+
*/
148103
static CsvReader createCsvReader(String file) {
149104
try {
150105
Path path = Paths.get(file);
@@ -156,30 +111,4 @@ static CsvReader createCsvReader(String file) {
156111
throw new UncheckedIOException(e);
157112
}
158113
}
159-
160-
private static void printProgress(long startTime, long total, long current) {
161-
long eta = current == 0 ? 0
162-
: (total - current) * (System.currentTimeMillis() - startTime) / current;
163-
164-
String etaHms = current == 0 ? "N/A"
165-
: String.format("%02d:%02d:%02d", TimeUnit.MILLISECONDS.toHours(eta),
166-
TimeUnit.MILLISECONDS.toMinutes(eta) % TimeUnit.HOURS.toMinutes(1),
167-
TimeUnit.MILLISECONDS.toSeconds(eta) % TimeUnit.MINUTES.toSeconds(1));
168-
169-
StringBuilder string = new StringBuilder(140);
170-
int percent = (int) (current * 100 / total);
171-
string
172-
.append('\r')
173-
.append(String.join("", Collections.nCopies(percent == 0 ? 2 : 2 - (int) (Math.log10(percent)), " ")))
174-
.append(String.format(" %d%% [", percent))
175-
.append(String.join("", Collections.nCopies(percent, "=")))
176-
.append('>')
177-
.append(String.join("", Collections.nCopies(100 - percent, " ")))
178-
.append(']')
179-
.append(String.join("", Collections.nCopies((int) (Math.log10(total)) - (int) (Math.log10(current)), " ")))
180-
.append(String.format(" %d/%d, ETA: %s", current, total, etaHms));
181-
182-
System.out.print(string);
183-
}
184-
185114
}

csv2bib/src/main/java/jacksonpradolima/csv2bib/RunCommands.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ public class RunCommands {
2929
@Parameter(names = {"-help", "-h"}, help = true)
3030
public boolean help;
3131

32-
@Parameter(names = {"-fileIn", "-fi"}, description = "Input File")
32+
@Parameter(names = {"-fileIn", "-fi"}, description = "Input File", required = true)
3333
public String fileIn;
34+
35+
@Parameter(names = {"-dl"}, description = "Digital Library", required = true)
36+
public String digitalLibrary;
37+
38+
@Parameter(names = {"-doiIndex"}, description = "DOI index", required = true)
39+
public int doiIndex;
40+
41+
@Parameter(names = {"-header"}, description = "Contains header?", arity = 1)
42+
public boolean header;
3443
}
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
* Copyright (C) 2017 Jackson A. Prado Lima <jacksonpradolima at gmail.com>
3+
*
4+
* This program is free software: you can redistribute it and/or modify
5+
* it under the terms of the GNU General Public License as published by
6+
* the Free Software Foundation, either version 3 of the License, or
7+
* (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
* GNU General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU General Public License
15+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
package jacksonpradolima.csv2bib.utils;
18+
19+
import java.io.BufferedReader;
20+
import java.io.IOException;
21+
import java.io.InputStreamReader;
22+
import java.net.MalformedURLException;
23+
import java.net.URL;
24+
import java.net.URLConnection;
25+
import java.nio.charset.Charset;
26+
import java.nio.charset.StandardCharsets;
27+
import java.util.ArrayList;
28+
import java.util.List;
29+
import me.tongfei.progressbar.ProgressBar;
30+
import me.tongfei.progressbar.ProgressBarStyle;
31+
import org.slf4j.Logger;
32+
import org.slf4j.LoggerFactory;
33+
34+
/**
35+
* Wrapper to get BibTeX information from URLs
36+
* @author Jackson A. Prado Lima <jacksonpradolima at gmail.com>
37+
*/
38+
public class BibTex {
39+
40+
/**
41+
* Logger
42+
*/
43+
private static final Logger logger = LoggerFactory.getLogger(BibTex.class);
44+
45+
/**
46+
* Encoding
47+
*/
48+
private static final Charset ENCODING = StandardCharsets.UTF_8;
49+
50+
/**
51+
* Gets BibTeX entries from URLs. Adapted from:
52+
* https://www.crossref.org/labs/resolving-citations-we-dont-need-no-stinkin-parser/
53+
*
54+
* @param urls
55+
* @return List of bibtex
56+
*/
57+
public static List<String> getBibTextFromCrossref(List<String> urls) {
58+
ProgressBar pb = new ProgressBar("[csv2bib]", urls.size(), 1000, System.out, ProgressBarStyle.UNICODE_BLOCK)
59+
.start()
60+
.maxHint(urls.size());
61+
62+
List<String> bibs = new ArrayList();
63+
64+
urls.parallelStream().forEach(strUrl -> {
65+
try {
66+
Process process = new ProcessBuilder("curl", "-LH", "Accept: application/x-bibtex; style=bibtex", strUrl)
67+
.redirectErrorStream(true)
68+
.start();
69+
70+
try (BufferedReader in = new BufferedReader(new InputStreamReader(process.getInputStream(), ENCODING))) {
71+
72+
StringBuilder bibtex = new StringBuilder();
73+
String inputLine;
74+
75+
boolean startedBibtex = false; // A flag to know when the bibtex starts
76+
while ((inputLine = in.readLine()) != null) {
77+
if (!startedBibtex) {
78+
if (inputLine.startsWith("@")) {
79+
startedBibtex = true;
80+
}
81+
}
82+
83+
if (startedBibtex) {
84+
bibtex.append(inputLine).append("\n");
85+
}
86+
}
87+
88+
bibs.add(bibtex.toString());
89+
} catch (IOException ex) {
90+
throw ex;
91+
}
92+
93+
pb.step();
94+
pb.setExtraMessage("Getting BibText informations...");
95+
96+
} catch (IOException e) {
97+
logger.error("Error to get file at " + strUrl, e);
98+
}
99+
});
100+
101+
pb.stop();
102+
103+
return bibs;
104+
}
105+
106+
/**
107+
* Gets BibTeX entries directly from digital library URLs
108+
*
109+
* @param urls
110+
* @return List of bibtex
111+
*/
112+
public static List<String> getBibTextFromDigitalLibrary(List<String> urls) {
113+
ProgressBar pb = new ProgressBar("[csv2bib]", urls.size(), 1000, System.out, ProgressBarStyle.UNICODE_BLOCK)
114+
.start()
115+
.maxHint(urls.size());
116+
117+
List<String> bibs = new ArrayList();
118+
119+
URL uri;
120+
for (String strUrl : urls) {
121+
try {
122+
uri = new URL(strUrl);
123+
URLConnection ec = uri.openConnection();
124+
125+
try (BufferedReader in = new BufferedReader(new java.io.InputStreamReader(ec.getInputStream(), ENCODING))) {
126+
127+
StringBuilder a = new StringBuilder();
128+
String inputLine;
129+
while ((inputLine = in.readLine()) != null) {
130+
a.append(inputLine).append("\n");
131+
}
132+
logger.debug(a.toString());
133+
bibs.add(a.toString());
134+
} catch (IOException ex) {
135+
throw ex;
136+
}
137+
138+
pb.step();
139+
pb.setExtraMessage("Getting BibText informations...");
140+
} catch (MalformedURLException ex) {
141+
logger.error("Invalid url: " + strUrl, ex);
142+
} catch (IOException ex) {
143+
logger.error("Error to get file at " + strUrl, ex);
144+
}
145+
}
146+
147+
pb.stop();
148+
149+
return bibs;
150+
}
151+
}

0 commit comments

Comments
 (0)