Skip to content

Commit c2d7f3e

Browse files
feat: add option to disable OCR (sismics#768)
fixes sismics#344 refs sismics#767
1 parent 8f1ff56 commit c2d7f3e

16 files changed

Lines changed: 221 additions & 62 deletions

File tree

docs-core/src/main/java/com/sismics/docs/core/constant/ConfigType.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
package com.sismics.docs.core.constant;
22

33
/**
4-
* Configuration parameters.
4+
* Configuration parameters.
55
*
6-
* @author jtremeaux
6+
* @author jtremeaux
77
*/
88
public enum ConfigType {
99
/**
@@ -20,6 +20,11 @@ public enum ConfigType {
2020
*/
2121
GUEST_LOGIN,
2222

23+
/**
24+
* OCR enabled.
25+
*/
26+
OCR_ENABLED,
27+
2328
/**
2429
* Default language.
2530
*/

docs-core/src/main/java/com/sismics/docs/core/util/ConfigUtil.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88

99
/**
1010
* Configuration parameter utilities.
11-
*
12-
* @author jtremeaux
11+
*
1312
*/
1413
public class ConfigUtil {
1514
/**
1615
* Returns the textual value of a configuration parameter.
17-
*
16+
*
1817
* @param configType Type of the configuration parameter
1918
* @return Textual value of the configuration parameter
2019
* @throws IllegalStateException Configuration parameter undefined
@@ -30,7 +29,7 @@ public static String getConfigStringValue(ConfigType configType) {
3029

3130
/**
3231
* Returns the configuration resource bundle.
33-
*
32+
*
3433
* @return Resource bundle
3534
*/
3635
public static ResourceBundle getConfigBundle() {
@@ -39,14 +38,14 @@ public static ResourceBundle getConfigBundle() {
3938

4039
/**
4140
* Returns the integer value of a configuration parameter.
42-
*
41+
*
4342
* @param configType Type of the configuration parameter
4443
* @return Integer value of the configuration parameter
4544
* @throws IllegalStateException Configuration parameter undefined
4645
*/
4746
public static int getConfigIntegerValue(ConfigType configType) {
4847
String value = getConfigStringValue(configType);
49-
48+
5049
return Integer.parseInt(value);
5150
}
5251

@@ -65,14 +64,28 @@ public static long getConfigLongValue(ConfigType configType) {
6564

6665
/**
6766
* Returns the boolean value of a configuration parameter.
68-
*
67+
*
6968
* @param configType Type of the configuration parameter
7069
* @return Boolean value of the configuration parameter
7170
* @throws IllegalStateException Configuration parameter undefined
7271
*/
7372
public static boolean getConfigBooleanValue(ConfigType configType) {
7473
String value = getConfigStringValue(configType);
75-
7674
return Boolean.parseBoolean(value);
7775
}
76+
77+
/**
78+
* Returns the boolean value of a configuration parameter, or the given default value if the parameter is undefined.
79+
*
80+
* @param configType Type of the configuration parameter
81+
* @param defaultValue Default value to return if the configuration parameter is undefined
82+
* @return Boolean value of the configuration parameter
83+
*/
84+
public static boolean getConfigBooleanValue(ConfigType configType, boolean defaultValue) {
85+
try {
86+
return getConfigBooleanValue(configType);
87+
} catch (IllegalStateException e) {
88+
return defaultValue;
89+
}
90+
}
7891
}

docs-core/src/main/java/com/sismics/docs/core/util/format/ImageFormatHandler.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import com.google.common.io.Closer;
44
import com.sismics.docs.core.constant.Constants;
55
import com.sismics.docs.core.util.FileUtil;
6+
import com.sismics.docs.core.util.ConfigUtil;
7+
import com.sismics.docs.core.constant.ConfigType;
68
import com.sismics.util.mime.MimeType;
79
import org.apache.pdfbox.io.MemoryUsageSetting;
810
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -22,7 +24,6 @@
2224
/**
2325
* Image format handler.
2426
*
25-
* @author bgamard
2627
*/
2728
public class ImageFormatHandler implements FormatHandler {
2829
/**
@@ -45,7 +46,7 @@ public BufferedImage generateThumbnail(Path file) throws Exception {
4546

4647
@Override
4748
public String extractContent(String language, Path file) throws Exception {
48-
if (language == null) {
49+
if (language == null || !ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
4950
return null;
5051
}
5152

docs-core/src/main/java/com/sismics/docs/core/util/format/PdfFormatHandler.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import com.google.common.io.Closer;
44
import com.sismics.docs.core.util.FileUtil;
5+
import com.sismics.docs.core.util.ConfigUtil;
6+
import com.sismics.docs.core.constant.ConfigType;
57
import com.sismics.util.mime.MimeType;
68
import org.apache.pdfbox.io.MemoryUsageSetting;
79
import org.apache.pdfbox.multipdf.PDFMergerUtility;
@@ -53,7 +55,7 @@ public String extractContent(String language, Path file) {
5355
}
5456

5557
// No text content, try to OCR it
56-
if (language != null && content != null && content.trim().isEmpty()) {
58+
if (language != null && content != null && content.trim().isEmpty() && ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
5759
StringBuilder sb = new StringBuilder();
5860
try (InputStream inputStream = Files.newInputStream(file);
5961
PDDocument pdfDocument = PDDocument.load(inputStream)) {

docs-core/src/main/java/com/sismics/util/jpa/DbOpenHelper.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ abstract class DbOpenHelper {
3939
private static final Logger log = LoggerFactory.getLogger(DbOpenHelper.class);
4040

4141
private final JdbcConnectionAccess jdbcConnectionAccess;
42-
42+
4343
private final List<Exception> exceptions = new ArrayList<>();
4444

4545
private Formatter formatter;
@@ -99,7 +99,7 @@ public void open() {
9999
onCreate();
100100
oldVersion = 0;
101101
}
102-
102+
103103
// Execute update script
104104
ResourceBundle configBundle = ConfigUtil.getConfigBundle();
105105
Integer currentVersion = Integer.parseInt(configBundle.getString("db.version"));
@@ -126,7 +126,7 @@ public void open() {
126126

127127
/**
128128
* Execute all upgrade scripts in ascending order for a given version.
129-
*
129+
*
130130
* @param version Version number
131131
* @throws Exception e
132132
*/
@@ -136,7 +136,7 @@ void executeAllScript(final int version) throws Exception {
136136
return name.matches("dbupdate-" + versionString + "-\\d+\\.sql");
137137
});
138138
Collections.sort(fileNameList);
139-
139+
140140
for (String fileName : fileNameList) {
141141
if (log.isInfoEnabled()) {
142142
log.info(MessageFormat.format("Executing script: {0}", fileName));
@@ -145,16 +145,16 @@ void executeAllScript(final int version) throws Exception {
145145
executeScript(is);
146146
}
147147
}
148-
148+
149149
/**
150150
* Execute a SQL script. All statements must be one line only.
151-
*
151+
*
152152
* @param inputScript Script to execute
153153
* @throws IOException e
154154
*/
155155
private void executeScript(InputStream inputScript) throws IOException {
156156
List<String> lines = CharStreams.readLines(new InputStreamReader(inputScript));
157-
157+
158158
for (String sql : lines) {
159159
if (Strings.isNullOrEmpty(sql) || sql.startsWith("--")) {
160160
continue;
@@ -178,21 +178,21 @@ private void executeScript(InputStream inputScript) throws IOException {
178178
}
179179

180180
public abstract void onCreate() throws Exception;
181-
181+
182182
public abstract void onUpgrade(int oldVersion, int newVersion) throws Exception;
183-
183+
184184
/**
185-
* Returns a List of all Exceptions which occured during the export.
185+
* Returns a List of all Exceptions which occurred during the export.
186186
*
187-
* @return A List containig the Exceptions occured during the export
187+
* @return A List containing the Exceptions occurred during the export
188188
*/
189189
public List<?> getExceptions() {
190190
return exceptions;
191191
}
192192

193193
/**
194194
* Format the output SQL statements.
195-
*
195+
*
196196
* @param format True to format
197197
*/
198198
public void setFormat(boolean format) {
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
db.version=30
1+
db.version=31
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- DBUPDATE-031-0.SQL
2+
3+
-- Insert a new setting to enable or disable OCR
4+
insert into T_CONFIG (CFG_ID_C, CFG_VALUE_C) values ('OCR_ENABLED', 'true');
5+
6+
-- Update the database version
7+
update T_CONFIG set CFG_VALUE_C = '31' where CFG_ID_C = 'DB_VERSION';
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
api.current_version=${project.version}
22
api.min_version=1.0
3-
db.version=30
3+
db.version=31

0 commit comments

Comments
 (0)