Skip to content

Commit d7acfe2

Browse files
committed
Fix #850: Resolve UTF-8 encoding and IPC issues in Search plugin
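Forces UTF-8 encoding by passing -Dfile.encoding=UTF-8 to the Java processes launched from the C++ layer, reads indexed files through an explicit UTF-8 InputStreamReader, and rewrites the Java IOHelper.readFullContent to accumulate decoded characters in a StringBuilder so that multi-byte UTF-8 characters are properly decoded.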
1 parent 7e69ea2 commit d7acfe2

4 files changed

Lines changed: 11 additions & 9 deletions

File tree

plugins/search/indexer/indexer-java/src/cc/search/indexer/Context.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
import java.util.Map;
1515
import org.apache.lucene.document.Document;
1616
import org.apache.lucene.index.IndexReader;
17+
import java.io.InputStreamReader;
18+
import java.nio.charset.StandardCharsets;
1719

1820
/**
1921
* Analysis context.
@@ -60,7 +62,7 @@ public Context(String fileId_, File file_, String fileMimeType_)
6062
// Read content from a file stream
6163
try (FileInputStream stream = new FileInputStream(file_)) {
6264
String fileContent = IOHelper.readFullContent(
63-
IOHelper.getReaderForInput(stream));
65+
new InputStreamReader(stream, StandardCharsets.UTF_8));
6466

6567
// Get line informations
6668
try (Reader reader = new StringReader(fileContent)) {

plugins/search/indexer/indexer-java/src/cc/search/indexer/util/IOHelper.java

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -57,14 +57,12 @@ public static InputStreamReader getReaderForInput(InputStream input_) throws IOE
5757
* @throws IOException
5858
*/
5959
public static String readFullContent(InputStreamReader reader_) throws IOException {
60-
ByteArrayOutputStream out = new ByteArrayOutputStream();
61-
62-
int b = reader_.read();
63-
while (b != -1) {
64-
out.write(b);
65-
b = reader_.read();
60+
StringBuilder out = new StringBuilder();
61+
char[] buffer = new char[4096];
62+
int read;
63+
while ((read = reader_.read(buffer)) != -1) {
64+
out.append(buffer, 0, read);
6665
}
67-
68-
return out.toString(reader_.getEncoding());
66+
return out.toString();
6967
}
7068
}

plugins/search/indexer/src/indexerprocess.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ IndexerProcess::IndexerProcess(
5959

6060
std::vector<const char*> execArguments {
6161
"java", JAVAMEMORYAMOUNT,
62+
"-Dfile.encoding=UTF-8",
6263
"-classpath", classpath.c_str(),
6364
"-Djava.util.logging.config.class=cc.search.common.config.LogConfigurator",
6465
"-Djava.util.logging.SimpleFormatter.format=%1$tY-%1$tm-%1$td %1$tT [%4$s] %5$s%6$s%n",

plugins/search/service/include/service/serviceprocess.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ class ServiceProcess : public SearchServiceIf, public util::PipedProcess
6161
std::string classpath = compassRoot_ + "/lib/java/*";
6262

6363
::execlp("java", "java", "-server",
64+
"-Dfile.encoding=UTF-8",
6465
"-classpath", classpath.c_str(),
6566
//"-Xdebug", "-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8666",
6667
"-Djava.util.logging.config.class=cc.search.common.config.LogConfigurator",

0 commit comments

Comments
 (0)