Skip to content

Commit cbc4390

Browse files
committed
Added separate functions UnicodeToWtf8() and Wtf8ToUnicode() to work around an old bug in Android
versions older than M (API 23), which cannot process 4-byte UTF-8 sequences. How to reproduce the bug: open a text file containing such UTF-8 sequences and you will get a crash with the following error: "input is not valid Modified UTF-8: illegal start byte 0xf0". Reason: unhandled exception: 'input is not valid Modified UTF-8: illegal start byte 0xf0'. Stack trace (for commit a1bf2be): _JNIEnv::NewStringUTF(char const*) // jni.h:841 jstring CRJNIEnv::toJavaString( const lString16 & str ) // cr3java.cpp:18 void CRStringField::set( const lString16& str) // cr3java.h:165 JNIEXPORT jobject JNICALL Java_org_coolreader_crengine_DocView_getPositionPropsInternal (JNIEnv * _env, jobject _this, jstring _path) // docview.cpp:1662 at org.coolreader.crengine.DocView.getPositionPropsInternal(Native Method) at org.coolreader.crengine.DocView.getPositionProps(DocView.java:270) at org.coolreader.crengine.ReaderView.preparePageImage(ReaderView.java:3248) at org.coolreader.crengine.ReaderView.access$1700(ReaderView.java:42) at org.coolreader.crengine.ReaderView$LoadDocumentTask.work(ReaderView.java:4829) at org.coolreader.crengine.Engine$TaskHandler.run(Engine.java:180) at android.os.Handler.handleCallback(Handler.java:725) at android.os.Handler.dispatchMessage(Handler.java:92) at android.os.Looper.loop(Looper.java:137) at org.coolreader.crengine.BackgroundThread.run(BackgroundThread.java:122) The solution: do not pass such strings to env->NewStringUTF(); pass WTF-8 strings instead. Note that this does not fix the crash that occurs when CoolReader finds files whose names contain such characters!
1 parent 6b83c1d commit cbc4390

File tree

10 files changed

+335
-14
lines changed

10 files changed

+335
-14
lines changed

android/jni/cr3engine.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,9 @@ void cr3androidFatalErrorHandler(int errorCode, const char * errorText )
552552
/// set fatal error handler
553553
void crSetFatalErrorHandler( lv_FatalErrorHandler_t * handler );
554554

555-
jboolean initInternal(JNIEnv * penv, jclass obj, jobjectArray fontArray) {
555+
jboolean initInternal(JNIEnv * penv, jclass obj, jobjectArray fontArray, jint sdk_int) {
556+
557+
CRJNIEnv::sdk_int = sdk_int;
556558

557559
CRJNIEnv env(penv);
558560

@@ -591,13 +593,13 @@ jboolean initInternal(JNIEnv * penv, jclass obj, jobjectArray fontArray) {
591593
/*
592594
* Class: org_coolreader_crengine_Engine
593595
* Method: initInternal
594-
* Signature: ([Ljava/lang/String;)Z
596+
* Signature: ([Ljava/lang/String;I)Z
595597
*/
596598
JNIEXPORT jboolean JNICALL Java_org_coolreader_crengine_Engine_initInternal
597-
(JNIEnv * penv, jclass obj, jobjectArray fontArray)
599+
(JNIEnv * penv, jclass obj, jobjectArray fontArray, jint sdk_int)
598600
{
599601
jboolean res = JNI_FALSE;
600-
COFFEE_TRY_JNI(penv, res = initInternal(penv, obj, fontArray));
602+
COFFEE_TRY_JNI(penv, res = initInternal(penv, obj, fontArray, sdk_int));
601603
return res;
602604
}
603605

@@ -775,7 +777,7 @@ JNIEXPORT jboolean JNICALL Java_org_coolreader_crengine_Engine_setKeyBacklightIn
775777

776778
static JNINativeMethod sEngineMethods[] = {
777779
/* name, signature, funcPtr */
778-
{"initInternal", "([Ljava/lang/String;)Z", (void*)Java_org_coolreader_crengine_Engine_initInternal},
780+
{"initInternal", "([Ljava/lang/String;I)Z", (void*)Java_org_coolreader_crengine_Engine_initInternal},
779781
{"uninitInternal", "()V", (void*)Java_org_coolreader_crengine_Engine_uninitInternal},
780782
{"getFontFaceListInternal", "()[Ljava/lang/String;", (void*)Java_org_coolreader_crengine_Engine_getFontFaceListInternal},
781783
{"setCacheDirectoryInternal", "(Ljava/lang/String;I)Z", (void*)Java_org_coolreader_crengine_Engine_setCacheDirectoryInternal},

android/jni/cr3java.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,35 @@
22

33
#include <dlfcn.h>
44

5+
// M is for Marshmallow!
6+
#define ANDROID_SDK_M 23
7+
8+
uint8_t CRJNIEnv::sdk_int = 0;
9+
510
lString16 CRJNIEnv::fromJavaString( jstring str )
611
{
712
if (!str)
813
return lString16::empty_str;
914
jboolean iscopy;
1015
const char * s = env->GetStringUTFChars(str, &iscopy);
11-
lString16 res(s);
16+
lString16 res;
17+
if (CRJNIEnv::sdk_int >= ANDROID_SDK_M)
18+
res = Utf8ToUnicode(s);
19+
else
20+
res = Wtf8ToUnicode(s);
1221
env->ReleaseStringUTFChars(str, s);
1322
return res;
1423
}
1524

1625
jstring CRJNIEnv::toJavaString( const lString16 & str )
1726
{
18-
return env->NewStringUTF(UnicodeToUtf8(str).c_str());
27+
if (CRJNIEnv::sdk_int >= ANDROID_SDK_M)
28+
return env->NewStringUTF(UnicodeToUtf8(str).c_str());
29+
// To support 4-byte UTF-8 sequences on Android older than 6.0 (API 23),
30+
// we encode characters with codes >= 0x10000 to WTF-8.
31+
// Otherwise, we get a crash with the following message:
32+
// "input is not valid Modified UTF-8: illegal start byte 0xf0"
33+
return env->NewStringUTF(UnicodeToWtf8(str).c_str());
1934
}
2035

2136
void CRJNIEnv::fromJavaStringArray( jobjectArray array, lString16Collection & dst )

android/jni/cr3java.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class BitmapAccessorInterface {
5858
class CRJNIEnv {
5959
public:
6060
JNIEnv * env;
61+
static uint8_t sdk_int;
6162
CRJNIEnv(JNIEnv * pEnv) : env(pEnv) { }
6263
JNIEnv * operator -> () { return env; }
6364
lString16 fromJavaString( jstring str );

android/jni/org_coolreader_crengine_Engine.h

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

android/src/org/coolreader/crengine/Engine.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ public void initAgain() {
590590
}
591591
mFonts = findFonts();
592592
findExternalHyphDictionaries();
593-
if (!initInternal(mFonts)) {
593+
if (!initInternal(mFonts, DeviceInfo.getSDKLevel())) {
594594
log.i("Engine.initInternal failed!");
595595
throw new RuntimeException("Cannot initialize CREngine JNI");
596596
}
@@ -599,7 +599,7 @@ public void initAgain() {
599599
}
600600

601601
// Native functions
602-
private native static boolean initInternal(String[] fontList);
602+
private native static boolean initInternal(String[] fontList, int sdk_int);
603603

604604
private native static void uninitInternal();
605605

@@ -2022,7 +2022,7 @@ public MountPathCorrector getPathCorrector() {
20222022
}
20232023
mFonts = findFonts();
20242024
findExternalHyphDictionaries();
2025-
if (!initInternal(mFonts)) {
2025+
if (!initInternal(mFonts, DeviceInfo.getSDKLevel())) {
20262026
log.i("Engine.initInternal failed!");
20272027
throw new RuntimeException("Cannot initialize CREngine JNI");
20282028
}

crengine/Tools/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ add_subdirectory(HyphConv)
88
add_subdirectory(langstat)
99
add_subdirectory(langstat2)
1010
add_subdirectory(glyphcache_bench)
11+
add_subdirectory(wtf8-test)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
2+
set(SRC_LIST
3+
main.cpp
4+
)
5+
6+
set(crengine_part_SRC_LIST
7+
../../src/cp_stats.cpp
8+
../../src/crtxtenc.cpp
9+
../../src/lvmemman.cpp
10+
../../src/lvstream.cpp
11+
../../src/lvstring.cpp
12+
)
13+
14+
#add_definitions(-DBUILD_LITE=1)
15+
16+
if(UNIX)
17+
add_definitions(-DLINUX -D_LINUX)
18+
endif(UNIX)
19+
20+
if(WIN32)
21+
add_definitions(-DWIN32 -D_CONSOLE)
22+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mconsole")
23+
endif(WIN32)
24+
25+
add_executable(wtf8-test ${SRC_LIST} ${crengine_part_SRC_LIST})
26+
target_link_libraries(wtf8-test ${STD_LIBS})

crengine/Tools/wtf8-test/main.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#include "lvstring.h"
2+
3+
#include <stdio.h>
4+
5+
lUInt32 uni_chars[] = {
6+
0x10000, // LINEAR B SYLLABLE B008 A
7+
0x10123, // AEGEAN NUMBER TWO THOUSAND
8+
0x10081, // LINEAR B IDEOGRAM B102 WOMAN
9+
0x1F600, // GRINNING FACE
10+
0x1F601, // GRINNING FACE WITH SMILING EYES
11+
0x1F602, // FACE WITH TEARS OF JOY
12+
0x1F603, // SMILING FACE WITH OPEN MOUTH
13+
0x1F604, // SMILING FACE WITH OPEN MOUTH AND SMILING EYES
14+
0x1F605, // SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
15+
0x1F606, // SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
16+
0x1F607, // SMILING FACE WITH HALO
17+
0x1F608, // SMILING FACE WITH HORNS
18+
0x1F609, // WINKING FACE
19+
0x1F60A, // SMILING FACE WITH SMILING EYES
20+
0x1F60B // FACE SAVOURING DELICIOUS FOOD
21+
};
22+
23+
int main(int argc, char* argv[])
24+
{
25+
lString16 src;
26+
for (size_t i = 0; i < sizeof(uni_chars)/sizeof(lUInt32); i++)
27+
{
28+
src.append(1, uni_chars[i]);
29+
}
30+
lString8 dst = UnicodeToUtf8(src);
31+
printf("UTF8: %s\n", dst.c_str());
32+
lString8 dstw = UnicodeToWtf8(src);
33+
printf("WTF8: %s\n", dstw.c_str());
34+
// Back to unicode
35+
lString16 str2 = Wtf8ToUnicode(dstw);
36+
// and compare...
37+
if (str2.compare(src) == 0)
38+
printf("OK, strings is equal.\n");
39+
else
40+
printf("Sorry, strings is NOT equal!\n");
41+
return 0;
42+
}

crengine/include/lvstring.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,10 @@ lString8 UnicodeToLocal( const lString16 & str );
10301030
lString8 UnicodeToUtf8( const lString16 & str );
10311031
/// converts wide unicode string to utf-8 string
10321032
lString8 UnicodeToUtf8(const lChar16 * s, int count);
1033+
/// converts wide unicode string to wtf-8 string
1034+
lString8 UnicodeToWtf8( const lString16 & str );
1035+
/// converts wide unicode string to wtf-8 string
1036+
lString8 UnicodeToWtf8(const lChar16 * s, int count);
10331037
/// converts unicode string to 8-bit string using specified conversion table
10341038
lString8 UnicodeTo8Bit( const lString16 & str, const lChar8 * * table );
10351039
/// converts 8-bit string to unicode string using specified conversion table for upper 128 characters
@@ -1044,6 +1048,12 @@ lString16 Utf8ToUnicode( const char * s );
10441048
lString16 Utf8ToUnicode( const char * s, int sz );
10451049
/// converts utf-8 string fragment to wide unicode string
10461050
void Utf8ToUnicode(const lUInt8 * src, int &srclen, lChar16 * dst, int &dstlen);
1051+
/// converts wtf-8 string to wide unicode string
1052+
lString16 Wtf8ToUnicode( const lString8 & str );
1053+
/// converts wtf-8 c-string to wide unicode string
1054+
lString16 Wtf8ToUnicode( const char * s );
1055+
/// converts wtf-8 string fragment to wide unicode string
1056+
lString16 Wtf8ToUnicode( const char * s, int sz );
10471057
/// decodes path like "file%20name" to "file name"
10481058
lString16 DecodeHTMLUrlString( lString16 s );
10491059
/// truncates string by specified size, appends ... if truncated, prefers to wrap whole words

0 commit comments

Comments
 (0)