Skip to content

Commit 5a2685f

Browse files
committed
fix R-to-Java string conversion to handle CE_NATIVE correctly (#228); add DEBUG_ENCODING define support
1 parent 5a6a95e commit 5a2685f

1 file changed

Lines changed: 21 additions & 0 deletions

File tree

src/Rglue.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,11 @@ SEXP j2SEXP(JNIEnv *env, jobject o, int releaseLocal) {
160160
/* returns string from a CHARSXP making sure that the result is in UTF-8
161161
NOTE: this should NOT be used to create Java strings as they require UTF-16 natively */
162162
const char *rj_char_utf8(SEXP s) {
    /* s is expected to be a CHARSXP. Its bytes are returned unchanged when the
       string is already marked as UTF-8; otherwise they are re-encoded from
       the encoding declared on s into UTF-8. */
    const char *bytes = CHAR(s);

#ifdef DEBUG_ENCODING
    fprintf(stderr, "rJava.rj_char_utf8, CE=%d: \"%s\"\n", (int)Rf_getCharCE(s), bytes);
    // { const char *c0 = CHAR(s); while (*c0) fprintf(stderr, " %02x", (int)((unsigned char)*(c0++))); }
    // fprintf(stderr, "\n");
#endif

    if (Rf_getCharCE(s) == CE_UTF8)
        return bytes;

    /* Use the Rf_-prefixed entry points consistently so the function does not
       depend on R's unprefixed remap macros being enabled.
       Last argument = subst. invalid chars: 1=hex, 2=., 3=?, other=skip */
    return Rf_reEnc(bytes, Rf_getCharCE(s), CE_UTF8, 0);
}
165170

@@ -184,20 +189,32 @@ int rj_char_utf16(SEXP s, jchar **buf) {
184189
char *dst = (char*) js;
185190
int end_test = 1;
186191

192+
#ifdef DEBUG_ENCODING
193+
fprintf(stderr, "rJava.rj_char_utf16, CE=%d:", (int)ce_in);
194+
{ const char *c0 = c; while (*c0) fprintf(stderr, " %02x", (int)((unsigned char)*(c0++))); }
195+
fprintf(stderr, "\n");
196+
#endif
197+
187198
switch (ce_in) {
188199
#ifdef WIN32
189200
case CE_NATIVE:
201+
/* reEnc uses this, but translateCharUtf8 uses "" so let's go with ""
190202
sprintf(cpbuf, "CP%d", localeCP);
191203
ifrom = cpbuf;
204+
*/
192205
break;
193206
case CE_LATIN1: ifrom = "CP1252"; break;
194207
#else
208+
case CE_NATIVE: break; /* is already "" */
195209
case CE_LATIN1: ifrom = "latin1"; break;
196210
#endif
197211
default:
198212
ifrom = "UTF-8"; break;
199213
}
200214

215+
#ifdef DEBUG_ENCODING
216+
fprintf(stderr, " '%s' -> UTF-16: ", ifrom);
217+
#endif
201218
ih = Riconv_open(((char*)&end_test)[0] == 1 ? "UTF-16LE" : "UTF-16BE", ifrom);
202219
if(ih == (void *)(-1))
203220
Rf_error("Unable to start conversion to UTF-16");
@@ -215,6 +232,10 @@ int rj_char_utf16(SEXP s, jchar **buf) {
215232
}
216233
}
217234
Riconv_close(ih);
235+
#ifdef DEBUG_ENCODING
236+
{ const jchar *j = js; while (j < (const jchar*)dst) fprintf(stderr, " %04x", (unsigned int)*(j++)); }
237+
fprintf(stderr, "\n");
238+
#endif
218239
return dst - (char*) js;
219240
}
220241

0 commit comments

Comments
 (0)