Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import org.apache.commons.io.IOUtils;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
Expand All @@ -25,7 +26,10 @@ public class CharsetSupport {
private static final String[][] CHARSET_FALLBACK_MAP = new String[][] {
// Some Android versions don't support KOI8-U
{"koi8-u", "koi8-r"},
{"iso-2022-jp-[\\d]+", "iso-2022-jp"}
{"iso-2022-jp-[\\d]+", "iso-2022-jp"},
// EUC-JP aliases that some mailers use
{"x-euc-jp", "euc-jp"},
{"euc_jp", "euc-jp"},
};


Expand All @@ -34,8 +38,11 @@ static String fixupCharset(String charset, Message message) throws MessagingExce
charset = DEFAULT_CHARSET;

charset = charset.toLowerCase(Locale.US);
if (charset.equals("cp932"))
if (charset.equals("cp932") || charset.equals("shift-jis") || charset.equals("sjis") ||
charset.equals("ms932") || charset.equals("windows-31j") || charset.equals("x-sjis") ||
charset.equals("x-ms-cp932")) {
charset = SHIFT_JIS;
}

if (charset.equals(SHIFT_JIS) || charset.equals("iso-2022-jp")) {
String variant = JisSupport.getJisVariantFromMessage(message);
Expand All @@ -56,6 +63,15 @@ static String readToString(InputStream in, String charset) throws IOException {
charset = "x-" + charset.substring(2, charset.length() - 17) + "-shift_jis-2007";
}

// Android's ICU4J ISO-2022-JP decoder is stricter than the JVM decoder and can silently fail on
// QP-decoded byte sequences, causing ESC bytes to appear as invisible control characters while
// $B and (B escape sequence remnants become visible literal text.
// Always use Iso2022JpToShiftJisInputStream for reliable decoding across all Android versions.
if (charset.equals("iso-2022-jp")) {
in = new Iso2022JpToShiftJisInputStream(in);
charset = SHIFT_JIS;
}

// shift_jis variants are supported by Eclair and later.
if (JisSupport.isShiftJis(charset) && !Charset.isSupported(charset)) {
// If the JIS variant is iPhone, map the Unicode private use area in iPhone to the one in Android after
Expand Down Expand Up @@ -97,6 +113,21 @@ static String readToString(InputStream in, String charset) throws IOException {
charset = DEFAULT_CHARSET;
}

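// When the charset is US-ASCII at this point — typically because Content-Type had no charset
// parameter, but also when it was reset to DEFAULT_CHARSET above — auto-detect ISO-2022-JP by
// scanning the body for its 7-bit escape sequences (ESC $ B or ESC $ @).
// Japanese email clients (especially feature phones and carrier webmail) often omit the
// charset parameter for ISO-2022-JP bodies, causing garbled "$B..." output when decoded
// as US-ASCII.
// The stream is read fully into memory so that it can be scanned and then decoded from the start.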
if (charset.equalsIgnoreCase(DEFAULT_CHARSET)) {
byte[] bodyBytes = IOUtils.toByteArray(in);
if (hasIso2022JpEscapeSequence(bodyBytes)) {
in = new Iso2022JpToShiftJisInputStream(new ByteArrayInputStream(bodyBytes));
charset = SHIFT_JIS;
} else {
in = new ByteArrayInputStream(bodyBytes);
}
}

/*
* Convert and return as new String
*/
Expand All @@ -107,6 +138,25 @@ static String readToString(InputStream in, String charset) throws IOException {
return str;
}

/**
 * Reports whether {@code data} contains one of the ISO-2022-JP escape sequences that switch to a
 * two-byte JIS character set: {@code ESC $ B} (JIS X 0208-1983) or {@code ESC $ @}
 * (JIS X 0208-1978).
 *
 * <p>Used to recognise raw 7-bit ISO-2022-JP bodies that were sent without a charset parameter.
 * Only these two designations are checked; the return-to-ASCII sequence {@code ESC ( B} on its
 * own does not count.
 *
 * @param data the bytes to scan; must not be {@code null}
 * @return {@code true} if either three-byte sequence occurs anywhere in {@code data}
 */
static boolean hasIso2022JpEscapeSequence(byte[] data) {
    // Last index at which a complete three-byte sequence can still start.
    final int lastStart = data.length - 3;
    int pos = 0;
    while (pos <= lastStart) {
        if (data[pos] == 0x1B && data[pos + 1] == '$') {
            byte designator = data[pos + 2];
            if (designator == 'B' || designator == '@') {
                return true;
            }
        }
        pos++;
    }
    return false;
}

private static String importStringFromIphone(String str) {
StringBuilder buff = new StringBuilder(str.length());
for (int i = 0; i < str.length(); i = str.offsetByCodePoints(i, 1)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,33 @@ private static String getJisVariantFromReceivedHeaders(Part message) {
}

/**
 * Extracts the address from the FOR clause of a Received header value.
 *
 * <p>Both {@code "... for <user@docomo.ne.jp>;"} and {@code "... for user@docomo.ne.jp;"} yield
 * {@code "user@docomo.ne.jp"}. The keyword is matched case-insensitively and may be surrounded by
 * any whitespace, so a FOR clause that starts on a folded continuation line is found as well.
 * The first whitespace-delimited {@code for} token wins; no attempt is made to skip comments.
 *
 * @param receivedHeader the value of a single Received header; may be {@code null}
 * @return the address, or {@code null} if the header is {@code null}, has no FOR clause, has an
 *         unterminated {@code <...>} address, or the address is empty
 */
private static String getAddressFromReceivedHeader(String receivedHeader) {
    if (receivedHeader == null) {
        return null;
    }

    final int length = receivedHeader.length();

    // Find "for" as a stand-alone token: whitespace on both sides, any letter case.
    int addressStart = -1;
    for (int i = 1; i + 3 < length; i++) {
        if (Character.isWhitespace(receivedHeader.charAt(i - 1))
                && receivedHeader.regionMatches(true, i, "for", 0, 3)
                && Character.isWhitespace(receivedHeader.charAt(i + 3))) {
            addressStart = i + 4;
            break;
        }
    }
    if (addressStart == -1) {
        return null;
    }

    // Skip any additional whitespace between the keyword and the address.
    while (addressStart < length && Character.isWhitespace(receivedHeader.charAt(addressStart))) {
        addressStart++;
    }

    String address;
    if (addressStart < length && receivedHeader.charAt(addressStart) == '<') {
        // Angle-bracket form: take everything up to the closing bracket.
        int close = receivedHeader.indexOf('>', addressStart + 1);
        if (close == -1) {
            return null;
        }
        address = receivedHeader.substring(addressStart + 1, close);
    } else {
        // Bare form: the address ends at the first whitespace or semicolon.
        int end = addressStart;
        while (end < length) {
            char c = receivedHeader.charAt(end);
            if (c == ';' || Character.isWhitespace(c)) {
                break;
            }
            end++;
        }
        address = receivedHeader.substring(addressStart, end);
    }

    return address.isEmpty() ? null : address;
}

private static String getJisVariantFromFromHeaders(Message message) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;


public class CharsetSupportTest {
Expand Down Expand Up @@ -100,6 +102,32 @@ public void testFixupCharset() throws Exception {
assertEquals(expect, CharsetSupport.fixupCharset(charsetOnMail, message));
}

/** Checks that every listed Shift_JIS alias is rewritten to "shift_jis" by fixupCharset. */
@Test
public void testFixupCharset_shiftJisAliases() throws Exception {
    String[] aliases = {"shift-jis", "sjis", "ms932", "windows-31j", "x-sjis", "x-ms-cp932"};
    MimeMessage message = new MimeMessage();
    for (String alias : aliases) {
        assertEquals("shift_jis", CharsetSupport.fixupCharset(alias, message));
    }
}

/**
 * Reads plain ASCII input with the "x-euc-jp" alias and expects it back unchanged, i.e. the alias
 * is accepted without throwing.
 */
@Test
public void readToString_withXEucJpAlias_shouldFallBackToEucJp() throws IOException {
    // Encode explicitly as US-ASCII so the fixture does not depend on the platform default charset.
    byte[] asciiBytes = "test".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
    InputStream inputStream = new ByteArrayInputStream(asciiBytes);
    String result = CharsetSupport.readToString(inputStream, "x-euc-jp");
    assertEquals("test", result);
}

/**
 * Reads plain ASCII input with the "euc_jp" alias and expects it back unchanged, i.e. the alias
 * is accepted without throwing.
 */
@Test
public void readToString_withEucJpUnderscoreAlias_shouldFallBackToEucJp() throws IOException {
    // Encode explicitly as US-ASCII so the fixture does not depend on the platform default charset.
    byte[] asciiBytes = "test".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
    InputStream inputStream = new ByteArrayInputStream(asciiBytes);
    String result = CharsetSupport.readToString(inputStream, "euc_jp");
    assertEquals("test", result);
}

@Test
public void readToString_withUnsupportedCharset_shouldFallBackToAscii() throws IOException {
InputStream inputStream = new ByteArrayInputStream("input".getBytes());
Expand All @@ -119,4 +147,42 @@ public void readToString_withInvalidCharset_shouldFallBackToAscii() throws IOExc

assertEquals("input", result);
}

// hasIso2022JpEscapeSequence

/** ESC $ B followed by two more bytes must be detected. */
@Test
public void hasIso2022JpEscapeSequence_withEscDollarB_returnsTrue() {
    boolean detected = CharsetSupport.hasIso2022JpEscapeSequence(new byte[] {0x1B, '$', 'B', 0x25, 0x46});
    assertTrue(detected);
}

/** ESC $ @ followed by two more bytes must be detected. */
@Test
public void hasIso2022JpEscapeSequence_withEscDollarAt_returnsTrue() {
    boolean detected = CharsetSupport.hasIso2022JpEscapeSequence(new byte[] {0x1B, '$', '@', 0x25, 0x46});
    assertTrue(detected);
}

/** Ordinary ASCII text without any ESC byte must not be detected. */
@Test
public void hasIso2022JpEscapeSequence_withNoEscSequence_returnsFalse() {
    String plainText = "Hello, world!";
    boolean detected = CharsetSupport.hasIso2022JpEscapeSequence(
            plainText.getBytes(java.nio.charset.StandardCharsets.US_ASCII));
    assertFalse(detected);
}

/** The return-to-ASCII sequence ESC ( B on its own must not trigger detection. */
@Test
public void hasIso2022JpEscapeSequence_withEscOpenParenB_returnsFalse() {
    boolean detected = CharsetSupport.hasIso2022JpEscapeSequence(new byte[] {0x1B, '(', 'B'});
    assertFalse(detected);
}

/** An empty input must not be detected. */
@Test
public void hasIso2022JpEscapeSequence_withEmptyArray_returnsFalse() {
    byte[] empty = new byte[0];
    boolean detected = CharsetSupport.hasIso2022JpEscapeSequence(empty);
    assertFalse(detected);
}

/** A truncated sequence (ESC $ with no third byte) must not be detected. */
@Test
public void hasIso2022JpEscapeSequence_withTooShortForSequence_returnsFalse() {
    // Two bytes only; a full designation needs three.
    boolean detected = CharsetSupport.hasIso2022JpEscapeSequence(new byte[] {0x1B, '$'});
    assertFalse(detected);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package com.fsck.k9.mail.internet;

import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;


/**
 * Tests for {@code JisSupport.getJisVariantFromMessage}, covering detection through the From
 * header, the X-Mailer header and the FOR clause of a Received header.
 */
public class JisSupportTest {

    /** From address used when the sender itself should not determine the variant. */
    private static final String NEUTRAL_SENDER = "user@example.com";

    /** Builds a message that carries only a From header. */
    private static MimeMessage messageFrom(String from) throws Exception {
        MimeMessage message = new MimeMessage();
        message.setHeader("From", from);
        return message;
    }

    /** Builds a message from the neutral sender with the given Received header. */
    private static MimeMessage messageReceived(String received) throws Exception {
        MimeMessage message = messageFrom(NEUTRAL_SENDER);
        message.setHeader("Received", received);
        return message;
    }

    // getJisVariantFromMessage via From header

    @Test
    public void getJisVariantFromMessage_docomoSender_returnsDocomo() throws Exception {
        MimeMessage message = messageFrom("user@docomo.ne.jp");
        assertEquals("docomo", JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_softbankSender_returnsSoftbank() throws Exception {
        MimeMessage message = messageFrom("user@softbank.ne.jp");
        assertEquals("softbank", JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_kddiSender_returnsKddi() throws Exception {
        MimeMessage message = messageFrom("user@ezweb.ne.jp");
        assertEquals("kddi", JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_unknownSender_returnsNull() throws Exception {
        MimeMessage message = messageFrom(NEUTRAL_SENDER);
        assertNull(JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_iPhoneMailer_returnsIphone() throws Exception {
        MimeMessage message = messageFrom(NEUTRAL_SENDER);
        message.setHeader("X-Mailer", "iPhone Mail A380");
        assertEquals("iphone", JisSupport.getJisVariantFromMessage(message));
    }

    // getJisVariantFromMessage via Received header FOR clause

    @Test
    public void getJisVariantFromMessage_receivedForDocomoAngleBracket_returnsDocomo() throws Exception {
        MimeMessage message = messageReceived(
                "from mail.example.com (mail.example.com [1.2.3.4]) for <user@docomo.ne.jp>;");
        assertEquals("docomo", JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_receivedForDocomoNoAngleBracket_returnsDocomo() throws Exception {
        MimeMessage message = messageReceived(
                "from mail.example.com (mail.example.com [1.2.3.4]) for user@docomo.ne.jp;");
        assertEquals("docomo", JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_receivedForEzwebAddress_returnsKddi() throws Exception {
        MimeMessage message = messageReceived("from smtp.example.net for <user@ezweb.ne.jp>;");
        assertEquals("kddi", JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_receivedForUnknownAddress_returnsNull() throws Exception {
        MimeMessage message = messageReceived("from smtp.example.net for <user@example.com>;");
        assertNull(JisSupport.getJisVariantFromMessage(message));
    }

    @Test
    public void getJisVariantFromMessage_receivedWithoutFor_returnsNull() throws Exception {
        MimeMessage message = messageReceived("from smtp.example.net by mx.example.com;");
        assertNull(JisSupport.getJisVariantFromMessage(message));
    }
}
Loading
Loading