Skip to content

Commit 585fdfe

Browse files
pcai authored and matzbot committed
[ruby/prism] add Windows-874 encoding
ruby/prism@0670dd3b9a
1 parent 229f6e5 commit 585fdfe

4 files changed

Lines changed: 39 additions & 0 deletions

File tree

prism/enc/pm_encoding.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,5 +210,6 @@ extern pm_encoding_t pm_encoding_windows_1256;
210210
extern pm_encoding_t pm_encoding_windows_1257;
211211
extern pm_encoding_t pm_encoding_windows_1258;
212212
extern pm_encoding_t pm_encoding_windows_31j;
213+
extern pm_encoding_t pm_encoding_windows_874;
213214

214215
#endif

prism/enc/pm_tables.c

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1104,6 +1104,30 @@ static uint8_t pm_encoding_windows_1258_table[256] = {
11041104
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
11051105
};
11061106

1107+
/**
1108+
* Each element of the following table contains a bitfield that indicates a
1109+
* piece of information about the corresponding windows-874 character.
1110+
*/
1111+
static uint8_t pm_encoding_windows_874_table[256] = {
1112+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
1113+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
1114+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
1115+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
1116+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
1117+
0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
1118+
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
1119+
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
1120+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
1121+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
1122+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
1123+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
1124+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
1125+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
1126+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
1127+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
1128+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
1129+
};
1130+
11071131
/**
11081132
* Returns the size of the next character in the ASCII encoding. This basically
11091133
* means that if the top bit is not set, the character is 1 byte long.
@@ -1214,6 +1238,7 @@ PRISM_ENCODING_TABLE(windows_1255)
12141238
PRISM_ENCODING_TABLE(windows_1256)
12151239
PRISM_ENCODING_TABLE(windows_1257)
12161240
PRISM_ENCODING_TABLE(windows_1258)
1241+
PRISM_ENCODING_TABLE(windows_874)
12171242

12181243
#undef PRISM_ENCODING_TABLE
12191244

@@ -1686,3 +1711,13 @@ pm_encoding_t pm_encoding_windows_1258 = {
16861711
.isupper_char = pm_encoding_windows_1258_isupper_char,
16871712
.multibyte = false
16881713
};
1714+
1715+
/** Windows-874 */
1716+
pm_encoding_t pm_encoding_windows_874 = {
1717+
.name = "Windows-874",
1718+
.char_width = pm_encoding_single_char_width,
1719+
.alnum_char = pm_encoding_windows_874_alnum_char,
1720+
.alpha_char = pm_encoding_windows_874_alpha_char,
1721+
.isupper_char = pm_encoding_windows_874_isupper_char,
1722+
.multibyte = false
1723+
};

prism/prism.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6075,6 +6075,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
60756075
ENCODING1("CP860", pm_encoding_ibm860);
60766076
ENCODING1("CP861", pm_encoding_ibm861);
60776077
ENCODING1("CP862", pm_encoding_ibm862);
6078+
ENCODING1("CP874", pm_encoding_windows_874);
60786079
ENCODING1("CP878", pm_encoding_koi8_r);
60796080
ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
60806081
ENCODING1("CP936", pm_encoding_gbk);
@@ -6156,6 +6157,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
61566157
break;
61576158
case 'W': case 'w':
61586159
ENCODING1("Windows-31J", pm_encoding_windows_31j);
6160+
ENCODING1("Windows-874", pm_encoding_windows_874);
61596161
ENCODING1("Windows-1250", pm_encoding_windows_1250);
61606162
ENCODING1("Windows-1251", pm_encoding_windows_1251);
61616163
ENCODING1("Windows-1252", pm_encoding_windows_1252);

test/prism/encoding_test.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ class EncodingTest < TestCase
5353
Encoding::Windows_1256 => 0x00...0x100,
5454
Encoding::Windows_1257 => 0x00...0x100,
5555
Encoding::Windows_1258 => 0x00...0x100,
56+
Encoding::Windows_874 => 0x00...0x100,
5657
Encoding::Big5 => 0x00...0x10000,
5758
Encoding::CP51932 => 0x00...0x10000,
5859
Encoding::GBK => 0x00...0x10000,

0 commit comments

Comments
 (0)