Skip to content

Commit ade31e7

Browse files
committed
adding missing char.lua
1 parent 8411ad7 commit ade31e7

1 file changed

Lines changed: 50 additions & 0 deletions

File tree

  • wiktionary_pron/lua_modules/string
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
local char = string.char
2+
3+
local concat = table.concat
4+
local tonumber = tonumber
5+
6+
--- Raise the "codepoint out of range" error.
-- @param cp Printable form of the offending codepoint (the callers in this
--   file pass a pre-formatted hexadecimal string such as "0x110000").
-- The error is raised at level 2, so it is attributed to the function that
-- called `err` rather than to `err` itself.
local function err(cp)
    local message = "Codepoint " .. cp
    message = message .. " is out of range: codepoints must be between 0x0 and 0x10FFFF."
    error(message, 2)
end
9+
10+
--- Encode one Unicode codepoint as a UTF-8 byte string.
-- @param cp A number, or a string that `tonumber` can convert to one.
-- @return The UTF-8 encoding of the codepoint (1 to 4 bytes), or "?" for
--   codepoints in the surrogate range 0xD800-0xDFFF.
-- Raises an error if `cp` cannot be converted to a number, is negative, or is
-- greater than 0x10FFFF.
local function utf8_char(cp)
    local n = tonumber(cp)
    if n == nil then
        -- Fail with a readable message instead of the opaque
        -- "attempt to compare nil with number" raised by the checks below.
        error("Codepoint " .. tostring(cp) .. " is not a number.", 2)
    end
    -- `/` yields non-integer values; Lua 5.1's string.char truncates them, but
    -- Lua 5.3+ rejects them, so every quotient is floored explicitly.
    local floor = math.floor
    if n < 0 then
        -- Report the magnitude that was actually passed in.
        err("-0x" .. ("%X"):format(-n))
    elseif n < 0x80 then
        -- Single byte: 0xxxxxxx
        return char(n)
    elseif n < 0x800 then
        -- Two bytes: 110xxxxx 10xxxxxx
        return char(
            0xC0 + floor(n / 0x40),
            0x80 + n % 0x40
        )
    elseif n < 0x10000 then
        if n >= 0xD800 and n < 0xE000 then
            return "?" -- mw.ustring.char returns "?" for surrogates.
        end
        -- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return char(
            0xE0 + floor(n / 0x1000),
            0x80 + floor(n / 0x40) % 0x40,
            0x80 + n % 0x40
        )
    elseif n < 0x110000 then
        -- Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return char(
            0xF0 + floor(n / 0x40000),
            0x80 + floor(n / 0x1000) % 0x40,
            0x80 + floor(n / 0x40) % 0x40,
            0x80 + n % 0x40
        )
    end
    err("0x" .. ("%X"):format(n))
end
40+
41+
--- Module export: convert one or more codepoints into a UTF-8 string.
-- @param cp First codepoint.
-- @param ... Further codepoints; their encodings are appended in order.
-- @return The concatenated UTF-8 encoding of the codepoints.
-- When the first extra argument is nil, only `cp` is encoded and no
-- intermediate table is built.
return function(cp, ...)
    if (...) ~= nil then
        -- Encode each collected codepoint in place, then join the pieces.
        local pieces = { cp, ... }
        for index = 1, #pieces do
            pieces[index] = utf8_char(pieces[index])
        end
        return concat(pieces)
    end
    return utf8_char(cp)
end

0 commit comments

Comments
 (0)