Skip to content

Commit 05954ce

Browse files
authored
fix: update toolchain v4.30.0-rc2 (#139)
1 parent a816554 commit 05954ce

10 files changed

Lines changed: 169 additions & 1 deletion

File tree

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
zip -rq docs-${TAG_NAME}.zip doc doc-data
3838
3939
- name: Release
40-
uses: softprops/action-gh-release@v2
40+
uses: softprops/action-gh-release@v3
4141
with:
4242
files: |
4343
docs/docs-${{ github.ref_name }}.tar.gz

UnicodeBasic.lean

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,27 @@ namespace Unicode
5858
@[inline]
5959
def getName (char : Char) : String := lookupName char.val
6060

61+
/-!
62+
## Script ##
63+
-/
64+
65+
/-- Get character script
66+
67+
Unicode property: `Script`
68+
-/
69+
@[inline]
70+
def getScript (char : Char) : Script := lookupScript char.val
71+
72+
/-- Get script name
73+
74+
Returns `none` if the script code is unassigned.
75+
76+
Unicode property: `Script`
77+
-/
78+
@[inline]
79+
def getScriptName? (s : Script) : Option String :=
80+
lookupScriptName s |>.map toString
81+
6182
/-!
6283
## Bidirectional Properties ##
6384
-/

UnicodeBasic/TableLookup.lean

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ protected abbrev oMath : UInt64 := 0x800000000
2222
@[extern "unicode_prop_lookup"]
2323
protected opaque lookupProp (c : UInt32) : UInt64
2424

25+
@[extern "unicode_script_lookup"]
26+
protected opaque lookupScript (c : UInt32) : Script
27+
2528
end CLib
2629

2730
/-- Binary search -/
@@ -386,4 +389,22 @@ where
386389
str : String := include_str "../data/table/White_Space.txt"
387390
table : Thunk <| Array (UInt32 × UInt32) := parsePropTable str
388391

392+
/-- Get the script of a code point using lookup table
393+
394+
Unicode property: `Script` -/
395+
@[inline]
396+
def lookupScript (c : UInt32) : Script := CLib.lookupScript c
397+
398+
/-- Get the name of a script
399+
400+
Unicode property: `Script` -/
401+
def lookupScriptName (s : Script) : Option String.Slice :=
402+
let table := table.get
403+
if s.code < table[0]!.1 then none else
404+
match table[find s.code (fun i => table[i]!.1) 0 table.size.toUSize]! with
405+
| (c, v) => if s.code = c then some v else none
406+
where
407+
str : String := include_str "../data/table/Script_Name.txt"
408+
table : Thunk <| Array (UInt32 × String.Slice) := parseTable str fun _ n => n[0]!
409+
389410
end Unicode

UnicodeBasic/Types.lean

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
-/
55

6+
import Std.Data.HashMap
7+
68
/-- Low-level conversion from `UInt32` to `Char` (*unsafe*)
79
810
This function translates to a no-op in the compiler. However, it does not
@@ -1011,4 +1013,64 @@ def BidiClass.ofAbbrev! (abbr : String.Slice) : BidiClass :=
10111013
instance : Repr BidiClass where
10121014
reprPrec bc _ := s!"Unicode.BidiClass.{bc.toAbbrev}"
10131015

1016+
/-!
1017+
## Scripts ##
1018+
-/
1019+
1020+
/-- Check if valid script identifier -/
1021+
@[inline]
1022+
def Script.isValid (c : UInt32) : Bool :=
1023+
let c0 := (c >>> 24).toUInt8
1024+
let c1 := (c >>> 16).toUInt8
1025+
let c2 := (c >>> 8).toUInt8
1026+
let c3 := c.toUInt8
1027+
(c0 ≤ 'Z'.toUInt8 && 'A'.toUInt8 ≤ c0)
1028+
&& (c1 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c1)
1029+
&& (c2 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c2)
1030+
&& (c3 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c3)
1031+
1032+
/-- Script identifier type -/
1033+
structure Script where
1034+
code : UInt32
1035+
is_valid : Script.isValid code
1036+
deriving DecidableEq, Hashable
1037+
1038+
namespace Script
1039+
1040+
/-- Default value is `Zzzz` (`Unknown`) -/
1041+
instance : Inhabited Script where
1042+
default := {
1043+
code := (((('Z'.val <<< 8 ||| 'z'.val) <<< 8) ||| 'z'.val) <<< 8) ||| 'z'.val
1044+
is_valid := by decide
1045+
}
1046+
1047+
/-- String abbreviation of script -/
1048+
@[extern "unicode_script_to_abbrev"]
1049+
def toAbbrev : Script → String
1050+
| ⟨c, _⟩ =>
1051+
let c0 := Char.ofUInt8 (c >>> 24).toUInt8
1052+
let c1 := Char.ofUInt8 (c >>> 16).toUInt8
1053+
let c2 := Char.ofUInt8 (c >>> 8).toUInt8
1054+
let c3 := Char.ofUInt8 c.toUInt8
1055+
String.ofList [c0, c1, c2, c3]
1056+
1057+
@[extern "unicode_script_of_abbrev"]
1058+
private opaque ofAbbrevAux (abbr : String) : UInt32
1059+
1060+
/-- Get script from abbreviation -/
1061+
def ofAbbrev? (abbr : String.Slice) : Option Script :=
1062+
if abbr.utf8ByteSize = 4 then
1063+
let code := ofAbbrevAux abbr.toString
1064+
if h : Script.isValid code then
1065+
some ⟨code, h⟩
1066+
else
1067+
none
1068+
else
1069+
none
1070+
1071+
@[inline, inherit_doc ofAbbrev?]
1072+
def ofAbbrev! (abbr : String.Slice) : Script := ofAbbrev? abbr |>.get!
1073+
1074+
end Script
1075+
10141076
end Unicode

UnicodeCLib/basic.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#include <lean/lean.h>
2+
#ifdef _WIN32
3+
#include <winsock2.h>
4+
#else
25
#include <arpa/inet.h>
6+
#endif
37
#include "basic.h"
48

59
static inline int unicode_script_is_valid(uint32_t c) {

UnicodeCLib/basic.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ LEAN_EXPORT uint64_t unicode_prop_lookup(uint32_t c);
6969

7070
LEAN_EXPORT uint64_t unicode_case_lookup(uint32_t c);
7171

72+
LEAN_EXPORT uint32_t unicode_script_lookup(uint32_t c);
73+
74+
LEAN_EXPORT uint32_t unicode_script_of_abbrev(b_lean_obj_arg abbr);
75+
76+
LEAN_EXPORT lean_obj_res unicode_script_to_abbrev(uint32_t scr);
77+
7278
#ifdef __cplusplus
7379
}
7480
#endif

UnicodeData.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ Released under Apache 2.0 license as described in the file LICENSE.
66
import UnicodeData.Aliases
77
import UnicodeData.Basic
88
import UnicodeData.PropList
9+
import UnicodeData.Scripts

lakefile.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,5 @@ script updateUCD do downloadUCD [
6262
"PropList.txt",
6363
"PropertyAliases.txt",
6464
"PropertyValueAliases.txt",
65+
"Scripts.txt",
6566
]

makeCLib.lean

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,45 @@ static const unicode_data_t table[] = {"
191191
file.putStrLn s!"\{UINT32_C({c₀}),UINT32_C({c₁}),UINT64_C({d})},"
192192
file.putStrLn "};"
193193

194+
def scriptTable : Array (UInt32 × UInt32 × UInt64) :=
195+
let t := Scripts.data.toArray.flatMap fun (sc, t) =>
196+
let sc := Script.ofAbbrev! <| PropertyValueAliases.getShortName! "sc" sc
197+
t.map fun (c₀, c₁) => (c₀, c₁, sc.code.toUInt64)
198+
t.qsort fun (a, _) (b, _) => a < b
199+
200+
def makeScriptC : IO Unit :=
201+
IO.FS.withFile (clibDir / "script.c") .write fun file => do
202+
file.putStrLn s!"/* THIS IS A GENERATED FILE - DO NOT EDIT */
203+
#include \"basic.h\"
204+
205+
#define TABLE_SIZE {scriptTable.size}
206+
#define SCRIPT_UNKNOWN UINT32_C({Script.ofAbbrev! "Zzzz" |>.code})"
207+
file.putStrLn "
208+
static const unicode_data_t table[TABLE_SIZE];
209+
210+
uint32_t unicode_script_lookup(uint32_t c) {
211+
unicode_data_t v;
212+
if (c >= table[0].cmin) {
213+
unsigned int lo = 0;
214+
unsigned int hi = TABLE_SIZE;
215+
unsigned int i = (lo + hi)/2;
216+
while (i != lo) {
217+
if (c < table[i].cmin) hi = i;
218+
else lo = i;
219+
i = (lo + hi)/2;
220+
}
221+
if (c <= table[i].cmax) return (uint32_t)table[i].data;
222+
}
223+
return SCRIPT_UNKNOWN;
224+
}
225+
226+
static const unicode_data_t table[] = {"
227+
for (c₀, c₁, d) in scriptTable do
228+
file.putStrLn s!"\{UINT32_C({c₀}),UINT32_C({c₁}),UINT64_C({d})},"
229+
file.putStrLn "};"
230+
194231
def main : IO UInt32 := do
195232
makeCaseC
196233
makePropC
234+
makeScriptC
197235
return 0

makeTables.lean

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,12 @@ def mkWhiteSpace : Array (UInt32 × UInt32) :=
520520
| (c₀, some c₁) => (c₀, c₁)
521521
| (c₀, none) => (c₀, c₀)
522522

523+
def mkScriptName : Array (UInt32 × String) :=
524+
let t := PropertyAliases.getValues! "Script" |>.map fun name =>
525+
let s := Script.ofAbbrev! <| PropertyValueAliases.getShortName! "Script" name
526+
(s.code, name.toString)
527+
t.qsort fun (a, _) (b, _) => a < b
528+
523529
def main (args : List String) : IO UInt32 := do
524530
let args := if args != [] then args else [
525531
"Bidi_Class",
@@ -530,6 +536,7 @@ def main (args : List String) : IO UInt32 := do
530536
"Default_Ignorable_Code_Point",
531537
"Name",
532538
"Numeric_Value",
539+
"Script_Name",
533540
"White_Space"]
534541
let tableDir : System.FilePath := "."/"data"/"table"
535542
IO.FS.createDirAll tableDir
@@ -767,6 +774,13 @@ def main (args : List String) : IO UInt32 := do
767774
else
768775
file.putStrLn <| toHexStringAux c₀ ++ ";" ++ toHexStringAux c₁
769776
IO.println s!"Size: {(statsProp table).1} + {(statsProp table).2}"
777+
| "Script_Name" =>
778+
IO.println s!"Generating table {arg}"
779+
let table := mkScriptName
780+
IO.FS.withFile (tableDir/(arg ++ ".txt")) .write fun file => do
781+
for (c, name) in table do
782+
file.putStrLn <| toHexStringAux c ++ ";" ++ name
783+
IO.println s!"Size: {table.size}"
770784
| "Titlecase" =>
771785
IO.println s!"Generating table {arg}"
772786
let table ← mkTitlecase

0 commit comments

Comments
 (0)