Skip to content

Commit 5de4b93

Browse files
committed
perf: table compression
1 parent a8c8b01 commit 5de4b93

3 files changed

Lines changed: 2356 additions & 2391 deletions

File tree

UnicodeBasic/TableLookup.lean

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -177,18 +177,20 @@ def lookupGC (c : UInt32) : GC :=
177177
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
178178
| (_, v, gc) =>
179179
if c ≤ v then
180-
if gc == .LC then
181-
if c &&& 1 == 0 then .Lu else .Ll
182-
else if gc == .PG then
183-
if c &&& 1 == 0 then .Ps else .Pe
184-
else if gc == .PQ then
185-
if c &&& 1 == 0 then .Pi else .Pf
180+
let pb : UInt32 := gc >>> 31
181+
let gc : UInt32 := gc &&& 0x7FFFFFFF
182+
if gc == GC.LC then
183+
if (c &&& 1) == pb then .Lu else .Ll
184+
else if gc == GC.PG then
185+
if (c &&& 1) == pb then .Ps else .Pe
186+
else if gc == GC.PQ then
187+
if (c &&& 1) == pb then .Pi else .Pf
186188
else gc
187189
else .Cn
188190
where
189191
str : String := include_str "../data/table/General_Category.txt"
190-
table : Thunk <| Array (UInt32 × UInt32 × GC) :=
191-
parseDataTable str fun _ _ x => GC.ofAbbrev! x[0]!
192+
table : Thunk <| Array (UInt32 × UInt32 × UInt32) :=
193+
parseDataTable str fun _ _ x => UInt32.ofNat x[0]!.toNat?.get!
192194

193195
set_option linter.deprecated false in
194196
@[deprecated Unicode.lookupGC (since := "v1.3.0")]
@@ -290,6 +292,19 @@ where
290292
| _ => panic! "invalid table data"
291293
else .numeric (-4) none
292294

295+
/-- Get other properties using lookup table
296+
297+
Unicode properties: `Other_Alphabetic`, `Other_Lowercase`, `Other_Uppercase`, `Other_Math` -/
298+
def lookupOther (c : UInt32) : UInt32 :=
299+
let table := table.get
300+
if c < table[0]!.1 then 0 else
301+
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
302+
| (_, v, op) => if c ≤ v then UInt32.ofNat op else 0
303+
where
304+
str : String := include_str "../data/table/Other.txt"
305+
table : Thunk <| Array (UInt32 × UInt32 × Nat) :=
306+
parseDataTable str fun _ _ x => x[0]!.toNat?.get!
307+
293308
/-! Properties -/
294309

295310
/-- Check if code point is alphabetic using lookup table

0 commit comments

Comments
 (0)