@@ -3,6 +3,8 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved.
33Released under Apache 2.0 license as described in the file LICENSE.
44-/
55
6+ import Std.Data.HashMap
7+
68/-- Low-level conversion from `UInt32` to `Char` (*unsafe* )
79
810 This function translates to a no-op in the compiler. However, it does not
@@ -1011,4 +1013,64 @@ def BidiClass.ofAbbrev! (abbr : String.Slice) : BidiClass :=
/-- `Repr` instance for `BidiClass`.

The precedence argument is ignored; the output is the text `Unicode.BidiClass.` followed by the
abbreviation returned by `BidiClass.toAbbrev`. -/
10111013instance : Repr BidiClass where
10121014 reprPrec bc _ := s! "Unicode.BidiClass.{ bc.toAbbrev} "
10131015
1016+ /-!
1017+ ## Scripts ##
1018+ -/
1019+
/-- Check whether `c` encodes a valid script identifier.

Reading the four bytes of `c` from most to least significant, the first must be an ASCII
uppercase letter (`A`–`Z`) and the remaining three must be ASCII lowercase letters (`a`–`z`). -/
@[inline]
def Script.isValid (c : UInt32) : Bool :=
  -- Byte-level range predicates for the two letter classes.
  let isUpper (b : UInt8) : Bool := 'A'.toUInt8 ≤ b && b ≤ 'Z'.toUInt8
  let isLower (b : UInt8) : Bool := 'a'.toUInt8 ≤ b && b ≤ 'z'.toUInt8
  isUpper (c >>> 24).toUInt8
    && isLower (c >>> 16).toUInt8
    && isLower (c >>> 8).toUInt8
    && isLower c.toUInt8
1031+
/-- Script identifier type: a 32-bit code bundled with a proof that it passes `Script.isValid`. -/
structure Script where
  /-- Raw 32-bit script code. -/
  code : UInt32
  /-- Evidence that `code` satisfies `Script.isValid`. -/
  is_valid : Script.isValid code = true
deriving DecidableEq, Hashable
1037+
1038+ namespace Script
1039+
/-- The default script is `Zzzz` (`Unknown`). -/
instance : Inhabited Script where
  default :=
    -- Pack the bytes `Z`, `z`, `z`, `z` from most to least significant; validity is decided
    -- by evaluation.
    ⟨('Z'.val <<< 24) ||| ('z'.val <<< 16) ||| ('z'.val <<< 8) ||| 'z'.val, by decide⟩
1046+
/-- Four-character string abbreviation of a script.

The characters are the four bytes of the script code, read from most to least significant.
The definition is bound to the external symbol `unicode_script_to_abbrev`. -/
@[extern "unicode_script_to_abbrev"]
def toAbbrev : Script → String
  | ⟨code, _⟩ =>
    -- Right-shift amounts that bring each byte, in output order, into the low 8 bits.
    let shifts : List UInt32 := [24, 16, 8, 0]
    String.ofList (shifts.map fun k => Char.ofUInt8 (code >>> k).toUInt8)
1056+
/-- Foreign helper behind `ofAbbrev?`, bound to the external symbol `unicode_script_of_abbrev`.

Takes an abbreviation string and yields a raw `UInt32` code. The result is not trusted:
`ofAbbrev?` only calls this on inputs of exactly four UTF-8 bytes and then checks the returned
code with `Script.isValid`.
NOTE(review): presumably packs the four bytes most-significant first, mirroring `toAbbrev` —
confirm against the external implementation. -/
1057+ @ [extern "unicode_script_of_abbrev" ]
1058+ private opaque ofAbbrevAux (abbr : String) : UInt32
1059+
/-- Get script from abbreviation.

Returns `none` unless `abbr` is exactly four UTF-8 bytes long and the code computed from it
passes `Script.isValid`. -/
def ofAbbrev? (abbr : String.Slice) : Option Script :=
  if abbr.utf8ByteSize ≠ 4 then
    -- Wrong length: reject before calling the foreign helper.
    none
  else
    let raw := ofAbbrevAux abbr.toString
    if valid : Script.isValid raw then some ⟨raw, valid⟩ else none
1070+
-- Panicking variant of `ofAbbrev?`: `Option.get!` panics when the lookup yields `none`.
@[inline, inherit_doc ofAbbrev?]
def ofAbbrev! (abbr : String.Slice) : Script :=
  (ofAbbrev? abbr).get!
1073+
1074+ end Script
1075+
10141076end Unicode
0 commit comments