Skip to content

Commit 4c08c70

Browse files
committed
chore: restore history
1 parent 061d218 commit 4c08c70

11 files changed

Lines changed: 173 additions & 4 deletions

File tree

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
zip -rq docs-${TAG_NAME}.zip doc doc-data
3838
3939
- name: Release
40-
uses: softprops/action-gh-release@v2
40+
uses: softprops/action-gh-release@v3
4141
with:
4242
files: |
4343
docs/docs-${{ github.ref_name }}.tar.gz

.github/workflows/update-toolchain.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ jobs:
1414
- name: checkout
1515
uses: actions/checkout@v6
1616
with:
17+
ref: nightly-testing
1718
fetch-depth: 0
1819

1920
- name: install jq
@@ -22,7 +23,7 @@ jobs:
2223
- name: get toolchain tags
2324
id: toolchain-tag
2425
run: |
25-
REPO_TAG=$(sed 's/.*://1' lean-toolchain | tr -d '\n')
26+
REPO_TAG=$(git show main:lean-toolchain | cut -d':' -f2 | tr -d '\n')
2627
LEAN_TAG=$(curl -s "https://api.github.com/repos/leanprover/lean4/releases" | jq -r '.[0].tag_name' | tr -d '\n')
2728
echo "repo_tag=$REPO_TAG" >> $GITHUB_OUTPUT
2829
echo "lean_tag=$LEAN_TAG" >> $GITHUB_OUTPUT
@@ -32,8 +33,7 @@ jobs:
3233
if: ${{ steps.toolchain-tag.outputs.lean_tag != steps.toolchain-tag.outputs.repo_tag }}
3334
run: |
3435
git config user.name "${{ github.actor }}"
35-
git config user.email "${{ github.actor_id}}+${{ github.actor }}@users.noreply.github.com"
36-
git merge -X theirs origin/nightly-testing
36+
git config user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com"
3737
echo "leanprover/lean4:${{ steps.toolchain-tag.outputs.lean_tag }}" > lean-toolchain
3838
3939
- name: create pull request
@@ -43,5 +43,6 @@ jobs:
4343
token: ${{ secrets.UNICODE_BASIC_TOKEN }}
4444
commit-message: "chore: update toolchain ${{ steps.toolchain-tag.outputs.lean_tag }}"
4545
branch: update-toolchain-${{ steps.toolchain-tag.outputs.lean_tag }}
46+
base: main
4647
delete-branch: true
4748
title: "chore: update toolchain ${{ steps.toolchain-tag.outputs.lean_tag }}"

UnicodeBasic.lean

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,27 @@ namespace Unicode
5858
@[inline]
5959
def getName (char : Char) : String := lookupName char.val
6060

61+
/-!
62+
## Script ##
63+
-/
64+
65+
/-- Get character script
66+
67+
Unicode property: `Script`
68+
-/
69+
@[inline]
70+
def getScript (char : Char) : Script := lookupScript char.val
71+
72+
/-- Get script name
73+
74+
Returns `none` if the script code is unassigned.
75+
76+
Unicode property: `Script`
77+
-/
78+
@[inline]
79+
def getScriptName? (s : Script) : Option String :=
80+
lookupScriptName s |>.map toString
81+
6182
/-!
6283
## Bidirectional Properties ##
6384
-/

UnicodeBasic/TableLookup.lean

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ protected abbrev oMath : UInt64 := 0x800000000
2222
@[extern "unicode_prop_lookup"]
2323
protected opaque lookupProp (c : UInt32) : UInt64
2424

25+
@[extern "unicode_script_lookup"]
26+
protected opaque lookupScript (c : UInt32) : Script
27+
2528
end CLib
2629

2730
/-- Binary search -/
@@ -386,4 +389,22 @@ where
386389
str : String := include_str "../data/table/White_Space.txt"
387390
table : Thunk <| Array (UInt32 × UInt32) := parsePropTable str
388391

392+
/-- Get the script of a code point using lookup table
393+
394+
Unicode property: `Script` -/
395+
@[inline]
396+
def lookupScript (c : UInt32) : Script := CLib.lookupScript c
397+
398+
/-- Get the name of a script
399+
400+
Unicode property: `Script` -/
401+
def lookupScriptName (s : Script) : Option String.Slice :=
402+
let table := table.get
403+
if s.code < table[0]!.1 then none else
404+
match table[find s.code (fun i => table[i]!.1) 0 table.size.toUSize]! with
405+
| (c, v) => if s.code = c then some v else none
406+
where
407+
str : String := include_str "../data/table/Script_Name.txt"
408+
table : Thunk <| Array (UInt32 × String.Slice) := parseTable str fun _ n => n[0]!
409+
389410
end Unicode

UnicodeBasic/Types.lean

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
-/
55

6+
import Std.Data.HashMap
7+
68
/-- Low-level conversion from `UInt32` to `Char` (*unsafe*)
79
810
This function translates to a no-op in the compiler. However, it does not
@@ -1011,4 +1013,64 @@ def BidiClass.ofAbbrev! (abbr : String.Slice) : BidiClass :=
10111013
instance : Repr BidiClass where
10121014
reprPrec bc _ := s!"Unicode.BidiClass.{bc.toAbbrev}"
10131015

1016+
/-!
1017+
## Scripts ##
1018+
-/
1019+
1020+
/-- Check if valid script identifier -/
1021+
@[inline]
1022+
def Script.isValid (c : UInt32) : Bool :=
1023+
let c0 := (c >>> 24).toUInt8
1024+
let c1 := (c >>> 16).toUInt8
1025+
let c2 := (c >>> 8).toUInt8
1026+
let c3 := c.toUInt8
1027+
(c0 ≤ 'Z'.toUInt8 && 'A'.toUInt8 ≤ c0)
1028+
&& (c1 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c1)
1029+
&& (c2 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c2)
1030+
&& (c3 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c3)
1031+
1032+
/-- Script identifier type -/
1033+
structure Script where
1034+
code : UInt32
1035+
is_valid : Script.isValid code
1036+
deriving DecidableEq, Hashable
1037+
1038+
namespace Script
1039+
1040+
/-- Default value is `Zzzz` (`Unknown`) -/
1041+
instance : Inhabited Script where
1042+
default := {
1043+
code := (((('Z'.val <<< 8 ||| 'z'.val) <<< 8) ||| 'z'.val) <<< 8) ||| 'z'.val
1044+
is_valid := by decide
1045+
}
1046+
1047+
/-- String abbreviation of script -/
1048+
@[extern "unicode_script_to_abbrev"]
1049+
def toAbbrev : Script → String
1050+
| ⟨c, _⟩ =>
1051+
let c0 := Char.ofUInt8 (c >>> 24).toUInt8
1052+
let c1 := Char.ofUInt8 (c >>> 16).toUInt8
1053+
let c2 := Char.ofUInt8 (c >>> 8).toUInt8
1054+
let c3 := Char.ofUInt8 c.toUInt8
1055+
String.ofList [c0, c1, c2, c3]
1056+
1057+
@[extern "unicode_script_of_abbrev"]
1058+
private opaque ofAbbrevAux (abbr : String) : UInt32
1059+
1060+
/-- Get script from abbreviation -/
1061+
def ofAbbrev? (abbr : String.Slice) : Option Script :=
1062+
if abbr.utf8ByteSize = 4 then
1063+
let code := ofAbbrevAux abbr.toString
1064+
if h : Script.isValid code then
1065+
some ⟨code, h⟩
1066+
else
1067+
none
1068+
else
1069+
none
1070+
1071+
@[inline, inherit_doc ofAbbrev?]
1072+
def ofAbbrev! (abbr : String.Slice) : Script := ofAbbrev? abbr |>.get!
1073+
1074+
end Script
1075+
10141076
end Unicode

UnicodeCLib/basic.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#include <lean/lean.h>
2+
#ifdef _WIN32
3+
#include <winsock2.h>
4+
#else
25
#include <arpa/inet.h>
6+
#endif
37
#include "basic.h"
48

59
static inline int unicode_script_is_valid(uint32_t c) {

UnicodeCLib/basic.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ LEAN_EXPORT uint64_t unicode_prop_lookup(uint32_t c);
6969

7070
LEAN_EXPORT uint64_t unicode_case_lookup(uint32_t c);
7171

72+
LEAN_EXPORT uint32_t unicode_script_lookup(uint32_t c);
73+
74+
LEAN_EXPORT uint32_t unicode_script_of_abbrev(b_lean_obj_arg abbr);
75+
76+
LEAN_EXPORT lean_obj_res unicode_script_to_abbrev(uint32_t scr);
77+
7278
#ifdef __cplusplus
7379
}
7480
#endif

UnicodeData.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ Released under Apache 2.0 license as described in the file LICENSE.
66
import UnicodeData.Aliases
77
import UnicodeData.Basic
88
import UnicodeData.PropList
9+
import UnicodeData.Scripts

lakefile.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,5 @@ script updateUCD do downloadUCD [
6262
"PropList.txt",
6363
"PropertyAliases.txt",
6464
"PropertyValueAliases.txt",
65+
"Scripts.txt",
6566
]

makeCLib.lean

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,45 @@ static const unicode_data_t table[] = {"
191191
file.putStrLn s!"\{UINT32_C({c₀}),UINT32_C({c₁}),UINT64_C({d})},"
192192
file.putStrLn "};"
193193

194+
def scriptTable : Array (UInt32 × UInt32 × UInt64) :=
195+
let t := Scripts.data.toArray.flatMap fun (sc, t) =>
196+
let sc := Script.ofAbbrev! <| PropertyValueAliases.getShortName! "sc" sc
197+
t.map fun (c₀, c₁) => (c₀, c₁, sc.code.toUInt64)
198+
t.qsort fun (a, _) (b, _) => a < b
199+
200+
def makeScriptC : IO Unit :=
201+
IO.FS.withFile (clibDir / "script.c") .write fun file => do
202+
file.putStrLn s!"/* THIS IS A GENERATED FILE - DO NOT EDIT */
203+
#include \"basic.h\"
204+
205+
#define TABLE_SIZE {scriptTable.size}
206+
#define SCRIPT_UNKNOWN UINT32_C({Script.ofAbbrev! "Zzzz" |>.code})"
207+
file.putStrLn "
208+
static const unicode_data_t table[TABLE_SIZE];
209+
210+
uint32_t unicode_script_lookup(uint32_t c) {
211+
unicode_data_t v;
212+
if (c >= table[0].cmin) {
213+
unsigned int lo = 0;
214+
unsigned int hi = TABLE_SIZE;
215+
unsigned int i = (lo + hi)/2;
216+
while (i != lo) {
217+
if (c < table[i].cmin) hi = i;
218+
else lo = i;
219+
i = (lo + hi)/2;
220+
}
221+
if (c <= table[i].cmax) return (uint32_t)table[i].data;
222+
}
223+
return SCRIPT_UNKNOWN;
224+
}
225+
226+
static const unicode_data_t table[] = {"
227+
for (c₀, c₁, d) in scriptTable do
228+
file.putStrLn s!"\{UINT32_C({c₀}),UINT32_C({c₁}),UINT64_C({d})},"
229+
file.putStrLn "};"
230+
194231
def main : IO UInt32 := do
195232
makeCaseC
196233
makePropC
234+
makeScriptC
197235
return 0

0 commit comments

Comments
 (0)