Skip to content

Commit 45c6d87

Browse files
authored
fix: use String.Slice (#98)
1 parent ca6ee50 commit 45c6d87

9 files changed

Lines changed: 281 additions & 312 deletions

File tree

UnicodeBasic/CharacterDatabase.lean

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,21 @@ namespace Unicode
1212
around field values are not significant. Line comments are prefixed with a
1313
number sign `#` (U+0023).
1414
-/
15-
structure UCDStream extends Substring.Raw where
15+
structure UCDStream extends String.Slice where
1616
/-- `isUnihan` is true if the records are tab separated -/
1717
isUnihan := false
1818
deriving Inhabited
1919

2020
namespace UCDStream
2121

22-
/-- Make a `UCDStream` from a substring -/
23-
def ofSubstring (str : Substring.Raw) : UCDStream := { str with }
22+
/-- Make a `UCDStream` from a string slice -/
23+
def ofStringSlice (str : String.Slice) : UCDStream := { str with }
2424

2525
/-- Make a `UCDStream` from a string -/
26-
def ofString (str : String) : UCDStream where
27-
str := str
28-
startPos := 0
29-
stopPos := str.rawEndPos
26+
def ofString (str : String) : UCDStream := ofStringSlice str.toSlice
27+
28+
/-- Make a `UCDStream` from a substring -/
29+
def ofSubstring (str : Substring.Raw) : UCDStream := ofStringSlice str.toString.toSlice
3030

3131
/-- Make a `UCDStream` from a file -/
3232
def ofFile (path : System.FilePath) : IO UCDStream :=
@@ -36,29 +36,30 @@ def ofFile (path : System.FilePath) : IO UCDStream :=
3636
3737
Line comments are stripped and blank lines are skipped.
3838
-/
39-
protected partial def nextLine? (stream : UCDStream) : Option (Substring.Raw × UCDStream) := do
40-
if stream.isEmpty then failure else
41-
let line := stream.trimLeft.takeWhile (.!='\n')
42-
let nextPos := stream.next line.stopPos
43-
let line := (line.takeWhile (.!='#')).trimRight
39+
protected partial def nextLine? (stream : UCDStream) : Option (String.Slice × UCDStream) := do
40+
let line := stream.trimAsciiEnd.takeWhile (.!='\n')
41+
if h : line.rawEndPos < stream.rawEndPos then
42+
let nextPos := stream.findNextPos line.rawEndPos h
43+
let line := (line.takeWhile (.!='#')).trimAsciiEnd
4444
if line.isEmpty then
45-
UCDStream.nextLine? {stream with startPos := nextPos}
45+
UCDStream.nextLine? {stream with toSlice := stream.replaceStart nextPos}
4646
else
47-
return (line, {stream with startPos := nextPos})
47+
return (line, {stream with toSlice := stream.replaceStart nextPos})
48+
else failure
4849

4950
/-- Get the next record from the `UCDStream`
5051
5152
Spaces around field values are trimmed.
5253
-/
53-
protected def next? (stream : UCDStream) : Option (Array Substring.Raw × UCDStream) := do
54+
protected def next? (stream : UCDStream) : Option (Array String.Slice × UCDStream) := do
5455
let sep := if stream.isUnihan then "\t" else ";"
5556
let mut arr := #[]
5657
let (line, table) ← stream.nextLine?
57-
for item in line.splitOn sep do
58-
arr := arr.push item.trim
58+
for item in line.split sep do
59+
arr := arr.push item.trimAscii
5960
return (arr, table)
6061

61-
instance : Std.Stream UCDStream (Array Substring.Raw) where
62+
instance : Std.Stream UCDStream (Array String.Slice) where
6263
next? := UCDStream.next?
6364

6465
end UCDStream

UnicodeBasic/TableLookup.lean

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ private partial def find (c : UInt32) (t : USize → UInt32) (lo hi : USize) : U
3636
find c t mid hi
3737

3838
/-- Parse a simple table -/
39-
private def parseTable (s : String) (f : UInt32 → Array Substring.Raw → α) : Thunk <| Array (UInt32 × α) := Id.run do
39+
private def parseTable (s : String) (f : UInt32 → Array String.Slice → α) : Thunk <| Array (UInt32 × α) := Id.run do
4040
let mut r := #[]
4141
let mut stream := UCDStream.ofString s
4242
for record in stream do
@@ -46,7 +46,7 @@ private def parseTable (s : String) (f : UInt32 → Array Substring.Raw → α)
4646
return r
4747

4848
/-- Parse a range compressed data table -/
49-
private def parseDataTable (s : String) (f : UInt32 → UInt32 → Array Substring.Raw → α) : Thunk <| Array (UInt32 × UInt32 × α) := Id.run do
49+
private def parseDataTable (s : String) (f : UInt32 → UInt32 → Array String.Slice → α) : Thunk <| Array (UInt32 × UInt32 × α) := Id.run do
5050
let mut r := #[]
5151
let mut stream := UCDStream.ofString s
5252
for record in stream do
@@ -171,7 +171,7 @@ where
171171
else if x[0]! == "<square>" then some .square
172172
else if x[0]! == "<fraction>" then some .fraction
173173
else if x[0]! == "<compat>" then some .compat
174-
else panic! s!"invalid compatibility tag {x[0]!}"
174+
else panic! s!"invalid compatibility tag {x[0]!.copy}"
175175
(tag, x[1:].toArray.map ofHexString!)
176176

177177
/-- Get general category of a code point using lookup table
@@ -194,7 +194,7 @@ def lookupName (c : UInt32) : String :=
194194
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
195195
| (_, v, d) =>
196196
if c ≤ v then
197-
if d.get 0 == '<' then
197+
if Char.ofUInt8 (d.getUTF8Byte! 0) == '<' then
198198
if d == "<Control>" then
199199
s!"<control-{toHexStringAux c}>"
200200
else if d == "<Private Use>" then
@@ -227,12 +227,12 @@ def lookupName (c : UInt32) : String :=
227227
"TANGUT COMPONENT-" ++ i
228228
else if d == "<Tangut Ideograph>" then
229229
"TANGUT IDEOGRAPH-" ++ toHexStringAux c
230-
else panic! s!"unknown name range {d}"
231-
else Substring.Raw.toString d
230+
else panic! s!"unknown name range {d.copy}"
231+
else String.Slice.copy d
232232
else s!"<noncharacter-{toHexStringAux c}>"
233233
where
234234
str : String := include_str "../data/table/Name.txt"
235-
table : Thunk <| Array (UInt32 × UInt32 × Substring.Raw) :=
235+
table : Thunk <| Array (UInt32 × UInt32 × String.Slice) :=
236236
parseDataTable str fun _ _ x => x[0]!
237237

238238
/-- Get numeric value of a code point using lookup table
@@ -265,7 +265,7 @@ where
265265
str : String := include_str "../data/table/Numeric_Value.txt"
266266
table : Thunk <| Array (UInt32 × UInt32 × NumericType) :=
267267
parseDataTable str fun _ _ a =>
268-
let s := a[0]!.toString
268+
let s := a[0]!.copy
269269
if s == "decimal" then
270270
.decimal 0
271271
else if "digit".isPrefixOf s then

0 commit comments

Comments
 (0)