Skip to content

Commit 033908b

Browse files
committed
toml work
Signed-off-by: George Lemon <georgelemon@protonmail.com>
1 parent 873d1f5 commit 033908b

1 file changed

Lines changed: 91 additions & 6 deletions

File tree

src/openparser/toml.nim

Lines changed: 91 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ proc skipWhitespace(l: var TomlLexer, wsBeforeToken: var int): int =
152152
return 0
153153
result = lineIndentAt(l, l.pos)
154154

155+
proc peekChar*(lex: TomlLexer, offset: int): char =
  ## Returns the character `offset` positions past the lexer's current
  ## position, as reported by `charAt`. The lexer is taken by value and is
  ## not advanced.
  let target = lex.pos + offset
  result = charAt(lex, target)
155158

156159
proc readIdentifier(l: var TomlLexer): string =
157160
# Read an unquoted identifier (e.g. for keys or unquoted values)
@@ -171,7 +174,7 @@ proc readString(l: var TomlLexer, quote: char): string =
171174
advance(l) # Skip the opening quote
172175
while true:
173176
if l.current == '\0':
174-
raise newException(ValueError, "Unterminated string literal")
177+
raise newException(OpenParserTomlError, "Unterminated string literal")
175178
if l.current == quote:
176179
advance(l)
177180
break
@@ -191,6 +194,53 @@ proc readString(l: var TomlLexer, quote: char): string =
191194
result.add(l.current)
192195
advance(l)
193196

197+
proc readMultiLineString(l: var TomlLexer): string =
  ## Reads a multi-line string whose opening triple-quote delimiter (`"""` or
  ## `'''`) starts at the lexer's current position, and returns its contents.
  ##
  ## The quote character at the current position selects the flavour:
  ## * `"""` - basic string: backslash escapes and backslash-newline line
  ##   continuations are processed;
  ## * `'''` - literal string: the content is taken verbatim, with no escape
  ##   processing.
  ## The string ends only at three consecutive occurrences of the same quote
  ## character that opened it.
  ##
  ## Raises `OpenParserTomlError` when the input ends before the closing
  ## delimiter or right after a backslash.
  # Remember which quote opened the string before skipping the delimiter.
  # Anything other than a single quote is treated as a basic (`"""`) string.
  let quote = if l.current == '\'': '\'' else: '"'
  let isBasic = quote == '"'
  # consume the opening triple quotes
  advance(l); advance(l); advance(l)
  # a newline directly after the opening delimiter is trimmed
  if l.current == '\n':
    inc l.line
    l.col = 0
    advance(l)
  while true:
    if l.current == '\0':
      raise newException(OpenParserTomlError, "Unterminated multi-line string literal")
    # closing delimiter: three of the same quote character that opened the string
    if l.current == quote and l.charAt(l.pos + 1) == quote and
        l.charAt(l.pos + 2) == quote:
      advance(l); advance(l); advance(l)
      break
    if isBasic and l.current == '\\':
      # escapes and line continuations only apply to basic strings
      advance(l)
      if l.current == '\0':
        raise newException(OpenParserTomlError, "Unterminated escape in multi-line string")
      case l.current
      of '"': result.add('"')
      of '\\': result.add('\\')
      of 'n': result.add('\n')
      of 'r': result.add('\r')
      of 't': result.add('\t')
      of '\n':
        # line continuation: backslash + newline -> drop the newline and the
        # indentation (spaces/tabs) that follows it
        inc l.line
        l.col = 0
        advance(l)
        while l.current in {' ', '\t'}:
          advance(l)
        continue
      else:
        # unknown escape: preserve backslash + char
        result.add('\\')
        result.add(l.current)
      advance(l) # step past the escape character just handled
    else:
      # ordinary character (or any character of a literal string)
      if l.current == '\n':
        inc l.line
        l.col = 0
      result.add(l.current)
      advance(l)
243+
194244
proc readNumber(l: var TomlLexer, kind: var TomlTokenKind): string =
195245
result = ""
196246
kind = ttkInteger
@@ -474,6 +524,7 @@ let tokens = {
474524
'}': ttkRC
475525
}.toTable
476526

527+
# The two quote characters (single, double) tested against the current
# character when recognising a string delimiter.
const strQuote: array[2, char] = ['\'', '"']
477528
proc nextToken*(p: var TomlParser): TomlToken =
478529
## Lexical analysis to produce the next token from the input
479530
var wsBefore = 0
@@ -492,8 +543,12 @@ proc nextToken*(p: var TomlParser): TomlToken =
492543
result.kind = ttkComment
493544
result.value = p.lex.readComment()
494545
of '"', '\'':
495-
result.kind = ttkString
496-
result.value = p.lex.readString(p.lex.current)
546+
if p.lex.current in strQuote and p.lex.peekChar(1) == p.lex.current and p.lex.peekChar(2) == p.lex.current:
547+
result.kind = ttkString
548+
result.value = p.lex.readMultiLineString()
549+
else:
550+
result.kind = ttkString
551+
result.value = p.lex.readString(p.lex.current)
497552
of '0'..'9', '-', '+':
498553
result.value = p.lex.readNumber(result.kind)
499554
of '=', '.', ',', '[', ']', '{', '}':
@@ -516,7 +571,7 @@ proc nextToken*(p: var TomlParser): TomlToken =
516571
result.kind = ttkIdentifier
517572
result.value = p.lex.readIdentifier()
518573
else:
519-
raise newException(ValueError, "Invalid character: " & $(p.lex.current))
574+
raise newException(OpenParserTomlError, "Invalid character: " & $(p.lex.current))
520575

521576
proc error(p: var TomlParser, msg: string) =
522577
# Prefer current token coordinates over lexer cursor (lookahead-safe).
@@ -583,8 +638,36 @@ proc parseTomlDateTime(s: string): DateTime =
583638
return times.parse(sNoTz, fmt)
584639
except:
585640
discard
586-
raise newException(ValueError, "Failed to parse TOML datetime: " & s)
641+
raise newException(OpenParserTomlError, "Failed to parse TOML datetime: " & s)
642+
587643

644+
# Forward declarations (signature only, no body here). parseInlineObject
# calls parseHook before parseHook's body appears further down.
proc parseHook*(p: var TomlParser, v: var TomlNode)
645+
proc parseObject*(p: var TomlParser, ln: int): TomlNode
646+
647+
proc parseInlineObject(p: var TomlParser): TomlNode =
  ## Parses an inline table `{ key = value, ... }` and returns it as a table
  ## node. The current token must be the opening `{`; on return the closing
  ## `}` has been consumed.
  ##
  ## Keys may be bare identifiers or quoted strings (e.g. `{ "a b" = 1 }`).
  ## Values are parsed with `parseHook`, so nested inline tables work.
  ## Comment tokens between entries are skipped. Malformed input is reported
  ## through `p.error`.
  # NOTE(review): the code after each `p.error` call assumes `p.error` does
  # not return normally - confirm against its definition.
  p.advance() # consume '{'
  result = newTomlTable()
  while p.curr.kind != ttkRC:
    case p.curr.kind
    of ttkIdentifier, ttkString:
      # for string tokens the value is the already-unquoted key text
      let key = p.curr.value
      p.advance() # consume the key
      if p.curr.kind != ttkEquals:
        p.error("Expected '=' after key in inline table, got " & $p.curr.kind)
      p.advance() # consume '='
      var val: TomlNode
      p.parseHook(val)
      result.tableVal[key] = val
      if p.curr.kind == ttkComma:
        p.advance() # consume comma and continue
      elif p.curr.kind != ttkRC:
        p.error("Expected ',' or '}' in inline table, got " & $p.curr.kind)
    of ttkComment:
      p.advance() # skip comments
    else:
      p.error("Expected key or end of inline table, got " & $p.curr.kind)
  p.advance() # consume '}'
588671

589672
proc parseHook*(p: var TomlParser, v: var TomlNode) =
590673
# echo "Hook: " & $p.curr.kind & " at line " & $p.curr.line & ", col " & $p.curr.col
@@ -604,6 +687,8 @@ proc parseHook*(p: var TomlParser, v: var TomlNode) =
604687
of ttkDateTime:
605688
v = newTomlDateTime(parseTomlDateTime(p.curr.value))
606689
p.advance()
690+
of ttkLC:
691+
v = p.parseInlineObject()
607692
else:
608693
p.error("Expected a value, got " & $p.curr.kind)
609694

@@ -708,6 +793,6 @@ proc parseTOML*[T](input: TOML, t: typedesc[T]): T =
708793
when isMainModule:
709794
proc dumpHook*(s: var string, val: DateTime) =
710795
s.add(val.format("yyyy-MM-dd'T'HH:mm:ss"))
711-
796+
712797
let doc = parseTOML(readFile("example.toml"))
713798
echo toJson(doc)

0 commit comments

Comments
 (0)