Skip to content

Commit 7315b56

Browse files
committed
Reworked Parsing
1 parent b4cfd86 commit 7315b56

14 files changed

Lines changed: 134 additions & 202 deletions

Source/Decode.js

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,19 @@ export default function decode(string){
1515

1616
// log([ ... tokens ]);
1717

18-
const state = {
19-
tokens : tokens ,
20-
object : {}
18+
try {
19+
const state = {
20+
tokens : tokens ,
21+
object : {}
22+
}
23+
24+
object(state);
25+
26+
return parse(state.object);
27+
} catch (e) {
28+
log([ ...normalize(new Tokenizer(string).iterator()) ]);
29+
throw e;
2130
}
22-
23-
object(state);
24-
25-
return parse(state.object);
2631
}
2732

2833

@@ -43,7 +48,7 @@ function object(state){
4348
switch(token.value.type){
4449
case 'ObjectEnd' :
4550
return;
46-
case 'Word' :
51+
case 'Member' :
4752
member({
4853
parent : object ,
4954
tokens : tokens ,
@@ -74,10 +79,7 @@ function member(state){
7479
if(type === 'Colon')
7580
break;
7681

77-
switch(type){
78-
default:
79-
throw `Invalid Member Seperator Token ${ token.value.type }`;
80-
}
82+
throw `Invalid Member Seperator Token ${ type }`;
8183
}
8284

8385
while(true){
@@ -89,7 +91,7 @@ function member(state){
8991

9092
const { type } = token.value;
9193

92-
if([ 'Multiline' , 'Word' ].includes(type)){
94+
if([ 'String' ].includes(type)){
9395

9496
const { value } = token.value;
9597

@@ -177,7 +179,7 @@ function array(state){
177179

178180
const { type } = token.value;
179181

180-
if(type === 'Word'){
182+
if(type === 'String'){
181183
array.push(token.value.value);
182184
break;
183185
}

Source/Normalize.js

Lines changed: 91 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,74 @@
11

2+
const { log } = console;
23

3-
export default function * normalize(tokens){
4-
yield * reduce(unwrap(trim(forwardCombine(multilineJoiner(multiline(tokens))))));
5-
}
64

5+
export default function * normalize(tokens){
76

8-
function * unwrap(tokens){
9-
10-
let a = false
11-
12-
for(const token of tokens)
13-
if(a)
14-
yield token;
15-
else
16-
if(![ 'Newline' , 'Space' , 'ObjectStart' ].includes(token.type)){
17-
yield token;
18-
a = true;
19-
}
7+
yield *
8+
forwardCombine(
9+
prepareQuotedString(
10+
prepareMultiLines(
11+
prepareMultiStrings(
12+
tokens
13+
))));
2014
}
2115

22-
function * multilineJoiner(tokens){
23-
16+
17+
function * select(tokens,tokenType,callback){
2418
for(const token of tokens)
25-
yield (token.type === 'Multiline')
26-
? combineMultiline(token)
19+
yield (token.type === tokenType)
20+
? callback(token.value)
2721
: token ;
2822
}
2923

30-
function combineMultiline(multiline){
31-
32-
const unindent = (match) =>
33-
match.substring(multiline.indent);
34-
35-
const value = multiline.value
36-
.map(({ value }) => value)
37-
.join('')
38-
.replace(/^[^\S\n]*\n/,'')
39-
.replace(/\n[^\S\n]*$/,'')
40-
.replace(/^[^\S\n]+/gm,unindent);
41-
42-
return { type : 'Multiline' , value };
24+
function * prepareMultiLines(tokens){
25+
yield * select(tokens,'MultiLine',(value) => {
26+
27+
const indent = value
28+
.match(/^[\s]*/)[0]
29+
.length;
30+
31+
value = value
32+
.trim()
33+
.slice(3,-3)
34+
35+
return {
36+
type : 'String' ,
37+
value
38+
}
39+
})
4340
}
4441

45-
function * multiline(tokens){
46-
47-
let before = tokens.next();
48-
49-
if(before.done)
50-
return;
51-
52-
before = before.value;
53-
54-
let open = false;
55-
let parts = [];
56-
let indent = 0;
57-
58-
for(let { type , value } of tokens){
59-
if(open){
60-
if(type === 'Multiline'){
61-
open = false
62-
yield { type : 'Multiline' , value : parts , indent };
63-
64-
before = null;
65-
continue;
66-
} else {
67-
68-
69-
parts.push({
70-
type , value
71-
});
72-
73-
continue;
74-
}
75-
} else {
76-
if(type === 'Multiline'){
77-
open = true;
78-
parts = [];
79-
80-
indent = 0;
81-
82-
if(before?.type === 'Space')
83-
indent = before.value.length;
84-
85-
continue;
86-
}
87-
}
88-
89-
if(before)
90-
yield before;
42+
function * prepareMultiStrings(tokens){
43+
yield * select(tokens,'MultiString',(value) => {
9144

92-
before = { type , value };
93-
}
94-
95-
if(before)
96-
yield before;
45+
const indent = value
46+
.match(/^[\s]*/)[0]
47+
.length;
48+
49+
const detent = new RegExp(`^\\s{1,${ indent }}`,'gm');
50+
51+
value = value
52+
.trim()
53+
.slice(3,-3)
54+
.replace(/^[^\S\n]*\n/,'')
55+
.replace(/\n[^\S\n]*$/,'')
56+
.replace(detent,'')
57+
58+
return {
59+
type : 'String' ,
60+
value
61+
}
62+
})
9763
}
9864

99-
function * trim(tokens){
100-
101-
for(let { type , value } of tokens){
102-
103-
if(type === 'Word')
104-
value = value.trim();
105-
106-
yield { type , value }
107-
}
65+
function * prepareQuotedString(tokens){
66+
for(const token of tokens)
67+
yield ([ 'SingleString' , 'DoubleString' ].includes(token.type))
68+
? { type : 'String' , value : token.value.slice(1,-1) }
69+
: (token.type === 'Quoteless')
70+
? { type : 'String' , value : token.value.trim() }
71+
: token ;
10872
}
10973

11074

@@ -114,7 +78,7 @@ function * forwardCombine(tokens){
11478

11579
while (!before.done) {
11680

117-
const now = tokens.next();
81+
let now = tokens.next();
11882

11983
if(now.done)
12084
break;
@@ -131,19 +95,16 @@ function * forwardCombine(tokens){
13195
nowType === 'Newline'
13296
) continue;
13397

134-
break;
135-
case 'Word' :
136-
13798
if(
138-
nowType === 'Space' ||
139-
nowType === 'Word'
99+
nowType === 'Member'
140100
){
141-
before.value.value += now.value.value;
142-
continue;
101+
now = { value : { type : 'String' , value : now.value.value } }
143102
}
144103

145104
break;
105+
146106
case 'ObjectStart' :
107+
case 'ArrayStart' :
147108

148109
if(
149110
nowType === 'Space' ||
@@ -161,7 +122,7 @@ function * forwardCombine(tokens){
161122
break;
162123

163124
case 'Newline' :
164-
125+
165126
if(
166127
nowType === 'Newline' ||
167128
nowType === 'Space'
@@ -170,19 +131,45 @@ function * forwardCombine(tokens){
170131
if(
171132
nowType === 'Comma'
172133
){
173-
before = { type : 'Comma' }
134+
before = { value : { type : 'Comma' } }
174135
continue;
175136
}
176137

177138
break;
178139
case 'Space' :
179140

180141
if(
181-
nowType === 'Comma'
142+
nowType === 'Space'
143+
) continue;
144+
145+
if(
146+
nowType === 'Colon'
182147
){
183-
before = { type : 'Comma' }
148+
before = { value : { type : 'Colon' } }
184149
continue;
185150
}
151+
152+
break;
153+
case 'String':
154+
155+
if(
156+
nowType === 'Colon'
157+
){
158+
before = { value : { type : 'Member' , value : before.value.value } }
159+
}
160+
161+
break;
162+
case 'Member':
163+
164+
if(
165+
nowType === 'Space'
166+
) continue;
167+
168+
if(
169+
nowType !== 'Colon'
170+
){
171+
before = { value : { type : 'String' , value : before.value.value } }
172+
}
186173
}
187174

188175
yield before.value;
@@ -192,22 +179,3 @@ function * forwardCombine(tokens){
192179
if(!before.done)
193180
yield before.value;
194181
}
195-
196-
197-
const complex = [ 'Word' , 'Space' , 'Multiline' ];
198-
199-
const isSimple = ({ type }) =>
200-
! complex.includes(type);
201-
202-
function * reduce(tokens){
203-
for(const token of tokens)
204-
yield removeRedundantValues(token);
205-
}
206-
207-
function removeRedundantValues(token){
208-
209-
if(isSimple(token))
210-
delete token.value;
211-
212-
return token;
213-
}

Source/Tokenizer.js

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@ const tokens = [
1616
[ 'Newline' , /^\r?\n/ ] ,
1717

1818
[ 'MultiString' , /^[^\S\n]*'''[\s\S]*?'''/u , 0 ] ,
19+
[ 'MultiLine' , /^[^\S\n]*'''[^\n]*?'''/u , 0 ] ,
1920
[ 'Space' , /^[^\S\n]+/ , 0 ] ,
2021

2122
[ 'MultiComment' , /^\/\*([\s\S]*?)\*\//u ] ,
22-
[ 'SingleString' , /^'(([^\\]|(\\["'\\/bfnrt])|(\\u\d{4}))*?)'/ , 1 ] ,
23-
[ 'DoubleString' , /^"(([^\\]|(\\["'\\/bfnrt])|(\\u\d{4}))*?)"/ , 1 ] ,
23+
[ 'SingleString' , /^'(([^\\]|(\\["'\\/bfnrt])|(\\u\d{4}))*?)'/ , 0 ] ,
24+
[ 'DoubleString' , /^"(([^\\]|(\\["'\\/bfnrt])|(\\u\d{4}))*?)"/ , 0 ] ,
2425
[ 'Comment' , /^(#)|(\/\/)([^\n]*)/u ] ,
25-
[ 'Member' , /^[^\s,:\[\]\{\}][^\s]*/ , 0 ] ,
26-
[ 'Quoteless' , /^[^\s,:\[\]\{\}][^\n]*/ , 0 ]
26+
[ 'Member' , /^([^\s,:\[\]\{\}]*)[\s]*:/ , 1 ] ,
27+
[ 'Quoteless' , /^[^\n,:\[\]\{\}][^\n,]*/ , 0 ] ,
2728
]
2829

2930

@@ -34,7 +35,8 @@ export default class Tokenizer {
3435

3536
constructor(string){
3637
this.#string = string
37-
.trim();
38+
.replace(/^[\s]*{?[\s]*/,'')
39+
.replace(/[\s]*}?[\s]*$/,'');
3840
}
3941

4042

@@ -62,7 +64,7 @@ export default class Tokenizer {
6264

6365
const found = string.match(regex);
6466

65-
this.#string = string.substring(found[0].length);
67+
this.#string = string.substring(found[match ?? 0].length);
6668

6769
const token = { type };
6870

0 commit comments

Comments
 (0)