Skip to content

Commit 89ba65f

Browse files
committed
Reworked Tokenizer
1 parent 34cae07 commit 89ba65f

6 files changed

Lines changed: 70 additions & 63 deletions

File tree

Source/Tokenizer.js

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,43 +2,38 @@
22
const { iterator } = Symbol
33

44

5-
const tokens = prepare({
6-
'\'\'\'' : 'Multiline' ,
7-
'[^\\s,:\\[\\]\\{\\}]+' : 'Word' ,
8-
'[^\\S\\n]+' : 'Space' ,
9-
'(\\r)?\\n' : 'Newline' ,
10-
'\\{' : 'ObjectStart' ,
11-
'\\}' : 'ObjectEnd' ,
12-
'\\[' : 'ArrayStart' ,
13-
'\\]' : 'ArrayEnd' ,
14-
',' : 'Comma' ,
15-
':' : 'Colon'
16-
})
17-
18-
19-
function prepare(tokens){
20-
return Object
21-
.entries(tokens)
22-
.map(toToken);
23-
}
24-
25-
function toToken([ pattern , token ]){
26-
return [ toRegex(pattern) , token ];
27-
}
28-
29-
function toRegex(pattern){
30-
return new RegExp(`^(${ pattern })`,'u');
31-
}
5+
/**
 * Ordered token table used by the tokenizer.
 *
 * Each entry is `[ type , regex , valueGroup? ]`:
 *  - `type`       name reported on the produced token.
 *  - `regex`      pattern tested against the remaining input; every
 *                 pattern must be fully anchored with `^` because the
 *                 consumer strips `found[0].length` characters from
 *                 the start of the input after a match.
 *  - `valueGroup` optional capture-group index whose text becomes the
 *                 token's `value`; when omitted the token has no value.
 *
 * Entries are tried top to bottom and the first match wins, so more
 * specific patterns have to precede more general ones.
 */
const tokens = [

    [ 'ObjectStart' , /^\{/ ] ,
    [ 'ObjectEnd' , /^\}/ ] ,

    [ 'ArrayStart' , /^\[/ ] ,
    [ 'ArrayEnd' , /^\]/ ] ,

    [ 'Comma' , /^,/ ] ,
    [ 'Colon' , /^:/ ] ,

    [ 'Newline' , /^\r?\n/ ] ,

    // Horizontal whitespace only: a plain `\s+` would also swallow the
    // line break that follows trailing spaces and hide the Newline token.
    [ 'Space' , /^[^\S\n]+/ , 0 ] ,

    // Must precede SingleString, otherwise `'''` is read as an empty string.
    [ 'MultiString' , /^'''([\s\S]*?)'''/u , 1 ] ,
    [ 'MultiComment' , /^\/\*([\s\S]*?)\*\//u ] ,

    // `\uXXXX` escapes take four hexadecimal digits, not just decimal ones.
    [ 'SingleString' , /^'(([^\\]|(\\["'\\/bfnrt])|(\\u[0-9a-fA-F]{4}))*?)'/ , 1 ] ,
    [ 'DoubleString' , /^"(([^\\]|(\\["'\\/bfnrt])|(\\u[0-9a-fA-F]{4}))*?)"/ , 1 ] ,

    // The alternation is grouped so that `^` anchors both comment markers
    // and the rest of the line is consumed for `#` as well as `//`.
    [ 'Comment' , /^(?:#|\/\/)([^\n]*)/u ] ,

    [ 'Member' , /^[^\s,:\[\]\{\}][^\s]*/ , 0 ] ,

    // NOTE(review): Member matches every input Quoteless can match and is
    // tried first, so this entry looks unreachable — confirm intent.
    [ 'Quoteless' , /^[^\s,:\[\]\{\}][^\n]*/ , 0 ]
]
3227

3328

3429
export default class Tokenizer {
3530

36-
#position;
31+
#position = 0;
3732
#string;
3833

3934
constructor(string){
40-
this.#position = 0;
41-
this.#string = string;
35+
this.#string = string
36+
.trim();
4237
}
4338

4439

@@ -61,16 +56,21 @@ export default class Tokenizer {
6156

6257
const string = this.#string
6358

64-
for(const [ regex , type ] of tokens)
59+
for(const [ type , regex , match ] of tokens)
6560
if(regex.test(string)){
6661

67-
const match = string.match(regex);
62+
const found = string.match(regex);
63+
64+
console.log(type,found);
65+
66+
this.#string = string.substring(found[0].length);
6867

69-
this.#string = string.substring(match[0].length);
68+
const token = { type };
7069

71-
const [ _ , value ] = match;
70+
if(match != null)
71+
token.value = found[match];
7272

73-
return { type , value }
73+
return token;
7474
}
7575

7676
throw `No Token Found for '${ string }'`;

Tests/Tokenize.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
clear
3+
4+
echo ""
5+
echo ""
6+
echo ""
7+
echo ""
8+
echo ""
9+
echo ""
10+
echo ""
11+
echo ""
12+
echo ""
13+
14+
deno test \
15+
--importmap=Tests/Imports.json \
16+
Tests/Tokens

Tests/Tokens/Brackets.test.js

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@ const hjson = '{}';
66

77

88
const tokens = [{
9-
type : 'ObjectStart' ,
10-
value : '{'
9+
type : 'ObjectStart'
1110
},{
12-
type : 'ObjectEnd' ,
13-
value : '}'
11+
type : 'ObjectEnd'
1412
}];
1513

1614

Tests/Tokens/Member.test.js

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,41 +9,30 @@ const hjson =
99

1010

1111
const tokens = [{
12-
type : 'ObjectStart' ,
13-
value : '{'
12+
type : 'ObjectStart'
1413
},{
15-
type : 'Newline' ,
16-
value : '\n'
14+
type : 'Newline'
1715
},{
1816
type : 'Space' ,
1917
value : ' '
2018
},{
21-
type : 'Word' ,
19+
type : 'Member' ,
2220
value : 'member'
2321
},{
2422
type : 'Space' ,
2523
value : ' '
2624
},{
27-
type : 'Colon' ,
28-
value : ':'
25+
type : 'Colon'
2926
},{
3027
type : 'Space' ,
3128
value : ' '
3229
},{
33-
type : 'Word' ,
34-
value : '\'String'
30+
type : 'SingleString' ,
31+
value : 'String Value'
3532
},{
36-
type : 'Space' ,
37-
value : ' '
38-
},{
39-
type : 'Word' ,
40-
value : 'Value\''
41-
},{
42-
type : 'Newline' ,
43-
value : '\n'
33+
type : 'Newline'
4434
},{
45-
type : 'ObjectEnd' ,
46-
value : '}'
35+
type : 'ObjectEnd'
4736
}];
4837

4938

Tests/Tokens/Space.test.js

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,7 @@ const hjson = Object
2626
.join('');
2727

2828

29-
const tokens = [{
30-
type : 'Space' ,
31-
value : hjson
32-
}];
29+
const tokens = [];
3330

3431
test('String made of whitespace',() => {
3532

Tests/Tokens/Test.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,19 @@
22
import { assertEquals } from 'Assert'
33
import Tokenizer from 'Tokenizer'
44

5+
const { log } = console;
6+
57

68
export const test = Deno.test;
79

810

911
export function assertSameTokens(hjson,tokens){
10-
assertEquals(tokenize(hjson),tokens);
12+
13+
const parsed = tokenize(hjson);
14+
15+
log(parsed);
16+
17+
assertEquals(parsed,tokens);
1118
}
1219

1320
function tokenize(string){

0 commit comments

Comments
 (0)