forked from RyanMarcus/dirty-json
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.js
168 lines (134 loc) · 3.85 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
// < begin copyright >
// Copyright Ryan Marcus 2018
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// < end copyright >
"use strict";
const Lexer = require("lex");
const unescapeJs = require("unescape-js");
const utf8 = require("utf8");
// Numeric token-type codes. Tokens produced by getLexer() carry one of these
// in their `type` field. Of the non-negative codes, this file itself only
// emits LEX_FLOAT, LEX_INT, LEX_QUOTE, LEX_RB, LEX_RCB and LEX_TOKEN; the
// remaining ones are not referenced anywhere in the visible code.
// NOTE(review): presumably the other codes are used by the parser, and the
// "terminals" / "non-terminals" labels below look inverted relative to what
// the lexer emits (the negative codes are punctuation tokens) — confirm
// against the parser before relying on the labels.
// terminals
const LEX_KV = 0;
const LEX_KVLIST = 1;
const LEX_VLIST = 2;
const LEX_BOOLEAN = 3;
const LEX_COVALUE = 4;
const LEX_CVALUE = 5;
const LEX_FLOAT = 6;
const LEX_INT = 7;
const LEX_KEY = 8;
const LEX_LIST = 9;
const LEX_OBJ = 10;
const LEX_QUOTE = 11;
const LEX_RB = 12;
const LEX_RCB = 13;
const LEX_TOKEN = 14;
const LEX_VALUE = 15;
// non-terminals
const LEX_COLON = -1;
const LEX_COMMA = -2;
const LEX_LCB = -3;
const LEX_LB = -4;
const LEX_DOT = -5;
// Punctuation character -> token descriptor. Not referenced by any code
// visible in this file (getLexer() uses lexSpc below instead).
const lexMap = {
":": {type: LEX_COLON},
",": {type: LEX_COMMA},
"{": {type: LEX_LCB},
"}": {type: LEX_RCB},
"[": {type: LEX_LB},
"]": {type: LEX_RB},
".": {type: LEX_DOT} // TODO: remove?
};
// [pattern, token type] pairs for single-character punctuation. getLexer()
// registers one lexer rule per entry, in this order.
const lexSpc = [
[/:/, LEX_COLON],
[/,/, LEX_COMMA],
[/{/, LEX_LCB],
[/}/, LEX_RCB],
[/\[/, LEX_LB],
[/\]/, LEX_RB],
[/\./, LEX_DOT] // TODO: remove?
];
/**
 * Decodes the raw text captured between a pair of quotes into its string
 * value.
 *
 * @param {string} str - Raw string body, escape sequences still intact.
 * @returns {string} The result of running unescape-js over `str` after every
 *     `\/` escape has been rewritten to a plain `/`.
 */
function parseString(str) {
    // unescape-js doesn't cover the \/ case, so handle it here. Escape
    // sequences are walked pairwise (backslash + following character) with the
    // `g` flag so that:
    //   * every `\/` in the string is rewritten, not only the first one, and
    //   * an escaped backslash followed by a slash (`\\/`) is left untouched
    //     instead of having its second backslash consumed.
    const withSlashes = str.replace(/\\([\s\S])/g, (escape, ch) =>
        ch === "/" ? "/" : escape
    );
    return unescapeJs(withSlashes);
}
/**
 * Builds a `lex` Lexer loaded with the tokenizer rules and primed with
 * `string` as its input.
 *
 * Each rule that produces a token returns `{type, value, row, col}`:
 *   - `type`  one of the LEX_* codes,
 *   - `value` the decoded value (string, number, or the raw lexeme),
 *   - `row`   zero-based line on which the token ends,
 *   - `col`   column just past the token's last character on that line.
 * Whitespace only advances the position and produces no token.
 *
 * @param {string} string - Text to tokenize.
 * @returns {Lexer} Lexer whose `lex()` method yields the tokens one by one.
 */
function getLexer(string) {
    const lexer = new Lexer();
    let col = 0;
    let row = 0;

    // Moves (row, col) past `lexeme`. Quoted strings may span several lines,
    // so embedded newlines must bump `row` and restart `col`; otherwise every
    // position reported after a multi-line string would be wrong.
    const advance = lexeme => {
        const lastNewline = lexeme.lastIndexOf("\n");
        if (lastNewline === -1) {
            col += lexeme.length;
            return;
        }
        row += lexeme.split("\n").length - 1;
        col = lexeme.length - lastNewline - 1;
    };

    // Consumes `lexeme` and builds the token stamped with the new position.
    const token = (type, value, lexeme) => {
        advance(lexeme);
        return {type, value, row, col};
    };

    // NOTE(review): rules are registered in the original order; precedence
    // between overlapping rules is decided by the `lex` library — confirm
    // before reordering.

    // Double- and single-quoted strings. The closing quote is optional at end
    // of input so an unterminated string still yields a token.
    const quoted = (lexeme, txt) => token(LEX_QUOTE, parseString(txt), lexeme);
    lexer.addRule(/"((?:\\.|[^"])*)($|")/, quoted);
    lexer.addRule(/'((?:\\.|[^'])*)($|')/, quoted);

    const float = lexeme => token(LEX_FLOAT, parseFloat(lexeme), lexeme);
    // floats with a dot
    lexer.addRule(/[\-0-9]*\.[0-9]*([eE][\+\-]?)?[0-9]*/, float);
    // floats without a dot but with e notation
    lexer.addRule(/\-?[0-9]+([eE][\+\-]?)[0-9]*/, float);

    // Integers; the explicit radix keeps parsing decimal regardless of engine.
    lexer.addRule(/\-?[0-9]+/, lexeme =>
        token(LEX_INT, parseInt(lexeme, 10), lexeme)
    );

    // Single-character punctuation.
    lexSpc.forEach(([pattern, type]) => {
        lexer.addRule(pattern, lexeme => token(type, lexeme, lexeme));
    });

    // chomp whitespace... (returns nothing, so no token is produced)
    lexer.addRule(/\s/, lexeme => {
        advance(lexeme);
    });

    // Anything else becomes a one-character generic token.
    lexer.addRule(/./, lexeme => token(LEX_TOKEN, lexeme, lexeme));

    lexer.setInput(string);
    return lexer;
}
// Export the callback-style tokenizer. The assignment can precede the
// definition because function declarations are hoisted.
module.exports.lexString = lexString;
/**
 * Tokenizes `str` and hands each token to `emit` in the order produced.
 *
 * @param {string} str - Text to tokenize.
 * @param {function(Object): void} emit - Called once per token.
 * @returns {undefined}
 */
function lexString(str, emit) {
    const source = getLexer(str);
    // Pull tokens until the lexer returns a falsy value.
    for (let next = source.lex(); next; next = source.lex()) {
        emit(next);
    }
}
// Export the array-returning tokenizer. The assignment can precede the
// definition because function declarations are hoisted.
module.exports.getAllTokens = getAllTokens;
/**
 * Tokenizes `str` eagerly and returns every token in an array.
 *
 * @param {string} str - Text to tokenize.
 * @returns {Array<Object>} Tokens in the order lexString() emitted them.
 */
function getAllTokens(str) {
    const collected = [];
    lexString(str, tok => {
        collected.push(tok);
    });
    return collected;
}