-
Notifications
You must be signed in to change notification settings - Fork 11
/
tokenizer.js
92 lines (79 loc) · 1.64 KB
/
tokenizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/**
* PL/0 tokenizer
*
* https://en.wikipedia.org/wiki/PL/0
*/
/**
 * Token patterns, keyed by token name.
 *
 * Order matters: the tokenizer tries the patterns in declaration order and
 * takes the first one that matches at the current position. Therefore:
 *  - keywords are listed before IDENTIFIER, and each keyword ends with `\b`
 *    so that it is not recognised as a mere prefix of a longer identifier
 *    (e.g. `dog` must be one IDENTIFIER, not DO followed by `g`);
 *  - two-character operators (`<=`, `>=`) are listed before their
 *    one-character prefixes (`<`, `>`).
 *
 * The tokenizer adds the `i` flag itself, so the patterns match
 * case-insensitively even though they are written in a single case here.
 *
 * NOTE(review): the PL/0 grammar on the linked Wikipedia page also has
 * IF / THEN / ODD and the `?` read statement, which have no dedicated token
 * here — confirm whether downstream code needs them before adding.
 */
const patterns = {
  // Keywords
  VAR: /VAR\b/,
  CONST: /CONST\b/,
  PROCEDURE: /PROCEDURE\b/,
  CALL: /CALL\b/,
  WHILE: /WHILE\b/,
  DO: /DO\b/,
  BEGIN: /BEGIN\b/,
  END: /END\b/,

  // Names and literals
  IDENTIFIER: /[a-z_][a-z0-9_]*/,
  NUMBER: /[0-9]+/,

  // Statements and grouping
  ASSIGNMENT: /:=/,
  WRITE: /!/,
  LPAREN: /\(/,
  RPAREN: /\)/,

  // Arithmetic operators
  MULT: /\*/,
  DIV: /\//,
  PLUS: /\+/,
  MINUS: /-/,

  // Comparison operators (longer operators first)
  EQ: /=/,
  NE: /#/,
  LE: /<=/,
  GE: />=/,
  LT: /</,
  GT: />/,

  // Punctuation
  COMMA: /,/,
  PERIOD: /\./,
  SEMICOLON: /;/,

  WHITESPACE: /\s/,
};
/**
 * Tokenize a string
 *
 * The token patterns are tried in declaration order against the start of the
 * remaining input, case-insensitively; the first pattern that matches wins,
 * its text is consumed, and matching restarts on what is left.
 *
 * Every token carries `intval` / `floatval`, which are NaN for any token
 * whose text does not start with a number.
 *
 * @param {string} input String to tokenize
 * @param {boolean} [ignoreWhitespace=true] If true, no WHITESPACE tokens will be produced
 * @returns {Array<{token: string, length: number, strval: string, intval: number, floatval: number}>}
 *   Tokens in input order
 * @throws {TypeError} If `input` is not a string
 * @throws {Error} If no pattern matches at the current position
 */
function tokenize(input, ignoreWhitespace = true) {
  if (typeof input !== 'string') {
    throw new TypeError(`tokenize() expects a string, got ${typeof input}`);
  }

  // Build the start-anchored, case-insensitive variants once per call rather
  // than checking a cache inside the matching loop.
  const anchored = Object.keys(patterns).map((token) => {
    const { source, flags } = patterns[token];
    // Drop `g`/`y` (stateful lastIndex would break repeated matching) and add
    // `i` only if it is not already present, since duplicate flags throw.
    const base = flags.replace(/[gy]/g, '');
    const ciFlags = base.includes('i') ? base : `${base}i`;
    // The non-capturing group makes `^` apply to the whole pattern, including
    // every branch of an alternation.
    return { token, regex: new RegExp(`^(?:${source})`, ciFlags) };
  });

  const tokens = [];
  let remaining = input;

  while (remaining !== '') {
    let tokenized = false;

    for (const { token, regex } of anchored) {
      const m = regex.exec(remaining);
      // An empty match consumes nothing and would loop forever; treat it as
      // "no match" and let the next pattern try.
      if (m === null || m[0].length === 0) {
        continue;
      }

      const strval = m[0];
      if (token !== 'WHITESPACE' || !ignoreWhitespace) {
        tokens.push({
          token,
          length: strval.length,
          strval,
          intval: Number.parseInt(strval, 10),
          floatval: Number.parseFloat(strval),
        });
      }

      // On to the next token
      remaining = remaining.slice(strval.length);
      tokenized = true;
      break;
    }

    if (!tokenized) {
      throw new Error(`Unrecognized token at: "${remaining}"`);
    }
  }

  return tokens;
}
// CommonJS export: the module's value is the tokenize function itself.
module.exports = tokenize;