179 lines
5.1 KiB
JavaScript
179 lines
5.1 KiB
JavaScript
function isLetter(char) {
|
|
return /[A-Za-z]/.test(char);
|
|
}
|
|
function isOperator(char) {
|
|
return /\+|\-|\*|\/|\=|\=\=|\>|\<|\>\=|\<\=|\=\>|\;/.test(char);
|
|
}
|
|
function isDigit(char) {
|
|
return /\d/.test(char);
|
|
}
|
|
function isWhitespace(char) {
|
|
return /\s/.test(char);
|
|
}
|
|
function Token(type, subtype, value) {
|
|
this.type = type;
|
|
this.subtype = subtype;
|
|
this.value = value;
|
|
}
|
|
function opType(operator) {
|
|
if (false)
|
|
return 'left';
|
|
else if (false)
|
|
return 'right';
|
|
else if (/\;/.test(operator))
|
|
return 'none';
|
|
else
|
|
return 'dual';
|
|
}
|
|
function isKeyword(value) {
|
|
return value == 'let';
|
|
}
|
|
function oldTokenize(rawCode) {
|
|
let chars = rawCode.split('');
|
|
// Create letter, operator, number, and string buffers.
|
|
let lb = [];
|
|
let ob = [];
|
|
let nb = [];
|
|
let sb = [];
|
|
// Create token array.
|
|
let tokens = [];
|
|
let stringData = {
|
|
inString: false,
|
|
stringType: null
|
|
};
|
|
// Check for comments.
|
|
for (let i = 0; i < chars.length - 1; i++) {
|
|
let char = chars[i];
|
|
if (char == '/' && chars[i + 1] == '/') {
|
|
// Remove the characters from the comment.
|
|
chars.splice(i, chars.indexOf('\n', i) - i);
|
|
// Adjust the index accordingly.
|
|
i -= chars.indexOf('\n', i) - i;
|
|
}
|
|
if (char == '/' && chars[i + 1] == '*') {
|
|
// If multiline comment /* find the next */
|
|
let endindex;
|
|
for (let j = 0; j < chars.length - 1; j++) {
|
|
if (chars[j] == '*' && chars[j + 1] == '/') {
|
|
endindex = j + 2;
|
|
console.log(chars.splice(i, (j + 2) - i));
|
|
}
|
|
}
|
|
// Adjust the index accordingly.
|
|
i -= endindex - i;
|
|
}
|
|
}
|
|
// Check for characters to be escaped.
|
|
for (let i = 0; i < chars.length; i++) {
|
|
let char = chars[i];
|
|
if ((char == '\\') && (i + 1 != chars.length)) {
|
|
chars.splice(i, 2, `${char}${chars[i+1]}`);
|
|
continue;
|
|
}
|
|
}
|
|
// Loop through all the characters.
|
|
for (let i = 0; i < chars.length; i++) {
|
|
let char = chars[i];
|
|
// Behave differently in/out of a string.
|
|
if (stringData.inString) {
|
|
if (char == `'`) {
|
|
stringData.inString = false;
|
|
tokens.push(new Token('string', 'n/a', sb.join('')));
|
|
sb = [];
|
|
} else {
|
|
sb.push(char);
|
|
}
|
|
} else {
|
|
if (char == `'`) {
|
|
stringData.inString = true;
|
|
} else {
|
|
// Parsing chars, ignoring strings.
|
|
if (isLetter(char)) {
|
|
lb.push(char);
|
|
if (ob.length > 0) {
|
|
let op = ob.join('');
|
|
tokens.push(new Token('operator', opType(op), op));
|
|
ob = [];
|
|
}
|
|
if (nb.length > 0) {
|
|
tokens.push(new Token('number', 'n/a', nb.join('')));
|
|
nb = [];
|
|
}
|
|
} else if (isOperator(char)) {
|
|
ob.push(char);
|
|
if (lb.length > 0) {
|
|
tokens.push(new Token('variable', 'n/a', lb.join('')));
|
|
lb = [];
|
|
}
|
|
if (nb.length > 0) {
|
|
tokens.push(new Token('number', 'n/a', nb.join('')));
|
|
nb = [];
|
|
}
|
|
} else if (isDigit(char)) {
|
|
nb.push(char);
|
|
if (lb.length > 0) {
|
|
tokens.push(new Token('variable', 'n/a', lb.join('')));
|
|
lb = [];
|
|
}
|
|
if (ob.length > 0) {
|
|
let op = ob.join('');
|
|
tokens.push(new Token('operator', opType(op), op));
|
|
ob = [];
|
|
}
|
|
} else if (isWhitespace(char)) {
|
|
// Close all buffers.
|
|
if (lb.length > 0) {
|
|
tokens.push(new Token('variable', 'n/a', lb.join('')));
|
|
lb = [];
|
|
}
|
|
if (ob.length > 0) {
|
|
let op = ob.join('');
|
|
tokens.push(new Token('operator', opType(op), op));
|
|
ob = [];
|
|
}
|
|
if (nb.length > 0) {
|
|
tokens.push(new Token('number', 'n/a', nb.join('')));
|
|
nb = [];
|
|
}
|
|
} else if (isDelimiter(char)) {
|
|
// Close all buffers.
|
|
if (lb.length > 0) {
|
|
tokens.push(new Token('variable', 'n/a', lb.join('')));
|
|
lb = [];
|
|
}
|
|
if (ob.length > 0) {
|
|
let op = ob.join('');
|
|
tokens.push(new Token('operator', opType(op), op));
|
|
ob = [];
|
|
}
|
|
if (nb.length > 0) {
|
|
tokens.push(new Token('number', 'n/a', nb.join('')));
|
|
nb = [];
|
|
}
|
|
// Categorize and push.
|
|
if (char == '(' || char == ')') {
|
|
tokens.push(new Token('delimiter', char == '(' ? 'left' : 'right', 'parenthesis'));
|
|
} else if (char == '[' || char == ']') {
|
|
tokens.push(new Token('delimiter', char == '[' ? 'left' : 'right', 'bracket'));
|
|
} else if (char == '{' || char == '}') {
|
|
tokens.push(new Token('delimiter', char == '{' ? 'left' : 'right', 'brace'));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Check for keywords.
|
|
for (let i = 0; i < tokens.length; i++) {
|
|
if (tokens[i].type == 'variable' && isKeyword(tokens[i].value)) {
|
|
tokens[i].type = 'keyword';
|
|
}
|
|
}
|
|
// Add indexes.
|
|
let layer = 0;
|
|
for (let i = 0; i < tokens.length; i++) {
|
|
tokens[i].index = i;
|
|
}
|
|
return tokens;
|
|
};
|
|
console.log(oldTokenize(`let x = 'it\\'s cool outside';`));
|