the parsers finally working dont screw it up

ElementG9 2019-10-05 21:07:18 -06:00
parent 5c0a481dbe
commit 812ec86aca
4 changed files with 366 additions and 101 deletions

.gitignore

@@ -2,3 +2,4 @@ other/
package-lock.json
node_modules/
jsdoc/
ast.json

src/parser.js

@@ -1,44 +1,126 @@
/**
* @module parser
* @file Manages the parser phase of Pivot.
* @file Manages the parsing phase of Pivot.
* @author Garen Tyler <garentyler@gmail.com>
* @requires module:types
*/
const Token = require('./types.js').Token;
const Group = require('./types.js').Token;
const Group = require('./types.js').Group;
const tokenizer = require('./tokenizer.js');
module.exports = function(tokens) {
/**
* @function parse
* @desc Takes in an array of tokens, and outputs an AST.
* @param {Token[]} tokens The input tokens.
* @returns {Token[]} The tokens structured in an AST.
* @public
*/
function parse(tokens) {
// Create our output array.
let ast = tokens;
// Add indexes and levels.
ast = addIndexes(ast);
ast = addLevels(ast);
// Get the deepest level.
let deepestLevel = getDeepestLevel(ast);
// Loop through for each level.
for (let currentLevel = deepestLevel; currentLevel > 0; currentLevel--) {
console.log('looping for level ' + currentLevel);
let groupBuffer = [];
for (let j = 0; j < ast.length; j++) {
// Create previousLevel and nextLevel.
// let previousTokenLevel = 0;
// if (typeof ast[j-1] != 'undefined')
// previousTokenLevel = ast[j-1].level;
let nextTokenLevel = 0;
if (typeof ast[j+1] != 'undefined')
nextTokenLevel = ast[j+1].level;
if (ast[j].level == currentLevel) {
groupBuffer.push(ast[j]); // Add the token to the groupBuffer.
if (ast[j].level > nextTokenLevel) {
let g = new Group(groupBuffer[0].value, groupBuffer);
g.index = g.tokens[0].index;
g.level = g.tokens[0].level - 1; // -1 because the group is on the level below.
ast.splice(g.tokens[0].index, g.tokens.length + 1, g);
j = g.tokens[0].index;
groupBuffer = [];
}
}
// // Take care of falling edges.
// if (ast[j].level > nextTokenLevel && ast[j].level == currentLevel) {
// // The first item in the group is always a delimiter, steal info from that.
// console.log(groupBuffer[0]);
// }
}
}
return ast;
}
/**
* @function addIndexes
* @desc Adds basic indexes to the tokens.
* @param {Token[]} tokens The tokens.
* @returns {Token[]} The tokens with indexes.
* @private
*/
function addIndexes(tokens) {
return tokens.map((t, i) => {
t.index = i;
return t;
});
}
/**
* @function addLevels
* @desc Adds basic levels to the tokens. The levels are dependent on the delimiters.
* @param {Token[]} tokens The tokens.
* @returns {Token[]} The tokens with levels.
* @private
*/
function addLevels(tokens) {
let level = 0;
// Add level markers.
tokens.forEach((t, i) => {
if (t.type == 'delimiter' && t.subtype == 'left') {
tokens[i].level = level;
tokens = tokens.map((t, i) => {
if (t.type == 'delimiter' && t.subtype == 'left')
level++;
} else if (t.type == 'delimiter' && t.subtype == 'right') {
t.level = level;
if (t.type == 'delimiter' && t.subtype == 'right')
level--;
tokens[i].level = level;
} else {
tokens[i].level = level;
}
return t;
});
// Group.
let currentLevel = 0;
let groupStack = [0];
tokens.forEach((t, i) => {
if (currentLevel < t.level) {
tokens.splice(i, 0, new Group(tokens[i - 1].value, []));
groupStack.push(i);
currentLevel++;
tokens[i].level = currentLevel;
}
if (t.level != 0) {
tokens[groupStack.slice(-1)].tokens.push(t);
}
if (currentLevel > t.level) {
groupStack.pop();
currentLevel--;
}
});
if (currentLevel != 0) {} // Error: missing delimiter.
return tokens;
if (level > 0)
throw new SyntaxError('Missing closing delimiter');
else if (level < 0)
throw new SyntaxError('Missing opening delimiter');
else
return tokens;
}
/**
* @function getDeepestLevel
* @desc Finds the deepest level.
* @param {Token[]} tokens The tokens.
* @returns {Number} The deepest level.
* @private
*/
function getDeepestLevel(tokens) {
return tokens.reduce((deepest, t) => {
return t.level > deepest ? t.level : deepest;
}, 0);
}
module.exports = {
parse,
util: {
addIndexes,
addLevels,
getDeepestLevel
}
};
require('fs').writeFileSync('ast.json', JSON.stringify(parse(tokenizer.tokenize('let x = (5 + (6 * 2));')), null, 2));
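
For orientation, and not part of this commit: the new parse() pipeline runs addIndexes, then addLevels, then collapses delimiter levels into Group nodes from the deepest level outward. The sketch below shows how that API could be driven; the require paths assume the src/parser.js and src/tokenizer.js layout implied by the requires above, run from the repository root.

// Illustrative sketch only, not code from this commit.
const parser = require('./src/parser.js');
const tokenizer = require('./src/tokenizer.js');

const tokens = tokenizer.tokenize('let x = (5 + 3);');

// addIndexes/addLevels annotate every token; getDeepestLevel reports how deep
// the delimiters nest (1 here, since there is a single parenthesis pair).
const levelled = parser.util.addLevels(parser.util.addIndexes(tokens));
console.log(parser.util.getDeepestLevel(levelled)); // 1

// parse() performs the same annotation and then folds each nested level into
// a Group token, so the parenthesized span becomes one node in the result.
const ast = parser.parse(tokenizer.tokenize('let x = (5 + 3);'));
console.log(JSON.stringify(ast, null, 2));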

src/tokenizer.js

@@ -5,7 +5,7 @@
* @requires module:types
*/
const Token = require('./types.js').Token;
const Group = require('./types.js').Token;
const Group = require('./types.js').Group;
/**
* @function tokenize


@@ -4,6 +4,8 @@ const types = require('../src/types.js');
const tokenizer = require('../src/tokenizer.js');
const parser = require('../src/parser.js');
console.log(parser.parse(tokenizer.tokenize('let x = (5 + 3);')));
describe('types.js', () => {
it('Has a Token child.', () => {
assert.equal(types.hasOwnProperty('Token'), true);
@@ -54,73 +56,253 @@ describe('types.js', () => {
});
});
describe('tokenizer.js', () => {
it('Has a tokenize child', () => {
assert.equal(tokenizer.hasOwnProperty('tokenize'), true);
});
it('Has a util child', () => {
assert.equal(tokenizer.hasOwnProperty('util'), true);
});
describe('util', () => {
it('combineEscapedChars works', () => {
assert.equal(tokenizer.util.combineEscapedChars(`let x = 'test\\nnewline';`.split('')).join(''), `let x = 'test\\nnewline';`);
});
it('removeComments works', () => {
assert.equal(tokenizer.util.removeComments(`// Comment\nlet i = 0;`.split('')).join(''), `let i = 0;`);
});
it('changeKeywords works', () => {
let tokens = tokenizer.util.changeKeywords([{
type: 'name',
subtype: 'variable',
value: 'let'
}, {
type: 'name',
subtype: 'variable',
value: 'x'
}]);
let correct = [{
type: 'name',
subtype: 'keyword',
value: 'let'
}, {
type: 'name',
subtype: 'variable',
value: 'x'
}];
let isCorrect = true;
tokens.forEach((t, i) => {
if (t.type != correct[i].type)
throw new Error('Changed type: Expected \''+ correct[i].type +'\' but got ' + t.type)
else if (t.subtype != correct[i].subtype)
throw new Error('Incorrectly changed subtype: Expected \''+ correct[i].subtype +'\' but got ' + t.subtype)
else if (t.value != correct[i].value)
throw new Error('Changed value: Expected \''+ correct[i].value +'\' but got ' + t.value)
it('Has a tokenize child', () => {
assert.equal(tokenizer.hasOwnProperty('tokenize'), true);
});
it('Has a util child', () => {
assert.equal(tokenizer.hasOwnProperty('util'), true);
});
describe('util', () => {
it('combineEscapedChars works', () => {
assert.equal(tokenizer.util.combineEscapedChars(`let x = 'test\\nnewline';`.split('')).join(''), `let x = 'test\\nnewline';`);
});
it('removeComments works', () => {
assert.equal(tokenizer.util.removeComments(`// Comment\nlet i = 0;`.split('')).join(''), `let i = 0;`);
});
it('changeKeywords works', () => {
let tokens = tokenizer.util.changeKeywords([{
type: 'name',
subtype: 'variable',
value: 'let'
}, {
type: 'name',
subtype: 'variable',
value: 'x'
}]);
let correct = [{
type: 'name',
subtype: 'keyword',
value: 'let'
}, {
type: 'name',
subtype: 'variable',
value: 'x'
}];
let isCorrect = true;
tokens.forEach((t, i) => {
if (t.type != correct[i].type)
throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got \'' + t.type + '\'')
else if (t.subtype != correct[i].subtype)
throw new Error('Incorrectly changed subtype: Expected \'' + correct[i].subtype + '\' but got \'' + t.subtype + '\'')
else if (t.value != correct[i].value)
throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got \'' + t.value + '\'')
});
});
it('getDelimiterToken works', () => {
let token = tokenizer.util.getDelimiterToken(')');
if (token.type != 'delimiter')
throw new Error('Incorrect type: Expected \'delimiter\' but got \'' + token.type + '\'')
else if (token.subtype != 'right')
throw new Error('Incorrect subtype: Expected \'right\' but got \'' + token.subtype + '\'')
else if (token.value != 'parenthesis')
throw new Error('Incorrect value: Expected \'parenthesis\' but got \'' + token.value + '\'')
});
it('operatorType works', () => {
assert.equal(tokenizer.util.operatorType('++'), 'left');
assert.equal(tokenizer.util.operatorType(';'), 'none');
assert.equal(tokenizer.util.operatorType('+'), 'dual');
});
it('determineCharType works', () => {
assert.equal(tokenizer.util.determineCharType('+'), 'operator');
assert.equal(tokenizer.util.determineCharType('"'), 'string delimiter');
assert.equal(tokenizer.util.determineCharType('4'), 'digit');
});
it('determineType works', () => {
assert.equal(tokenizer.util.determineType('let'), 'keyword');
assert.equal(tokenizer.util.determineType('dog'), 'unknown');
});
});
});
describe('parser.js', () => {
it('Has a parse child', () => {
assert.equal(parser.hasOwnProperty('parse'), true);
});
it('Has a util child', () => {
assert.equal(parser.hasOwnProperty('util'), true);
});
describe('util', () => {
it('addIndexes works', () => {
let tokens = parser.util.addIndexes([{
type: 'name',
subtype: 'keyword',
value: 'let'
}, {
type: 'name',
subtype: 'variable',
value: 'x'
}]);
let correct = [{
type: 'name',
subtype: 'keyword',
value: 'let',
index: 0
}, {
type: 'name',
subtype: 'variable',
value: 'x',
index: 1
}];
let isCorrect = true;
tokens.forEach((t, i) => {
if (t.type != correct[i].type)
throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got ' + t.type)
else if (t.subtype != correct[i].subtype)
throw new Error('Changed subtype: Expected \'' + correct[i].subtype + '\' but got ' + t.subtype)
else if (t.value != correct[i].value)
throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got ' + t.value)
else if (t.index != correct[i].index)
throw new Error('Incorrect index: Expected \'' + correct[i].index + '\' but got ' + t.index)
});
});
it('addLevels works', () => {
let tokens = parser.util.addLevels([{
type: 'name',
subtype: 'keyword',
value: 'let',
index: 0
}, {
type: 'name',
subtype: 'variable',
value: 'x',
index: 1
}, {
type: 'operator',
subtype: 'dual',
value: '=',
index: 2
}, {
type: 'delimiter',
subtype: 'left',
value: 'parenthesis',
index: 3
}, {
type: 'number',
subtype: 'n/a',
value: '5',
index: 4
}, {
type: 'delimiter',
subtype: 'right',
value: 'parenthesis',
index: 5
}]);
let correct = [{
type: 'name',
subtype: 'keyword',
value: 'let',
index: 0,
level: 0
}, {
type: 'name',
subtype: 'variable',
value: 'x',
index: 1,
level: 0
}, {
type: 'operator',
subtype: 'dual',
value: '=',
index: 2,
level: 0
}, {
type: 'delimiter',
subtype: 'left',
value: 'parenthesis',
index: 3,
level: 1
}, {
type: 'number',
subtype: 'n/a',
value: '5',
index: 4,
level: 1
}, {
type: 'delimiter',
subtype: 'right',
value: 'parenthesis',
index: 5,
level: 1
}];
let isCorrect = true;
tokens.forEach((t, i) => {
if (t.type != correct[i].type)
throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got ' + t.type)
else if (t.subtype != correct[i].subtype)
throw new Error('Changed subtype: Expected \'' + correct[i].subtype + '\' but got \'' + t.subtype + '\'')
else if (t.value != correct[i].value)
throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got \'' + t.value + '\'')
else if (t.index != correct[i].index)
throw new Error('Incorrect index: Expected \'' + correct[i].index + '\' but got \'' + t.index + '\'')
else if (t.level != correct[i].level)
throw new Error('Incorrect level: Expected \'' + correct[i].level + '\' but got \'' + t.level + '\'')
});
});
it('getDeepestLevel works', () => {
let deepestLevel = parser.util.getDeepestLevel([{
type: 'name',
subtype: 'keyword',
value: 'let',
index: 0,
level: 0
}, {
type: 'name',
subtype: 'variable',
value: 'x',
index: 1,
level: 0
}, {
type: 'operator',
subtype: 'dual',
value: '=',
index: 2,
level: 0
}, {
type: 'delimiter',
subtype: 'left',
value: 'parenthesis',
index: 3,
level: 1
}, {
type: 'number',
subtype: 'n/a',
value: '5',
index: 4,
level: 1
}, {
type: 'operator',
subtype: 'dual',
value: '+',
index: 5,
level: 1
}, {
type: 'number',
subtype: 'n/a',
value: '3',
index: 6,
level: 1
},
{
type: 'delimiter',
subtype: 'right',
value: 'parenthesis',
index: 7,
level: 1
}, {
type: 'operator',
subtype: 'none',
value: ';',
index: 8,
level: 0
}
]);
if (deepestLevel != 1)
throw new Error('Incorrect deepestLevel. Expected \'1\' but got \'' + deepestLevel + '\'');
});
});
});
it('getDelimiterToken works', () => {
let token = tokenizer.util.getDelimiterToken(')');
if (token.type != 'delimiter')
throw new Error('Incorrect type: Expected \'delimiter\' but got ' + token.type)
else if (token.subtype != 'right')
throw new Error('Incorrect subtype: Expected \'right\' but got ' + token.subtype)
else if (token.value != 'parenthesis')
throw new Error('Incorrect value: Expected \'parenthesis\' but got ' + token.value)
});
it('operatorType works', () => {
assert.equal(tokenizer.util.operatorType('++'), 'left');
assert.equal(tokenizer.util.operatorType(';'), 'none');
assert.equal(tokenizer.util.operatorType('+'), 'dual');
});
it('determineCharType works', () => {
assert.equal(tokenizer.util.determineCharType('+'), 'operator');
assert.equal(tokenizer.util.determineCharType('"'), 'string delimiter');
assert.equal(tokenizer.util.determineCharType('4'), 'digit');
});
it('determineType works', () => {
assert.equal(tokenizer.util.determineType('let'), 'keyword');
assert.equal(tokenizer.util.determineType('dog'), 'unknown');
});
});
});
describe('parser.js', () => {
});
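
The parser tests above pin down the level rule the new addLevels relies on: a left delimiter raises the level before the token is tagged, a right delimiter is tagged before the level drops, so a parenthesis pair and everything inside it share one level. A compact standalone sketch of that rule, illustrative only and not code from this commit:

// Illustrative sketch, not part of the commit: the level-assignment rule the
// 'addLevels works' test expects, written as a small standalone function.
function assignLevels(tokens) {
  let level = 0;
  return tokens.map(t => {
    if (t.type === 'delimiter' && t.subtype === 'left') level++;   // raise before tagging
    const tagged = Object.assign({}, t, { level });                // tag the token
    if (t.type === 'delimiter' && t.subtype === 'right') level--;  // drop after tagging
    return tagged;
  });
}

// Mirrors the 'addLevels works' fixture: for let x = (5); the parentheses and
// the 5 all land on level 1, everything else stays on level 0.
const sample = [
  { type: 'name', subtype: 'keyword', value: 'let' },
  { type: 'name', subtype: 'variable', value: 'x' },
  { type: 'operator', subtype: 'dual', value: '=' },
  { type: 'delimiter', subtype: 'left', value: 'parenthesis' },
  { type: 'number', subtype: 'n/a', value: '5' },
  { type: 'delimiter', subtype: 'right', value: 'parenthesis' }
];
console.log(assignLevels(sample).map(t => t.level)); // [ 0, 0, 0, 1, 1, 1 ]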