From 812ec86acaf0cdd6202b12dfc3bc3af8b3b491d5 Mon Sep 17 00:00:00 2001 From: ElementG9 Date: Sat, 5 Oct 2019 21:07:18 -0600 Subject: [PATCH] the parsers finally working dont screw it up --- .gitignore | 1 + src/parser.js | 146 +++++++++++++++++----- src/tokenizer.js | 2 +- test/test.js | 318 +++++++++++++++++++++++++++++++++++++---------- 4 files changed, 366 insertions(+), 101 deletions(-) diff --git a/.gitignore b/.gitignore index 9c4ba90..b23e1a4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ other/ package-lock.json node_modules/ jsdoc/ +ast.json diff --git a/src/parser.js b/src/parser.js index a42a501..7b8e251 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,44 +1,126 @@ /** * @module parser - * @file Manages the parser phase of Pivot. + * @file Manages the parsing phase of Pivot. * @author Garen Tyler * @requires module:types */ const Token = require('./types.js').Token; -const Group = require('./types.js').Token; +const Group = require('./types.js').Group; +const tokenizer = require('./tokenizer.js'); -module.exports = function(tokens) { +/** + * @function parse + * @desc Takes in an array of tokens, and outputs an AST. + * @param {Token[]} tokens The input tokens. + * @returns {Token[]} The tokens structured in an AST. + * @public + */ +function parse(tokens) { + // Create our output array. + let ast = tokens; + + // Add indexes and levels. + ast = addIndexes(ast); + ast = addLevels(ast); + + // Get the deepest level. + let deepestLevel = getDeepestLevel(ast); + + // Loop through for each level. + for (let currentLevel = deepestLevel; currentLevel > 0; currentLevel--) { + console.log('looping for level ' + currentLevel); + let groupBuffer = []; + for (let j = 0; j < ast.length; j++) { + // Create previousLevel and nextLevel. + // let previousTokenLevel = 0; + // if (typeof ast[j-1] != 'undefined') + // previousTokenLevel = ast[j-1].level; + let nextTokenLevel = 0; + if (typeof ast[j+1] != 'undefined') + nextTokenLevel = ast[j+1].level; + + if (ast[j].level == currentLevel) { + groupBuffer.push(ast[j]); // Add the token to the groupBuffer. + if (ast[j].level > nextTokenLevel) { + let g = new Group(groupBuffer[0].value, groupBuffer); + g.index = g.tokens[0].index; + g.level = g.tokens[0].level - 1; // -1 because the group is on the level below. + ast.splice(g.tokens[0].index, g.tokens.length + 1, g); + j = g.tokens[0].index; + groupBuffer = []; + } + } + + // // Take care of falling edges. + // if (ast[j].level > nextTokenLevel && ast[j].level == currentLevel) { + // // The first item in the group is always a delimiter, steal info from that. + // console.log(groupBuffer[0]); + // } + } + } + + return ast; +} + +/** + * @function addIndexes + * @desc Adds basic indexes to the tokens. + * @param {Token[]} tokens The tokens. + * @returns {Token[]} The tokens with indexes. + * @private + */ +function addIndexes(tokens) { + return tokens.map((t, i) => { + t.index = i; + return t; + }); +} + +/** + * @function addLevels + * @desc Adds basic levels to the tokens. The levels are dependent on the delimiters. + * @param {Token[]} tokens The tokens. + * @returns {Token[]} The tokens with levels. + * @private + */ +function addLevels(tokens) { let level = 0; - // Add level markers. - tokens.forEach((t, i) => { - if (t.type == 'delimiter' && t.subtype == 'left') { - tokens[i].level = level; + tokens = tokens.map((t, i) => { + if (t.type == 'delimiter' && t.subtype == 'left') level++; - } else if (t.type == 'delimiter' && t.subtype == 'right') { + t.level = level; + if (t.type == 'delimiter' && t.subtype == 'right') level--; - tokens[i].level = level; - } else { - tokens[i].level = level; - } + return t; }); - // Group. - let currentLevel = 0; - let groupStack = [0]; - tokens.forEach((t, i) => { - if (currentLevel < t.level) { - tokens.splice(i, 0, new Group(tokens[i - 1].value, [])); - groupStack.push(i); - currentLevel++; - tokens[i].level = currentLevel; - } - if (t.level != 0) { - tokens[groupStack.slice(-1)].tokens.push(t); - } - if (currentLevel > t.level) { - groupStack.pop(); - currentLevel--; - } - }); - if (currentLevel != 0) {} // Error: missing delimiter. - return tokens; + if (level > 0) + throw new SyntaxError('Missing closing delimiter'); + else if (level < 0) + throw new SyntaxError('Missing opening delimiter'); + else + return tokens; +} + +/** + * @function getDeepestLevel + * @desc Finds the deepest level. + * @param {Token[]} tokens The tokens. + * @returns {Number} The deepest level. + * @private + */ +function getDeepestLevel(tokens) { + return tokens.reduce((deepest, t) => { + return t.level > deepest ? t.level : deepest; + }, 0); +} + +module.exports = { + parse, + util: { + addIndexes, + addLevels, + getDeepestLevel + } }; + +require('fs').writeFileSync('ast.json', JSON.stringify(parse(tokenizer.tokenize('let x = (5 + (6 * 2));')), null, 2), () => {}); diff --git a/src/tokenizer.js b/src/tokenizer.js index f61dd43..d60c3f3 100644 --- a/src/tokenizer.js +++ b/src/tokenizer.js @@ -5,7 +5,7 @@ * @requires module:types */ const Token = require('./types.js').Token; -const Group = require('./types.js').Token; +const Group = require('./types.js').Group; /** * @function tokenize diff --git a/test/test.js b/test/test.js index 365ae6d..e1038e4 100644 --- a/test/test.js +++ b/test/test.js @@ -4,6 +4,8 @@ const types = require('../src/types.js'); const tokenizer = require('../src/tokenizer.js'); const parser = require('../src/parser.js'); +console.log(parser.parse(tokenizer.tokenize('let x = (5 + 3);'))); + describe('types.js', () => { it('Has a Token child.', () => { assert.equal(types.hasOwnProperty('Token'), true); @@ -54,73 +56,253 @@ describe('types.js', () => { }); }); describe('tokenizer.js', () => { - it('Has a tokenize child', () => { - assert.equal(tokenizer.hasOwnProperty('tokenize'), true); - }); - it('Has a util child', () => { - assert.equal(tokenizer.hasOwnProperty('util'), true); - }); - describe('util', () => { - it('combineEscapedChars works', () => { - assert.equal(tokenizer.util.combineEscapedChars(`let x = 'test\\nnewline';`.split('')).join(''), `let x = 'test\\nnewline';`); - }); - it('removeComments works', () => { - assert.equal(tokenizer.util.removeComments(`// Comment\nlet i = 0;`.split('')).join(''), `let i = 0;`); - }); - it('changeKeywords works', () => { - let tokens = tokenizer.util.changeKeywords([{ - type: 'name', - subtype: 'variable', - value: 'let' - }, { - type: 'name', - subtype: 'variable', - value: 'x' - }]); - let correct = [{ - type: 'name', - subtype: 'keyword', - value: 'let' - }, { - type: 'name', - subtype: 'variable', - value: 'x' - }]; - let isCorrect = true; - tokens.forEach((t, i) => { - if (t.type != correct[i].type) - throw new Error('Changed type: Expected \''+ correct[i].type +'\' but got ' + t.type) - else if (t.subtype != correct[i].subtype) - throw new Error('Incorrectly changed subtype: Expected \''+ correct[i].subtype +'\' but got ' + t.subtype) - else if (t.value != correct[i].value) - throw new Error('Changed value: Expected \''+ correct[i].value +'\' but got ' + t.value) + it('Has a tokenize child', () => { + assert.equal(tokenizer.hasOwnProperty('tokenize'), true); + }); + it('Has a util child', () => { + assert.equal(tokenizer.hasOwnProperty('util'), true); + }); + describe('util', () => { + it('combineEscapedChars works', () => { + assert.equal(tokenizer.util.combineEscapedChars(`let x = 'test\\nnewline';`.split('')).join(''), `let x = 'test\\nnewline';`); + }); + it('removeComments works', () => { + assert.equal(tokenizer.util.removeComments(`// Comment\nlet i = 0;`.split('')).join(''), `let i = 0;`); + }); + it('changeKeywords works', () => { + let tokens = tokenizer.util.changeKeywords([{ + type: 'name', + subtype: 'variable', + value: 'let' + }, { + type: 'name', + subtype: 'variable', + value: 'x' + }]); + let correct = [{ + type: 'name', + subtype: 'keyword', + value: 'let' + }, { + type: 'name', + subtype: 'variable', + value: 'x' + }]; + let isCorrect = true; + tokens.forEach((t, i) => { + if (t.type != correct[i].type) + throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got \'' + t.type + '\'') + else if (t.subtype != correct[i].subtype) + throw new Error('Incorrectly changed subtype: Expected \'' + correct[i].subtype + '\' but got \'' + t.subtype + '\'') + else if (t.value != correct[i].value) + throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got \'' + t.value + '\'') + }); + }); it('getDelimiterToken works', () => { + let token = tokenizer.util.getDelimiterToken(')'); + if (token.type != 'delimiter') + throw new Error('Incorrect type: Expected \'delimiter\' but got \'' + token.type + '\'') + else if (token.subtype != 'right') + throw new Error('Incorrect subtype: Expected \'right\' but got \'' + token.subtype + '\'') + else if (token.value != 'parenthesis') + throw new Error('Incorrect value: Expected \'parenthesis\' but got \'' + token.value + '\'') + }); it('operatorType works', () => { + assert.equal(tokenizer.util.operatorType('++'), 'left'); + assert.equal(tokenizer.util.operatorType(';'), 'none'); + assert.equal(tokenizer.util.operatorType('+'), 'dual'); + }); it('determineCharType works', () => { + assert.equal(tokenizer.util.determineCharType('+'), 'operator'); + assert.equal(tokenizer.util.determineCharType('"'), 'string delimiter'); + assert.equal(tokenizer.util.determineCharType('4'), 'digit'); + }); it('determineType works', () => { + assert.equal(tokenizer.util.determineType('let'), 'keyword'); + assert.equal(tokenizer.util.determineType('dog'), 'unknown'); + }); + }); + }); describe('parser.js', () => { + it('Has a parse child', () => { + assert.equal(parser.hasOwnProperty('parse'), true); + }); + it('Has a util child', () => { + assert.equal(parser.hasOwnProperty('util'), true); + }); + describe('util', () => { + it('addIndexes works', () => { + let tokens = parser.util.addIndexes([{ + type: 'name', + subtype: 'keyword', + value: 'let' + }, { + type: 'name', + subtype: 'variable', + value: 'x' + }]); + let correct = [{ + type: 'name', + subtype: 'keyword', + value: 'let', + index: 0 + }, { + type: 'name', + subtype: 'variable', + value: 'x', + index: 1 + }]; + let isCorrect = true; + tokens.forEach((t, i) => { + if (t.type != correct[i].type) + throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got ' + t.type) + else if (t.subtype != correct[i].subtype) + throw new Error('Changed subtype: Expected \'' + correct[i].subtype + '\' but got ' + t.subtype) + else if (t.value != correct[i].value) + throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got ' + t.value) + else if (t.index != correct[i].index) + throw new Error('Incorrect index: Expected \'' + correct[i].index + '\' but got ' + t.index) + }); + }); + it('addLevels works', () => { + let tokens = parser.util.addLevels([{ + type: 'name', + subtype: 'keyword', + value: 'let', + index: 0 + }, { + type: 'name', + subtype: 'variable', + value: 'x', + index: 1 + }, { + type: 'operator', + subtype: 'dual', + value: '=', + index: 2 + }, { + type: 'delimiter', + subtype: 'left', + value: 'parenthesis', + index: 3 + }, { + type: 'number', + subtype: 'n/a', + value: '5', + index: 4 + }, { + type: 'delimiter', + subtype: 'right', + value: 'parenthesis', + index: 5 + }]); + let correct = [{ + type: 'name', + subtype: 'keyword', + value: 'let', + index: 0, + level: 0 + }, { + type: 'name', + subtype: 'variable', + value: 'x', + index: 1, + level: 0 + }, { + type: 'operator', + subtype: 'dual', + value: '=', + index: 2, + level: 0 + }, { + type: 'delimiter', + subtype: 'left', + value: 'parenthesis', + index: 3, + level: 1 + }, { + type: 'number', + subtype: 'n/a', + value: '5', + index: 4, + level: 1 + }, { + type: 'delimiter', + subtype: 'right', + value: 'parenthesis', + index: 5, + level: 1 + }]; + let isCorrect = true; + tokens.forEach((t, i) => { + if (t.type != correct[i].type) + throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got ' + t.type) + else if (t.subtype != correct[i].subtype) + throw new Error('Changed subtype: Expected \'' + correct[i].subtype + '\' but got \'' + t.subtype + '\'') + else if (t.value != correct[i].value) + throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got \'' + t.value + '\'') + else if (t.index != correct[i].index) + throw new Error('Incorrect index: Expected \'' + correct[i].index + '\' but got \'' + t.index + '\'') + else if (t.level != correct[i].level) + throw new Error('Incorrect level: Expected \'' + correct[i].level + '\' but got \'' + t.level + '\'') + }); + }); + it('getDeepestLevel works', () => { + let deepestLevel = parser.util.getDeepestLevel([{ + type: 'name', + subtype: 'keyword', + value: 'let', + index: 0, + level: 0 + }, { + type: 'name', + subtype: 'variable', + value: 'x', + index: 1, + level: 0 + + }, { + type: 'operator', + subtype: 'dual', + value: '=', + index: 2, + level: 0 + }, { + type: 'delimiter', + subtype: 'left', + value: 'parenthesis', + index: 3, + level: 1 + }, { + type: 'number', + subtype: 'n/a', + value: '5', + index: 4, + level: 1 + }, { + type: 'operator', + subtype: 'dual', + value: '+', + index: 5, + level: 1 + }, { + type: 'number', + subtype: 'n/a', + value: '3', + index: 6, + level: 1 + }, + { + type: 'delimiter', + subtype: 'right', + value: 'parenthesis', + index: 7, + level: 1 + }, { + type: 'operator', + subtype: 'none', + value: ';', + index: 8, + level: 0 + } + ]); + if (deepestLevel != 1) + throw new Error('Incorrect deepestLevel. Expected \'1\' but got \'' + deepestLevel + '\''); + }); }); }); - it('getDelimiterToken works', () => { - let token = tokenizer.util.getDelimiterToken(')'); - if (token.type != 'delimiter') - throw new Error('Incorrect type: Expected \'delimiter\' but got ' + token.type) - else if (token.subtype != 'right') - throw new Error('Incorrect subtype: Expected \'right\' but got ' + token.subtype) - else if (token.value != 'parenthesis') - throw new Error('Incorrect value: Expected \'parenthesis\' but got ' + token.value) - }); - it('operatorType works', () => { - assert.equal(tokenizer.util.operatorType('++'), 'left'); - assert.equal(tokenizer.util.operatorType(';'), 'none'); - assert.equal(tokenizer.util.operatorType('+'), 'dual'); - }); - it('determineCharType works', () => { - assert.equal(tokenizer.util.determineCharType('+'), 'operator'); - assert.equal(tokenizer.util.determineCharType('"'), 'string delimiter'); - assert.equal(tokenizer.util.determineCharType('4'), 'digit'); - }); - it('determineType works', () => { - assert.equal(tokenizer.util.determineType('let'), 'keyword'); - assert.equal(tokenizer.util.determineType('dog'), 'unknown'); - }); - }); -}); -describe('parser.js', () => { - -});