diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore index f5c6948..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1 @@ -package-lock.json -node_modules/ -jsdoc/ -ast.json -target/ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..ed4741f --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "pivot" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..61281f9 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "pivot" +version = "0.1.0" +authors = ["ElementG9 "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/LICENSE b/LICENSE index ab97016..f7297e7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019 Garen Tyler +Copyright (c) 2020 Garen Tyler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/bin/pivot.js b/bin/pivot.js deleted file mode 100755 index 0fd7c3d..0000000 --- a/bin/pivot.js +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env node -const args = process.argv.slice(2); -const tokenizer = require('../src/tokenizer.js'); -const parser = require('../src/parser.js'); -const code = require('../src/code.js'); - -if (typeof args[0] != 'undefined') { - // Execute from file. -} else { // REPL. - const rl = require('readline-sync'); - const exec = require('child_process').exec; - function repl(prompt, func) { - let answer; - while (answer != 'exit') { - answer = rl.question(prompt); - if (answer == 'exit') - process.exit(0); - if (answer == 'clear') { - console.clear(); - continue; - } - func(answer); - } - } - console.log('Welcome to Pivot v0.2.0 Alpha.'); - console.log('Type \'exit\' to exit.'); - let data = { - log: "console.log" - }; - repl('> ', (answer) => { - let jsAnswer = code.translate(parser.parse(tokenizer.tokenize(answer)), data); - data = jsAnswer.data; - // console.log(require('util').inspect(jsAnswer, { depth: null })); - eval(jsAnswer.code); - }); -} diff --git a/doc/operators.txt b/doc/operators.txt deleted file mode 100644 index b0c4fb3..0000000 --- a/doc/operators.txt +++ /dev/null @@ -1,82 +0,0 @@ -A higher precedence operator becomes an operand for a lower precedence one. -Higher precedence operators get resolved first. - -Operators in Pivot: -+-------------+------------+----------------------------------+--------------------+-----------------+ -| Precedence | Operator | Description | Operands | Associativity | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 16 | () | Grouping | internal | n/a | grouping() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 15 | . | Member Access | dual | Left to Right | memberAccess() -| 15 | [] | Computed Member Access | before, internal | Left to Right | computedMemberAccess() -| 15 | () | Function Call | before, internal | Left to Right | functionCall() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 14 | let | Variable Creation | after | Right to Left | keywords() -| 14 | const | Constant Creation | after | Right to Left | keywords() -| 14 | new | Object Creation | after | Right to Left | keywords() -| 14 | return | Function Return | after | n/a | keywords() -| 14 | () {} | Function Creation | internal | n/a | functionCreation() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 13 | ++ | Postfix Increment | before | Left to Right | postfixOperators() -| 13 | -- | Postfix Decrement | before | Left to Right | postfixOperators() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 12 | ! | Logical NOT | after | Right to Left | prefixOperators() -| 12 | - | Unary Negation | after | Right to Left | prefixOperators() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 11 | ** | Exponentiation | dual | Right to Left | mathOperators(0) -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 10 | * | Multiplication | dual | Left to Right | mathOperators(1) -| 10 | / | Division | dual | Left to Right | mathOperators(1) -| 10 | % | Modulus | dual | Left to Right | mathOperators(1) -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 9 | + | Addition | dual | Left to Right | mathOperators(2) -| 9 | - | Subtraction | dual | Left to Right | mathOperators(2) -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 7 | < | Less Than | dual | Left to Right | comparisonOperators() -| 7 | <= | Less Than or Equal | dual | Left to Right | comparisonOperators() -| 7 | > | Greater Than | dual | Left to Right | comparisonOperators() -| 7 | >= | Greater Than or Equal | dual | Left to Right | comparisonOperators() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 6 | == | Equality | dual | Left to Right | assign() -| 6 | != | Inequality | dual | Left to Right | assign() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 4 | && | Logical AND | dual | Left to Right | logicOperators() -| 4 | ^^ | Logical XOR | dual | Left to Right | logicOperators() -| 4 | || | Logical OR | dual | Left to Right | logicOperators() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 3 | = | Assignment | dual | Right to Left | opAssign() -| 3 | += | Add and Assign | dual | Right to Left | opAssign() -| 3 | -= | Subtract and Assign | dual | Right to Left | opAssign() -| 3 | **= | Exponentiate and Assign | dual | Right to Left | opAssign() -| 3 | *= | Multiply and Assign | dual | Right to Left | opAssign() -| 3 | /= | Divide and Assign | dual | Right to Left | opAssign() -| 3 | %= | Modulo and Assign | dual | Right to Left | opAssign() -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 2 | , | Comma | none | Left to Right | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 1 | ; | Statement End | before | Left to Right | -+-------------+------------+----------------------------------+--------------------+-----------------+ - -Possible Operators in Pivot: -+-------------+------------+----------------------------------+--------------------+-----------------+ -| Precedence | Operator | Description | Operands | Associativity | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 12 | ~ | Bitwise NOT | after | Right to Left | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 8 | << | Bitwise Left Shift | dual | Left to Right | -| 8 | >> | Bitwise Right Shift | dual | Left to Right | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 5 | and | Logical AND | dual | Left to Right | -| 5 | xor | Logical XOR | dual | Left to Right | -| 5 | or | Logical OR | dual | Left to Right | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 5 | & | Bitwise AND | dual | Left to Right | -| 5 | ^ | Bitwise XOR | dual | Left to Right | -| 5 | | | Bitwise OR | dual | Left to Right | -+-------------+------------+----------------------------------+--------------------+-----------------+ -| 3 | <<= | Bitwise Left Shift and Assign | dual | Right to Left | -| 3 | >>= | Bitwise Right Shift and Assign | dual | Right to Left | -| 3 | &= | Bitwise AND and Assign | dual | Right to Left | -| 3 | ^= | Bitwise XOR and Assign | dual | Right to Left | -| 3 | |= | Bitwise OR and Assign | dual | Right to Left | -+-------------+------------+----------------------------------+--------------------+-----------------+ diff --git a/other/repl.js b/other/repl.js deleted file mode 100644 index e8cd9dd..0000000 --- a/other/repl.js +++ /dev/null @@ -1,11 +0,0 @@ -const rl = require('readline-sync'); -function repl(prompt, func) { - let answer; - while (answer != 'exit') { - answer = rl.question(prompt); - if (answer == 'exit') - continue; - func(answer); - } -} -module.exports = repl; diff --git a/other/rpn.js b/other/rpn.js deleted file mode 100644 index e490609..0000000 --- a/other/rpn.js +++ /dev/null @@ -1,43 +0,0 @@ -// This is an unrelated file to Pivot. -// I wanted to put this in the repository in order to test another system of operators. -// This is reverse polish notation, implemented in JavaScript. -var solve = (exp) => { - var stack = []; - var expression = exp.split(" "); - for (var j = 0; j < expression.length; j++) { - var key = expression[j]; - if (key.match(/\d/)) { - stack.push(parseInt(key)); - } else if (key.match(/\w/)) { - if (Object.keys(progData).includes(key)) { - stack.push(progData[key]); - } else { - stack.push(key); - } - } - switch (key) { - case "+": // add - var opItems = stack.splice(stack.length - 2, 2); - var result = opItems[0] + opItems[1]; - stack.push(result); - break; - case "-": // subtract - var opItems = stack.splice(stack.length - 2, 2); - var result = opItems[0] - opItems[1]; - stack.push(result); - break; - case "*": // multiply - var opItems = stack.splice(stack.length - 2, 2); - var result = opItems[0] * opItems[1]; - stack.push(result); - break; - case "/": // divide - var opItems = stack.splice(stack.length - 2, 2); - var result = opItems[0] / opItems[1]; - stack.push(result); - break; - } - } - return stack; -}; -module.exports = solve; diff --git a/other/shuntingyard.js b/other/shuntingyard.js deleted file mode 100644 index 12bf33b..0000000 --- a/other/shuntingyard.js +++ /dev/null @@ -1,178 +0,0 @@ -const repl = require('./repl.js'); -// My implementation of the algorithm from https://en.wikipedia.org/wiki/Shunting-yard_algorithm. -function shuntingYardSolve(exp) { - exp = exp.split(''); - // Remove whitespace. - exp = exp.filter(e => /\S+/.test(e)); - let output = []; - let opStack = []; - let inNumber = false; - let numStack = ''; - for (let i = 0; i < exp.length; i++) { - if (/\d|\./.test(exp[i])) { - if (inNumber) - numStack += exp[i]; - else { - inNumber = true; - numStack = exp[i]; - } - } else if (/\w/.test(exp[i])) { - if (inNumber) { - inNumber = false; - output.push((numStack)); - numStack = ''; - } - output.push(exp[i]); - } else if (/\+|-|\*|\/|=|\^/.test(exp[i])) { - if (inNumber) { - inNumber = false; - output.push((numStack)); - numStack = ''; - } - if (exp[i] == '-') { - if (i == 0) - exp[i] = ':'; - else if (exp[i - 1] == '(' || /\+|-|\*|\/|=|\^/.test(exp[i - 1])) - exp[i] = ':'; - } - let op = { - value: exp[i] - }; - if (exp[i] == ':' || exp[i] == '^') - op.precedence = 4; - else if (exp[i] == '*' || exp[i] == '/') - op.precedence = 3; - else if (exp[i] == '+' || exp[i] == '-') - op.precedence = 2; - else if (exp[i] == '=') - op.precedence = 1; - if (typeof opStack.slice(-1)[0] != 'undefined') - while (opStack.slice(-1)[0].precedence > op.precedence && opStack.slice(-1)[0].value != '(') - output.push(opStack.pop().value); - opStack.push(op); - } else if (exp[i] == '(') { - if (inNumber) { - inNumber = false; - output.push((numStack)); - numStack = ''; - } - opStack.push({ - value: exp[i], - precedence: 5 - }); - } else if (exp[i] == ')') { - if (inNumber) { - inNumber = false; - output.push((numStack)); - numStack = ''; - } - if (typeof opStack.slice(-1)[0] != 'undefined') { - while (opStack.slice(-1)[0].value != '(') - output.push(opStack.pop().value); - if (opStack.slice(-1)[0].value == '(') - opStack.pop(); - } else throw new SyntaxError('Mismatched parentheses') - } - } - if (numStack.length > 0) - output.push((numStack)); - while (opStack.length > 0) - output.push(opStack.pop().value); - return output; -} -// Reverse Polish notation implemented in JavaScript. -function rpnSolve(exp, data) { - let stack = []; - for (let i = 0; i < exp.length; i++) { - let key = exp[i]; - if (key.match(/\d|[A-Za-z]/)) - stack.push((key)); - let opItems; - let result; - switch (key) { - case '+': // Add. - opItems = stack.splice(stack.length - 2, 2); - opItems = opItems.map(item => { - if (typeof data[item] != 'undefined') - return parseFloat(data[item]); - else return parseFloat(item); - }); - result = opItems[0] + opItems[1]; - stack.push(result); - break; - case '-': // Subtract. - opItems = stack.splice(stack.length - 2, 2); - opItems = opItems.map(item => { - if (typeof data[item] != 'undefined') - return parseFloat(data[item]); - else return parseFloat(item); - }); - result = opItems[0] - opItems[1]; - stack.push(result); - break; - case '*': // Multiply - opItems = stack.splice(stack.length - 2, 2); - opItems = opItems.map(item => { - if (typeof data[item] != 'undefined') - return parseFloat(data[item]); - else return parseFloat(item); - }); - result = opItems[0] * opItems[1]; - stack.push(result); - break; - case '/': // Divide - opItems = stack.splice(stack.length - 2, 2); - opItems = opItems.map(item => { - if (typeof data[item] != 'undefined') - return parseFloat(data[item]); - else return parseFloat(item); - }); - result = opItems[0] / opItems[1]; - stack.push(result); - break; - case '^': // Exponentiation - opItems = stack.splice(stack.length - 2, 2); - opItems = opItems.map(item => { - if (typeof data[item] != 'undefined') - return parseFloat(data[item]); - else return parseFloat(item); - }); - result = opItems[0] ** opItems[1]; - stack.push(result); - break; - case ':': // Unary negation - opItems = stack.splice(stack.length - 1, 1); - opItems = opItems.map(item => { - if (typeof data[item] != 'undefined') - return parseFloat(data[item]); - else return parseFloat(item); - }); - result = -opItems[0]; - stack.push(result); - break; - case '=': // Assignment - opItems = stack.splice(stack.length - 2, 2); - data[opItems[0]] = parseFloat(opItems[1]); - stack.push(parseFloat(opItems[1])); - break; - } - } - return { - result: stack[0], - data: data - }; -}; - -function solve(exp) { - let data = {}; - exp = exp.split(';'); - exp.forEach(e => { - console.log(shuntingYardSolve(e).join(' ')); - let result = rpnSolve(shuntingYardSolve(e), data); - data = result.data; - console.log(result); - }); -}; - -console.log('Math Solver by ElementG9:'); -repl('> ', solve); diff --git a/other/temp.js b/other/temp.js deleted file mode 100644 index 69b636d..0000000 --- a/other/temp.js +++ /dev/null @@ -1,178 +0,0 @@ -function isLetter(char) { - return /[A-Za-z]/.test(char); -} -function isOperator(char) { - return /\+|\-|\*|\/|\=|\=\=|\>|\<|\>\=|\<\=|\=\>|\;/.test(char); -} -function isDigit(char) { - return /\d/.test(char); -} -function isWhitespace(char) { - return /\s/.test(char); -} -function Token(type, subtype, value) { - this.type = type; - this.subtype = subtype; - this.value = value; -} -function opType(operator) { - if (false) - return 'left'; - else if (false) - return 'right'; - else if (/\;/.test(operator)) - return 'none'; - else - return 'dual'; -} -function isKeyword(value) { - return value == 'let'; -} -function oldTokenize(rawCode) { - let chars = rawCode.split(''); - // Create letter, operator, number, and string buffers. - let lb = []; - let ob = []; - let nb = []; - let sb = []; - // Create token array. - let tokens = []; - let stringData = { - inString: false, - stringType: null - }; - // Check for comments. - for (let i = 0; i < chars.length - 1; i++) { - let char = chars[i]; - if (char == '/' && chars[i + 1] == '/') { - // Remove the characters from the comment. - chars.splice(i, chars.indexOf('\n', i) - i); - // Adjust the index accordingly. - i -= chars.indexOf('\n', i) - i; - } - if (char == '/' && chars[i + 1] == '*') { - // If multiline comment /* find the next */ - let endindex; - for (let j = 0; j < chars.length - 1; j++) { - if (chars[j] == '*' && chars[j + 1] == '/') { - endindex = j + 2; - console.log(chars.splice(i, (j + 2) - i)); - } - } - // Adjust the index accordingly. - i -= endindex - i; - } - } - // Check for characters to be escaped. - for (let i = 0; i < chars.length; i++) { - let char = chars[i]; - if ((char == '\\') && (i + 1 != chars.length)) { - chars.splice(i, 2, `${char}${chars[i+1]}`); - continue; - } - } - // Loop through all the characters. - for (let i = 0; i < chars.length; i++) { - let char = chars[i]; - // Behave differently in/out of a string. - if (stringData.inString) { - if (char == `'`) { - stringData.inString = false; - tokens.push(new Token('string', 'n/a', sb.join(''))); - sb = []; - } else { - sb.push(char); - } - } else { - if (char == `'`) { - stringData.inString = true; - } else { - // Parsing chars, ignoring strings. - if (isLetter(char)) { - lb.push(char); - if (ob.length > 0) { - let op = ob.join(''); - tokens.push(new Token('operator', opType(op), op)); - ob = []; - } - if (nb.length > 0) { - tokens.push(new Token('number', 'n/a', nb.join(''))); - nb = []; - } - } else if (isOperator(char)) { - ob.push(char); - if (lb.length > 0) { - tokens.push(new Token('variable', 'n/a', lb.join(''))); - lb = []; - } - if (nb.length > 0) { - tokens.push(new Token('number', 'n/a', nb.join(''))); - nb = []; - } - } else if (isDigit(char)) { - nb.push(char); - if (lb.length > 0) { - tokens.push(new Token('variable', 'n/a', lb.join(''))); - lb = []; - } - if (ob.length > 0) { - let op = ob.join(''); - tokens.push(new Token('operator', opType(op), op)); - ob = []; - } - } else if (isWhitespace(char)) { - // Close all buffers. - if (lb.length > 0) { - tokens.push(new Token('variable', 'n/a', lb.join(''))); - lb = []; - } - if (ob.length > 0) { - let op = ob.join(''); - tokens.push(new Token('operator', opType(op), op)); - ob = []; - } - if (nb.length > 0) { - tokens.push(new Token('number', 'n/a', nb.join(''))); - nb = []; - } - } else if (isDelimiter(char)) { - // Close all buffers. - if (lb.length > 0) { - tokens.push(new Token('variable', 'n/a', lb.join(''))); - lb = []; - } - if (ob.length > 0) { - let op = ob.join(''); - tokens.push(new Token('operator', opType(op), op)); - ob = []; - } - if (nb.length > 0) { - tokens.push(new Token('number', 'n/a', nb.join(''))); - nb = []; - } - // Categorize and push. - if (char == '(' || char == ')') { - tokens.push(new Token('delimiter', char == '(' ? 'left' : 'right', 'parenthesis')); - } else if (char == '[' || char == ']') { - tokens.push(new Token('delimiter', char == '[' ? 'left' : 'right', 'bracket')); - } else if (char == '{' || char == '}') { - tokens.push(new Token('delimiter', char == '{' ? 'left' : 'right', 'brace')); - } - } - } - } - } - // Check for keywords. - for (let i = 0; i < tokens.length; i++) { - if (tokens[i].type == 'variable' && isKeyword(tokens[i].value)) { - tokens[i].type = 'keyword'; - } - } - // Add indexes. - let layer = 0; - for (let i = 0; i < tokens.length; i++) { - tokens[i].index = i; - } - return tokens; -}; -console.log(oldTokenize(`let x = 'it\\'s cool outside';`)); diff --git a/package.json b/package.json deleted file mode 100644 index ecf7feb..0000000 --- a/package.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "name": "pivot-lang", - "version": "0.2.0", - "description": "Pivot is a new programming language built on JavaScript", - "main": "./bin/pivot.js", - "directories": { - "test": "test" - }, - "scripts": { - "test": "mocha", - "build": "npm run test; jsdoc src/ -r -d ./jsdoc/ -R ./readme.md" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/ElementG9/Pivot.git" - }, - "keywords": [ - "pivot", - "javascript", - "programming", - "language" - ], - "author": "Garen Tyler ", - "license": "MIT", - "bugs": { - "url": "https://github.com/ElementG9/Pivot/issues" - }, - "homepage": "https://github.com/ElementG9/Pivot#readme", - "bin": { - "pivot": "./bin/pivot.js" - }, - "preferGlobal": true, - "dependencies": { - "fs": "0.0.1-security", - "readline-sync": "^1.4.10" - }, - "devDependencies": { - "mocha": "^6.2.2" - } -} diff --git a/src/code.js b/src/code.js deleted file mode 100644 index 57624a2..0000000 --- a/src/code.js +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @module code - * @file Runs the code / transpiles the code to JavaScript - * @author Garen Tyler - */ - - /** - * @function translate - * @desc Translates the code to JS, given an AST - * @param {Token[]} ast The ast. - * @returns {String} The JS code. - * @public - */ -function translate(ast, data) { - let out = ''; - for (let i = 0; i < ast.length; i++) { - if (ast[i].type == 'operator' && ast[i].subtype == 'function call') { - let temp = ''; - if (!(Object.keys(data).indexOf(ast[i].operands[0].value) > -1)) - throw new ReferenceError(`Undefined function ${ast[i].operands[0].value}`); - else temp += data[ast[i].operands[0].value]; - temp += '('; - for (let j = 0; j < ast[i].operands[1].tokens.length; j++) { - if (j != 0) - temp += ', '; - if (ast[i].operands[1].tokens[j].type == 'string') - temp += `"${ast[i].operands[1].tokens[j].value}"`; - } - temp += ');' - out += temp; - } - } - return { - data, - code: out - }; -} - -module.exports = { - translate -}; diff --git a/src/interpreter.rs b/src/interpreter.rs new file mode 100644 index 0000000..02cc0a2 --- /dev/null +++ b/src/interpreter.rs @@ -0,0 +1,37 @@ +use super::parser::*; + +pub fn interpret(ast: Program) { + for stmt in ast { + use Statement::*; + match stmt { + FunctionCall { name, arguments } => { + let name: &str = &name; + // println!("{} called!", name); + match name { + "log" => { + let mut args: Vec = vec![]; + for a in arguments { + match a { + Expression::Literal(literal) => match literal { + Literal::StringLiteral(s) => args.push(s), + Literal::IntLiteral(i) => args.push(format!("{}i", i)), + Literal::FloatLiteral(f) => { + args.push(format!("{}.{}f", f.trunc(), f.fract())) + } + }, + Expression::Null => { + args.push("null".to_owned()); + } + } + } + println!("{}", args.join(", ")); + } + _ => {} + } + } + Nop => { + // println!("No-op!"); + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..258eba4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,15 @@ +pub mod interpreter; +pub mod parser; +pub mod tokenizer; + +pub fn interpret(source: &str) { + let tokens = tokenizer::tokenize(source); + // println!("{:#?}", tokens); + let ast = parser::parse(tokens); + // println!("{:#?}", ast); + interpreter::interpret(ast); +} +pub fn interpret_file(filename: &str) { + let src = std::fs::read_to_string(filename).unwrap(); + interpret(&src); +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..ee6d8c1 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,8 @@ +fn main() { + let args = std::env::args().collect::>(); + if args.len() < 2 { + println!("Usage: pivot "); + } else { + pivot::interpret_file(&args[1]); + } +} diff --git a/src/parser.js b/src/parser.js deleted file mode 100644 index ed260f5..0000000 --- a/src/parser.js +++ /dev/null @@ -1,421 +0,0 @@ -/** - * @module parser - * @file Manages the parsing phase of Pivot. - * @author Garen Tyler - * @requires module:types - */ -const Group = require('./types.js').Group; -const Operator = require('./types.js').Operator; - -/** - * @function parse - * @desc Takes in an array of tokens, and outputs an AST. - * @param {Token[]} tokens The input tokens. - * @returns {Token[]} The tokens structured in an AST. - * @public - */ -function parse(tokens) { - // Create our output array. - let ast = tokens; - - // Add indexes and levels. - ast = addIndexes(ast); - ast = addLevels(ast); - - // Start grouping by precedence. - // Precedence 16. - ast = grouping(ast); - // Precedence 15. - ast = memberAccess(ast); - ast = computedMemberAccess(ast); - ast = functionCall(ast); - // Precedence 14. - ast = keywords(ast); - ast = functionCreation(ast); - // Precedence 13. - ast = postfixOperators(ast); - // Precedence 12. - ast = prefixOperators(ast); - // Precedence 11. - ast = mathOperators(ast, 0); // Level 0 math operators: **. - // Precedence 10. - ast = mathOperators(ast, 1); // Level 1 math operators: *, /, %. - // Precedence 9. - ast = mathOperators(ast, 2); // Level 2 math operators: +, -. - // Precedence 7. - // ast = comparisonOperators(ast); - // Precedence 6. - // ast = assign(ast); - // Precedence 4. - // ast = logicOperators(ast); - // Precedence 3. - // ast = opAssign(ast); - return ast; -} - -/** - * @function addIndexes - * @desc Adds basic indexes to the tokens. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The tokens with indexes. - * @private - */ -function addIndexes(tokens) { - return tokens.map((t, i) => { - t.index = i; - return t; - }); -} - -/** - * @function addLevels - * @desc Adds basic levels to the tokens. The levels are dependent on the delimiters. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The tokens with levels. - * @private - */ -function addLevels(tokens) { - let level = 0; - tokens = tokens.map((t, i) => { - if (t.type == 'delimiter' && t.subtype == 'left') - level++; - t.level = level; - if (t.type == 'delimiter' && t.subtype == 'right') - level--; - return t; - }); - if (level > 0) - throw new SyntaxError('Missing closing delimiter'); - else if (level < 0) - throw new SyntaxError('Missing opening delimiter'); - else - return tokens; -} - -/** - * @function getDeepestLevel - * @desc Finds the deepest level of the ast. - * @param {Token[]} tokens The tokens. - * @returns {Number} The deepest level. - * @private - */ -function getDeepestLevel(tokens) { - return tokens.reduce((deepest, t) => { - return t.level > deepest ? t.level : deepest; - }, 0); -} - -/** - * @function grouping - * @desc Combine groups of tokens by delimiter. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The grouped tokens, or the basic ast. - * @private - */ -function grouping(tokens) { - // Get the deepest level. - let deepestLevel = getDeepestLevel(tokens); - let groupBuffer; - let opening; - // Group the deepest levels first. - for (let currentLevel = deepestLevel; currentLevel > 0; currentLevel--) { - groupBuffer = []; // Overwrite groupBuffer. - opening = null; // Overwrite opening. - for (let i = 0; i < tokens.length; i++) { - if (tokens[i].level == currentLevel) { - if (groupBuffer.length == 0) - opening = tokens[i]; - groupBuffer.push(tokens[i]); - if (typeof tokens[i + 1] == 'undefined' || (tokens[i].type == 'delimiter' && tokens[i].subtype == 'right' && tokens[i].value == opening.value)) { // The end of the tokens. - let g = new Group(groupBuffer[0].value, groupBuffer); - g.index = g.tokens[0].index; - g.level = g.tokens[0].level - 1; // -1 because the group is on the level below. - let length = g.tokens.length; // Keep track of how many tokens there are before removing the delimiters. - g.tokens = g.tokens.splice(1, g.tokens.length - 2); // Remove the delimiters in g.tokens. - i -= length - 1; // Reset the counter. - tokens.splice(i, length, g); // Replace the tokens with the new group. - // Reset groupBuffer and opening. - groupBuffer = []; - opening = null; - } - } - } - } - return tokens; -} - -/** - * @function memberAccess - * @desc Combine groups of tokens by member access. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The ast with grouped member access. - * @private - */ -function memberAccess(ast) { - for (let i = 0; i < ast.length; i++) { - if (ast[i].type == 'group') - ast[i].tokens = memberAccess(ast[i].tokens); // Recursively order the groups. - else if (ast[i].type == 'operator' && ast[i].value == '.') { // Member access operator. - if (typeof ast[i - 1] == 'undefined' || typeof ast[i + 1] == 'undefined') - throw new SyntaxError('Operator requires two operands.'); - let op = new Operator(ast[i].subtype, ast[i].value, [ast[i - 1], ast[i + 1]]); - op.index = ast[i - 1].index; - op.level = ast[i].level; - ast.splice(i - 1, 3, op); - i--; // Removed 3 tokens, put in 1, skip 1 token. Reduce the counter by 1. - } - } - return ast; -} - -/** - * @function computedMemberAccess - * @desc Combine groups of tokens by computed member access. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The ast with grouped computed member access. - * @private - */ -function computedMemberAccess(ast) { - // Computed member access is Variable, Bracket Group. - for (let i = 0; i < ast.length; i++) { - if (ast[i].type == 'group') - ast[i].tokens = computedMemberAccess(ast[i].tokens); // Recursively order the groups. - else if (ast[i].type == 'name' && ast[i].subtype == 'variable') { // Member access operator. - if (typeof ast[i + 1] == 'undefined') - continue; // Nothing after the variable; skip this loop. - if (ast[i + 1].type == 'group' && ast[i + 1].subtype == 'bracket') { - ast[i + 1].tokens = computedMemberAccess(ast[i + 1].tokens); // Order the group that we care about before we mess with it. - let op = new Operator('n/a', 'member access', [ast[i], ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i, 2, op); - // Removed 2 tokens, put in 1, skip 1 token. Don't reduce the counter. - } else continue; // Not what we need. - } - } - return ast; -} - -/** - * @function functionCall - * @desc Combine groups of tokens by function calls. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The ast with grouped function calls. - * @private - */ -function functionCall(ast) { - // Function call is Variable, Parenthesis Group. - for (let i = 0; i < ast.length; i++) { - if (ast[i].type == 'group') - ast[i].tokens = functionCall(ast[i].tokens); // Recursively order the groups. - else if ((ast[i].type == 'name' && ast[i].subtype == 'variable') || // Normal function call - (ast[i].type == 'operator' && (ast[i].value == '.' || ast[i].value == 'member access'))) { // Function call in member access. Example: console.log() - if (typeof ast[i + 1] == 'undefined') - continue; // Nothing after the variable; skip this loop. - if (ast[i + 1].type == 'group' && ast[i + 1].subtype == 'parenthesis') { - ast[i + 1].tokens = functionCall(ast[i + 1].tokens); // Order the group that we care about before we mess with it. - let op = new Operator('function call', ast[i].value, [ast[i], ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i, 2, op); - // Removed 2 tokens, put in 1, skip 1 token. Don't reduce the counter. - } else continue; // Not what we need. - } - } - return ast; -} - -/** - * @function keywords - * @desc Combine groups of tokens by keywords. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The ast with grouped keywords. - * @private - */ -function keywords(ast) { - for (let i = ast.length - 1; i >= 0; i--) { // Keywords are rtl associative, so loop backwards. - if (ast[i].type == 'group') - ast[i].tokens = keywords(ast[i].tokens); // Recursively order the groups. - else if (ast[i].type == 'name' && ast[i].subtype == 'keyword') { - if (typeof ast[i + 1] == 'undefined') - throw new SyntaxError('Keyword requires one operand after it.'); - let key = new Operator('keyword', ast[i].value, [ast[i + 1]]); - key.level = ast[i].level; - key.index = ast[i].index; - ast.splice(i, 2, key); - // Looping backwards and didn't remove any items before the current one. Don't reduce the counter. - } - } - return ast; -} - -/** - * @function functionCreation - * @desc Combine groups of tokens by function creation. - * @param {Token[]} tokens The tokens. - * @returns {Token[]} The ast with grouped function creation. - * @private - */ -function functionCreation(ast) { - // Function call is Parenthesis Group, Brace Group. - for (let i = 0; i < ast.length; i++) { - if (ast[i].type == 'group') - ast[i].tokens = functionCreation(ast[i].tokens); // Recursively order the groups. - if (typeof ast[i + 1] == 'undefined') - continue; // Skip this loop. - if ((ast[i].type == 'group' && ast[i].subtype == 'parenthesis') && (ast[i + 1].type == 'group' && ast[i + 1].subtype == 'brace')) { - // Parenthesis group followed by brace group. - ast[i + 1].tokens = functionCreation(ast[i + 1].tokens); // Order the other group before we do anything. - let op = new Operator('function creation', 'n/a', [ast[i], ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i, 2, op); - } - } - return ast; -} - -/** - * @function postfixOperators - * @desc Recursively structures the postfix operators. - * @param {Token[]} ast The ast. - * @returns {Token[]} The ast with structured postfix operators. - * @private - */ -function postfixOperators(ast) { - for (let i = 0; i < ast.length; i++) { - // Take care of the tokens in the groups. - if (ast[i].type == 'group') { - if (ast[i].tokens.length > 0) { - ast[i].tokens = postfixOperators(ast[i].tokens); - } - } else if (ast[i].type == 'operator') { - if (typeof ast[i].operands != 'undefined') { - ast[i].operands = postfixOperators(ast[i].operands); - } - } - if (ast[i].type == 'operator' && ast[i].subtype == 'postfix') { // The operand is on the left. - if (typeof ast[i - 1] == 'undefined') - throw new SyntaxError('Postfix operator requires one operand before it.'); - let op = new Operator(ast[i].subtype, ast[i].value, [ast[i - 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i - 1, 2, op); - // Removing 2 tokens, adding 1, skip 1 token. Don't reduce the counter. - } - } - return ast; -} - -/** - * @function prefixOperators - * @desc Recursively structures the prefix operators. - * @param {Token[]} ast The ast. - * @returns {Token[]} The ast with structured prefix operators. - * @private - */ -function prefixOperators(ast) { - for (let i = ast.length - 1; i >= 0; i--) { // Prefix operators are rtl associative, so loop backwards. - // Take care of the tokens in the groups. - if (ast[i].type == 'group') { - if (ast[i].tokens.length > 0) { - ast[i].tokens = prefixOperators(ast[i].tokens); - } - } else if (ast[i].type == 'operator') { - if (typeof ast[i].operands != 'undefined') { - ast[i].operands = prefixOperators(ast[i].operands); - } - } - if (ast[i].type == 'operator' && ast[i].subtype == 'prefix') { // The operand is on the right. - if (typeof ast[i + 1] == 'undefined') - throw new SyntaxError('Prefix operator requires one operand after it.'); - let op = new Operator(ast[i].subtype, ast[i].value, [ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i, 2, op); - // Removing 2 tokens, adding 1, skip 1 token. Don't reduce the counter. - } - } - return ast; -} - -/** - * @function mathOperators - * @desc Recursively structures the math operators. - * @param {Token[]} ast The ast. - * @param {Token[]} level The level of math to do. (Order of operations) - * @returns {Token[]} The ast with structured math operators. - * @private - */ -function mathOperators(ast, level) { - if (level == 0) { // Level 0 operators: ** - for (let i = ast.length - 1; i >= 0; i--) { // Exponentiation is rtl associative, so loop backwards. - // Take care of the tokens in the groups. - if (ast[i].type == 'group') { - if (ast[i].tokens.length > 0) { - ast[i].tokens = mathOperators(ast[i].tokens, level); - } - } else if (ast[i].type == 'operator') { - if (typeof ast[i].operands != 'undefined') { - ast[i].operands = mathOperators(ast[i].operands, level); - } - } - if (ast[i].type == 'operator' && ast[i].value == '**') { - if (typeof ast[i - 1] == 'undefined' || typeof ast[i + 1] == 'undefined') - throw new SyntaxError('Dual operator requires two operands.'); - let op = new Operator('dual', ast[i].value, [ast[i - 1], ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i - 1, 3, op); - i--; - } - } - } else { - for (let i = 0; i < ast.length; i++) { // All other math operators are ltr associative. - // Take care of the tokens in the groups. - if (ast[i].type == 'group') { - if (ast[i].tokens.length > 0) { - ast[i].tokens = mathOperators(ast[i].tokens, level); - } - } else if (ast[i].type == 'operator') { - if (typeof ast[i].operands != 'undefined') { - ast[i].operands = mathOperators(ast[i].operands, level); - } - } - if (level == 1) { - if (ast[i].type == 'operator' && (ast[i].value == '*' || ast[i].value == '/' || ast[i].value == '%')) { - if (typeof ast[i - 1] == 'undefined' || typeof ast[i + 1] == 'undefined') - throw new SyntaxError('Dual operator requires two operands.'); - let op = new Operator('dual', ast[i].value, [ast[i - 1], ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i - 1, 3, op); - i--; - } - } else if (level == 2) { - if (ast[i].type == 'operator' && (ast[i].value == '+' || ast[i].value == '-')) { - if (typeof ast[i - 1] == 'undefined' || typeof ast[i + 1] == 'undefined') - throw new SyntaxError('Dual operator requires two operands.'); - let op = new Operator('dual', ast[i].value, [ast[i - 1], ast[i + 1]]); - op.index = ast[i].index; - op.level = ast[i].level; - ast.splice(i - 1, 3, op); - i--; - } - } - } - } - return ast; -} - -module.exports = { - parse, - util: { - addIndexes, - addLevels, - getDeepestLevel, - grouping, - } -}; - -// require('fs').writeFileSync('ast.json', JSON.stringify(parse(tokenizer.tokenize('let x = (5 + (6 * 2)) - 7;')), null, 2), () => {}); diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..bba55fb --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,117 @@ +use super::tokenizer::{Token, TokenKind}; + +pub fn parse(mut tokens: Vec) -> Program { + let mut stmts: Vec> = vec![]; + let mut current = 0; + loop { + if current >= tokens.len() { + break; + } + // print!("{:?}", tokens[current]); + if tokens[current].kind == TokenKind::Semicolon { + stmts.push(tokens.drain(..=current).collect::>()); + current = 0; + } else { + current += 1; + } + } + let mut statements = vec![]; + // for s in &stmts { + // for t in s { + // print!("{:?}", t.kind); + // } + // print!("\n"); + // } + for s in stmts { + statements.push(parse_statement(s)); + } + statements +} + +fn parse_statement(statement: Vec) -> Statement { + if statement.len() == 1 { + // Must just be a semicolon. + return Statement::Nop; + } + let parse_function_call = |tokens: &Vec| -> Option { + // Check for ( ... ); + if tokens[0].kind != TokenKind::Identifier { + return None; + } else if tokens[1].kind != TokenKind::LeftParen { + return None; + } else if tokens[tokens.len() - 2].kind != TokenKind::RightParen { + return None; + } else if tokens[tokens.len() - 1].kind != TokenKind::Semicolon { + return None; + } else { + let function_name = tokens[0].value.clone(); + let mut args = vec![]; + + let mut current = 2; + loop { + args.push(parse_expression(tokens, &mut current)); + if tokens[current].kind == TokenKind::Comma { + current += 1; + } + if tokens[current].kind == TokenKind::RightParen { + break; + } + } + + Some(Statement::FunctionCall { + name: function_name, + arguments: args, + }) + } + }; + // The only form of statement. + parse_function_call(&statement).expect("could not parse function call") +} +fn parse_expression(tokens: &Vec, current: &mut usize) -> Expression { + if tokens[*current].kind == TokenKind::StringLiteral { + let out = Expression::Literal(Literal::StringLiteral(tokens[*current].value.clone())); + *current += 1; + out + } else if tokens[*current].kind == TokenKind::IntLiteral { + let val = tokens[*current] + .value + .clone() + .parse::() + .expect("could not parse int literal"); + let out = Expression::Literal(Literal::IntLiteral(val)); + *current += 1; + out + } else if tokens[*current].kind == TokenKind::FloatLiteral { + let val = tokens[*current] + .value + .clone() + .parse::() + .expect("could not parse float literal"); + let out = Expression::Literal(Literal::FloatLiteral(val)); + *current += 1; + out + } else { + Expression::Null + } +} + +pub type Program = Vec; +#[derive(Debug, PartialEq)] +pub enum Statement { + FunctionCall { + name: String, + arguments: Vec, + }, + Nop, // Equivalent to a C nop statement. +} +#[derive(Debug, PartialEq)] +pub enum Expression { + Literal(Literal), + Null, +} +#[derive(Debug, PartialEq)] +pub enum Literal { + StringLiteral(String), + IntLiteral(i32), + FloatLiteral(f32), +} diff --git a/src/tokenizer.js b/src/tokenizer.js deleted file mode 100644 index dd581af..0000000 --- a/src/tokenizer.js +++ /dev/null @@ -1,346 +0,0 @@ -/** - * @module tokenizer - * @file Manages the tokenization phase of Pivot. - * @author Garen Tyler - * @requires module:types - */ -const Token = require('./types.js').Token; - -/** - * @function tokenize - * @desc Takes in raw code, and outputs an array of Tokens. - * @param {string} code The raw input code. - * @returns {Token[]} The code, split into tokens. - * @public - */ -function tokenize(code) { - // Split the string into an array of chars. - let chars = code.split(''); - - // Create buffers. - let letterBuffer = []; - let operatorBuffer = []; - let numberBuffer = []; - let stringBuffer = []; - - // Create the output Token[]. - let tokens = []; - - // Create an object to keep track of string data. - let stringData = { - inString: false, - stringType: null - }; - - // Escape chars and remove comments. - chars = combineEscapedChars(chars); - chars = removeComments(chars); - - - // Actually tokenize the chars. - for (let i = 0; i < chars.length; i++) { - let char = chars[i]; - if (stringData.inString) { // Tokenize differently in a string. - // If a string delimiter and the same as the inital delimiter. - if (determineCharType(char) == 'string delimiter' && char == stringData.stringType) { - stringData.inString = false; // Not in a string any more. - tokens.push(new Token('string', 'n/a', stringBuffer.join(''))); // Push the string. - stringBuffer = []; // Clear the string buffer. - } else stringBuffer.push(char); // Add to the string buffer. - } else { // Tokenize normally. - if (determineCharType(char) == 'string delimiter') { - stringData.inString = true; // In a string now. - stringData.stringType = char; - } else if (determineCharType(char) == 'letter') { - letterBuffer.push(char); // Add to the letter buffer. - // End the other buffers. - if (operatorBuffer.length > 0) { - let operator = operatorBuffer.join(''); - tokens.push(new Token('operator', operatorType(operator), operator)); - operatorBuffer = []; - } - if (numberBuffer.length > 0) { - let number = numberBuffer.join(''); - tokens.push(new Token('number', 'n/a', number)); - numberBuffer = []; - } - } else if (determineCharType(char) == 'operator') { - operatorBuffer.push(char); // Add to the operator buffer. - // End the other buffers. - if (letterBuffer.length > 0) { - let variable = letterBuffer.join(''); - tokens.push(new Token('name', 'variable', variable)); - letterBuffer = []; - } - if (numberBuffer.length > 0) { - let number = numberBuffer.join(''); - tokens.push(new Token('number', 'n/a', number)); - numberBuffer = []; - } - } else if (determineCharType(char) == 'digit') { - numberBuffer.push(char); // Add to the number buffer. - // End the other buffers. - if (letterBuffer.length > 0) { - let variable = letterBuffer.join(''); - tokens.push(new Token('name', 'variable', variable)); - letterBuffer = []; - } - if (operatorBuffer.length > 0) { - let operator = operatorBuffer.join(''); - tokens.push(new Token('operator', operatorType(operator), operator)); - operatorBuffer = []; - } - } else if (determineCharType(char) == 'whitespace') { - // End all buffers. - if (letterBuffer.length > 0) { - let variable = letterBuffer.join(''); - tokens.push(new Token('name', 'variable', variable)); - letterBuffer = []; - } - if (numberBuffer.length > 0) { - let number = numberBuffer.join(''); - tokens.push(new Token('number', 'n/a', number)); - numberBuffer = []; - } - if (operatorBuffer.length > 0) { - let operator = operatorBuffer.join(''); - tokens.push(new Token('operator', operatorType(operator), operator)); - operatorBuffer = []; - } - } else if (determineCharType(char) == 'delimiter') { - // End all buffers. - if (letterBuffer.length > 0) { - let variable = letterBuffer.join(''); - tokens.push(new Token('name', 'variable', variable)); - letterBuffer = []; - } - if (numberBuffer.length > 0) { - let number = numberBuffer.join(''); - tokens.push(new Token('number', 'n/a', number)); - numberBuffer = []; - } - if (operatorBuffer.length > 0) { - let operator = operatorBuffer.join(''); - tokens.push(new Token('operator', operatorType(operator), operator)); - operatorBuffer = []; - } - // Push the delimiter. - tokens.push(getDelimiterToken(char)); - } - } - } - - // Empty all the buffers. - if (letterBuffer.length > 0) { - let variable = letterBuffer.join(''); - tokens.push(new Token('name', 'variable', variable)); - letterBuffer = []; - } - if (numberBuffer.length > 0) { - let number = numberBuffer.join(''); - tokens.push(new Token('number', 'n/a', number)); - numberBuffer = []; - } - if (operatorBuffer.length > 0) { - let operator = operatorBuffer.join(''); - tokens.push(new Token('operator', operatorType(operator), operator)); - operatorBuffer = []; - } - - tokens = changeKeywords(tokens); - - return tokens; -} - -/** - * @function combineEscapedChars - * @desc Combines escaped chars into one char. - * @param {string[]} chars The chars. - * @returns {string[]} The chars with combined escaped chars. - * @private - */ -function combineEscapedChars(chars) { - // Check for characters to be escaped. - for (let i = 0; i < chars.length; i++) { - if (chars[i] == '\\') { - chars.splice(i, 2, chars[i] + chars[i + 1]); - i -= 2; - } - } - return chars; -} - -/** - * @function removeComments - * @desc Removes comments. - * @param {string[]} chars The chars. - * @returns {string[]} The chars without comments. - * @private - */ -function removeComments(chars) { - let inComment = false; // Keep track if in a comment. - for (let i = 0; i < chars.length; i++) { - if (chars[i] == '/') { - if (chars[i + 1] == '/') { - inComment = true; - } - } - if (chars[i] == '\n') { - inComment = false; - chars.splice(i, 1); // Remove the newline at the end of the comment. - i--; - } - if (inComment) { - chars.splice(i, 1); // Remove the char in the comment. - i--; - } - } - return chars; -} - -/** - * @function changeKeywords - * @desc Changes tokens with subtype variable to subtype keyword - * @param {Token[]} tokens The tokens - * @returns {Token[]} The tokens with keywords. - * @private - */ -function changeKeywords(tokens) { - return tokens.map(t => { - if (t.subtype == 'variable' && determineType(t.value) == 'keyword') { - t.subtype = 'keyword'; - } - return t; - }); -} - -/** - * @function getDelimiterToken - * @desc Turns a delimiter char into a token. - * @param {string} delimiter The delimiter char. - * @returns {Token} The delimiter token. - * @private - */ -function getDelimiterToken(delimiter) { - if (/\(|\)/.test(delimiter)) - return new Token('delimiter', delimiter == '(' ? 'left' : 'right', 'parenthesis'); - else if (/\[|\]/.test(delimiter)) - return new Token('delimiter', delimiter == '[' ? 'left' : 'right', 'bracket'); - else if (/\{|\}/.test(delimiter)) - return new Token('delimiter', delimiter == '{' ? 'left' : 'right', 'brace'); - else throw new Error('Expected delimiter but got ' + delimiter); -} - -/** - * @function operatorType - * @desc Turns a delimiter char into a token. - * @param {string} delimiter The delimiter char. - * @returns {Token} The delimiter token. - * @private - */ -function operatorType(op) { - switch (op) { - case '.': - case '+': - case '-': - case '*': - case '/': - case '**': - case '%': - case '<<': - case '>>': - case '<': - case '<=': - case '>': - case '>=': - case '==': - case '!=': - case '&': - case '|': - case '^': - case '&&': - case '||': - case '^^': - case '=': - case '+=': - case '-=': - case '*=': - case '/=': - case '**=': - case '%=': - case '<<=': - case '>>=': - case '&=': - case '|=': - case '^=': - return 'dual'; - break; - case '++': - case '--': - return 'postfix'; - break; - case '!': - case '~': - return 'prefix'; - break; - case ',': - case ';': - return 'none'; - break; - default: - throw new TypeError('Unexpected operator ' + op); - break; - } -} - -/** - * @function determineCharType - * @desc Detects the type of characters. - * @param {string} char The input character(s). - * @returns {string} The type of char. - * @private - */ -function determineCharType(char) { - if (/[A-Za-z]/.test(char)) - return 'letter'; - else if (/\.|\+\+|--|!|~|\+|-|\*\*|\*|\/%|<<|>>|<|<=|>|>=|==|!=|&|\^|\||&&|\|\||=|\+=|-=|\*\*=|\*=|\/=|%=|<<=|>>=|&=|\^=|\|=|,|;/.test(char)) - // All the operators in Pivot. - return 'operator'; - else if (/\(|\)|\[|\]|\{|\}/.test(char)) - return 'delimiter'; - else if (/'|"|`/.test(char)) - return 'string delimiter'; - else if (/\d/.test(char)) - return 'digit'; - else if (/\\./.test(char)) - return 'escaped char'; - else if (/\s/.test(char)) - return 'whitespace'; - else throw new TypeError('Unexpected char ' + char); -}; - -/** - * @function determineType - * @desc Detects the type of a string. - * @param {string} str The input string. - * @returns {string} The type of string. - * @private - */ -function determineType(str) { - if (/let|const/.test(str)) // TODO: Add more keywords. - return 'keyword'; - else return 'unknown'; -}; - -module.exports = { - tokenize, - util: { - combineEscapedChars, - removeComments, - changeKeywords, - getDelimiterToken, - operatorType, - determineCharType, - determineType - } -}; diff --git a/src/tokenizer.rs b/src/tokenizer.rs new file mode 100644 index 0000000..69740a3 --- /dev/null +++ b/src/tokenizer.rs @@ -0,0 +1,141 @@ +#[derive(Debug, PartialEq)] +pub enum TokenKind { + Identifier, + StringLiteral, + IntLiteral, + FloatLiteral, + LeftParen, + RightParen, + Semicolon, + Comma, +} +#[derive(Debug, PartialEq)] +pub struct Token { + pub kind: TokenKind, + pub value: String, + pub index: usize, +} + +pub fn tokenize(source: &str) -> Vec { + let chars: Vec = source.chars().collect(); + let mut tokens = Vec::new(); + let mut current: usize = 0; + while let Some(c) = chars.get(current) { + if c.is_alphabetic() { + read_identifier(&chars, &mut current, &mut tokens); + } else if c.is_digit(10) { + read_number(&chars, &mut current, &mut tokens); + } else { + match c { + '\'' | '"' => read_string(&chars, &mut current, &mut tokens, c), + '(' => { + tokens.push(Token { + kind: TokenKind::LeftParen, + value: "(".to_owned(), + index: current, + }); + current += 1; + } + ')' => { + tokens.push(Token { + kind: TokenKind::RightParen, + value: ")".to_owned(), + index: current, + }); + current += 1; + } + ';' => { + tokens.push(Token { + kind: TokenKind::Semicolon, + value: ";".to_owned(), + index: current, + }); + current += 1; + } + ',' => { + tokens.push(Token { + kind: TokenKind::Comma, + value: ",".to_owned(), + index: current, + }); + current += 1; + } + _ => current += 1, // Just skip it if it's incorrect. + } + } + } + tokens +} +fn read_identifier(chars: &Vec, current: &mut usize, tokens: &mut Vec) { + let original_current = *current; + let mut identifier = String::new(); + while let Some(c) = chars.get(*current) { + if c.is_alphabetic() { + identifier.push(*c); + *current += 1; + } else { + break; + } + } + tokens.push(Token { + kind: TokenKind::Identifier, + value: identifier, + index: original_current, + }); +} +fn read_string(chars: &Vec, current: &mut usize, tokens: &mut Vec, delimiter: &char) { + let original_current = *current; + let mut string = String::new(); + *current += 1; // Move forward from the first delimiter. + while let Some(c) = chars.get(*current) { + if c == delimiter { + *current += 1; + break; + } else { + string.push(*c); + *current += 1; + } + } + tokens.push(Token { + kind: TokenKind::StringLiteral, + value: string, + index: original_current, + }); +} +fn read_number(chars: &Vec, current: &mut usize, tokens: &mut Vec) { + let original_current = *current; + + let mut kind = TokenKind::IntLiteral; + + let mut num = String::new(); + while let Some(c) = chars.get(*current) { + if c.is_digit(10) { + num.push(*c); + *current += 1; + } else if let Some(n) = chars.get(*current + 1) { + if *c == 'f' { + kind = TokenKind::FloatLiteral; + *current += 1; + break; + } else if *c == 'i' { + kind = TokenKind::IntLiteral; + *current += 1; + break; + } else if *c == '.' && n.is_digit(10) { + num.push(*c); + num.push(*n); + kind = TokenKind::FloatLiteral; + *current += 2; + } else { + break; + } + } else { + break; + } + } + tokens.push(Token { + kind: kind, + value: num, + index: original_current, + }); +} diff --git a/src/types.js b/src/types.js deleted file mode 100644 index 1bbc0cc..0000000 --- a/src/types.js +++ /dev/null @@ -1,42 +0,0 @@ -/** - * @module types - * @file Provides a consistent source of types for the compiler. - * @author Garen Tyler - */ - -/** - * @class Token - * @classdesc Stores the type of token, subtype, and literal char value. - */ -function Token(type, subtype, value) { - this.type = type; - this.subtype = subtype; - this.value = value; -} - -/** - * @class Group - * @classdesc Stores the type of group, and the tokens in the group. - */ -function Group(type, tokens) { - this.type = 'group' - this.subtype = type; - this.tokens = tokens; -} - -/** - * @class Operator - * @classdesc Stores the type of operator, and tokens for an operator. - */ -function Operator(subtype, value, operands) { - this.type = 'operator'; - this.subtype = subtype; - this.value = value; - this.operands = operands; -} - -module.exports = { - Token, - Group, - Operator -}; diff --git a/test.pvt b/test.pvt new file mode 100644 index 0000000..b5b2946 --- /dev/null +++ b/test.pvt @@ -0,0 +1,3 @@ +log(5i); +log(3f); +log(2.5); diff --git a/test/test.js b/test/test.js deleted file mode 100644 index 3b53bc8..0000000 --- a/test/test.js +++ /dev/null @@ -1,428 +0,0 @@ -const assert = require('assert'); - -const types = require('../src/types.js'); -const tokenizer = require('../src/tokenizer.js'); -const parser = require('../src/parser.js'); - -console.log(parser.parse(tokenizer.tokenize('let x = (5 + 3);'))); - -describe('types.js', () => { - it('Has a Token child.', () => { - assert.equal(types.hasOwnProperty('Token'), true); - }); - it('Has a Group child.', () => { - assert.equal(types.hasOwnProperty('Group'), true); - }); - it('Has a Operator child.', () => { - assert.equal(types.hasOwnProperty('Operator'), true); - }); - - describe('Token', () => { - it('Works as a constructor', () => { - try { - let token = new types.Token('a', 'b', 'c'); - } catch (err) { - throw err; - } - }); - it('Has values \'type\', \'subtype\', and \'value\'', () => { - try { - let token = new types.Token('a', 'b', 'c'); - if (!token.hasOwnProperty('type') || !token.hasOwnProperty('subtype') || !token.hasOwnProperty('value')) - throw new Error('Token is missing \'type\', \'subtype\', or \'value\' properties.'); - if (token.type != 'a' || token.subtype != 'b' || token.value != 'c') - throw new Error('Token incorrectly set \'type\', \'subtype\', or \'value\' properties.'); - } catch (err) { - throw err; - } - }); - }); - describe('Group', () => { - it('Works as a constructor', () => { - try { - let group = new types.Group('a', 'b'); - } catch (err) { - throw err; - } - }); - it('Has values \'type\', \'subtype\', and \'tokens\'', () => { - try { - let group = new types.Group('a', 'b'); - if (!group.hasOwnProperty('type') || !group.hasOwnProperty('subtype') || !group.hasOwnProperty('tokens')) - throw new Error('Group is missing \'type\', \'subtype\', or \'tokens\' properties.'); - if (group.type != 'group' || group.subtype != 'a' || group.tokens != 'b') - throw new Error('Group incorrectly set \'type\', \'subtype\', or \'tokens\' properties.'); - } catch (err) { - throw err; - } - }); - }); - // describe('Operator', () => { - // it('Works as a constructor', () => { - // try { - // let op = new types.Operator('dual', '+', ['3','4']); - // } catch (err) { - // throw err; - // } - // }); - // it('Has values \'type\', \'subtype\', \'value\', and \'operands\'', () => { - // try { - // let op = new types.Operator('dual', '+', ['3','4']); - // if (!op.hasOwnProperty('type') || !op.hasOwnProperty('subtype') || !op.hasOwnProperty('value') || !op.hasOwnProperty('operands')) - // throw new Error('Operator is missing \'type\', \'subtype\', \'value\', or \'operands\' properties.'); - // if (op.type != 'operator' || op.subtype != 'dual' || op.value != '+' || op.operands[0] != '3') - // throw new Error('Operator incorrectly set \'type\', \'subtype\', \'value\', or \'operands\' properties.'); - // } catch (err) { - // throw err; - // } - // }); - // }); -}); -describe('tokenizer.js', () => { - it('combineEscapedChars works', () => { - assert.equal(tokenizer.util.combineEscapedChars(`let x = 'test\\nnewline';`.split('')).join(''), `let x = 'test\\nnewline';`); - }); - it('removeComments works', () => { - assert.equal(tokenizer.util.removeComments(`// Comment\nlet i = 0;`.split('')).join(''), `let i = 0;`); - }); - it('changeKeywords works', () => { - let tokens = tokenizer.util.changeKeywords([{ - type: 'name', - subtype: 'variable', - value: 'let' - }, { - type: 'name', - subtype: 'variable', - value: 'x' - }]); - let correct = [{ - type: 'name', - subtype: 'keyword', - value: 'let' - }, { - type: 'name', - subtype: 'variable', - value: 'x' - }]; - let isCorrect = true; - tokens.forEach((t, i) => { - if (t.type != correct[i].type) - throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got \'' + t.type + '\'') - else if (t.subtype != correct[i].subtype) - throw new Error('Incorrectly changed subtype: Expected \'' + correct[i].subtype + '\' but got \'' + t.subtype + '\'') - else if (t.value != correct[i].value) - throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got \'' + t.value + '\'') - }); - }); - it('getDelimiterToken works', () => { - let token = tokenizer.util.getDelimiterToken(')'); - if (token.type != 'delimiter') - throw new Error('Incorrect type: Expected \'delimiter\' but got \'' + token.type + '\'') - else if (token.subtype != 'right') - throw new Error('Incorrect subtype: Expected \'right\' but got \'' + token.subtype + '\'') - else if (token.value != 'parenthesis') - throw new Error('Incorrect value: Expected \'parenthesis\' but got \'' + token.value + '\'') - }); - it('operatorType works', () => { - assert.equal(tokenizer.util.operatorType('++'), 'postfix'); - assert.equal(tokenizer.util.operatorType(';'), 'none'); - assert.equal(tokenizer.util.operatorType('+'), 'dual'); - }); - it('determineCharType works', () => { - assert.equal(tokenizer.util.determineCharType('+'), 'operator'); - assert.equal(tokenizer.util.determineCharType('"'), 'string delimiter'); - assert.equal(tokenizer.util.determineCharType('4'), 'digit'); - }); - it('determineType works', () => { - assert.equal(tokenizer.util.determineType('let'), 'keyword'); - assert.equal(tokenizer.util.determineType('dog'), 'unknown'); - }); -}); -describe('parser.js', () => { - it('addIndexes works', () => { - let tokens = parser.util.addIndexes([{ - type: 'name', - subtype: 'keyword', - value: 'let' - }, { - type: 'name', - subtype: 'variable', - value: 'x' - }]); - let correct = [{ - type: 'name', - subtype: 'keyword', - value: 'let', - index: 0 - }, { - type: 'name', - subtype: 'variable', - value: 'x', - index: 1 - }]; - let isCorrect = true; - tokens.forEach((t, i) => { - if (t.type != correct[i].type) - throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got ' + t.type) - else if (t.subtype != correct[i].subtype) - throw new Error('Changed subtype: Expected \'' + correct[i].subtype + '\' but got ' + t.subtype) - else if (t.value != correct[i].value) - throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got ' + t.value) - else if (t.index != correct[i].index) - throw new Error('Incorrect index: Expected \'' + correct[i].index + '\' but got ' + t.index) - }); - }); - it('addLevels works', () => { - let tokens = parser.util.addLevels([{ - type: 'name', - subtype: 'keyword', - value: 'let', - index: 0 - }, { - type: 'name', - subtype: 'variable', - value: 'x', - index: 1 - }, { - type: 'operator', - subtype: 'dual', - value: '=', - index: 2 - }, { - type: 'delimiter', - subtype: 'left', - value: 'parenthesis', - index: 3 - }, { - type: 'number', - subtype: 'n/a', - value: '5', - index: 4 - }, { - type: 'delimiter', - subtype: 'right', - value: 'parenthesis', - index: 5 - }]); - let correct = [{ - type: 'name', - subtype: 'keyword', - value: 'let', - index: 0, - level: 0 - }, { - type: 'name', - subtype: 'variable', - value: 'x', - index: 1, - level: 0 - }, { - type: 'operator', - subtype: 'dual', - value: '=', - index: 2, - level: 0 - }, { - type: 'delimiter', - subtype: 'left', - value: 'parenthesis', - index: 3, - level: 1 - }, { - type: 'number', - subtype: 'n/a', - value: '5', - index: 4, - level: 1 - }, { - type: 'delimiter', - subtype: 'right', - value: 'parenthesis', - index: 5, - level: 1 - }]; - let isCorrect = true; - tokens.forEach((t, i) => { - if (t.type != correct[i].type) - throw new Error('Changed type: Expected \'' + correct[i].type + '\' but got ' + t.type) - else if (t.subtype != correct[i].subtype) - throw new Error('Changed subtype: Expected \'' + correct[i].subtype + '\' but got \'' + t.subtype + '\'') - else if (t.value != correct[i].value) - throw new Error('Changed value: Expected \'' + correct[i].value + '\' but got \'' + t.value + '\'') - else if (t.index != correct[i].index) - throw new Error('Incorrect index: Expected \'' + correct[i].index + '\' but got \'' + t.index + '\'') - else if (t.level != correct[i].level) - throw new Error('Incorrect level: Expected \'' + correct[i].level + '\' but got \'' + t.level + '\'') - }); - }); - it('getDeepestLevel works', () => { - let deepestLevel = parser.util.getDeepestLevel([{ - type: 'name', - subtype: 'keyword', - value: 'let', - index: 0, - level: 0 - }, { - type: 'name', - subtype: 'variable', - value: 'x', - index: 1, - level: 0 - - }, { - type: 'operator', - subtype: 'dual', - value: '=', - index: 2, - level: 0 - }, { - type: 'delimiter', - subtype: 'left', - value: 'parenthesis', - index: 3, - level: 1 - }, { - type: 'number', - subtype: 'n/a', - value: '5', - index: 4, - level: 1 - }, { - type: 'operator', - subtype: 'dual', - value: '+', - index: 5, - level: 1 - }, { - type: 'delimiter', - subtype: 'left', - value: 'parenthesis', - index: 6, - level: 2 - }, { - type: 'number', - subtype: 'n/a', - value: '6', - index: 7, - level: 2 - }, { - type: 'operator', - subtype: 'dual', - value: '*', - index: 8, - level: 2 - }, { - type: 'number', - subtype: 'n/a', - value: '2', - index: 9, - level: 2 - }, { - type: 'delimiter', - subtype: 'right', - value: 'parenthesis', - index: 10, - level: 2 - }, { - type: 'delimiter', - subtype: 'right', - value: 'parenthesis', - index: 11, - level: 1 - }, { - type: 'operator', - subtype: 'none', - value: ';', - index: 12, - level: 0 - }]); - if (deepestLevel != 2) - throw new Error('Incorrect deepestLevel. Expected \'2\' but got \'' + deepestLevel + '\''); - }); - it('combineGroups works', () => { - let ast = parser.util.combineGroups([{ - type: 'name', - subtype: 'keyword', - value: 'let', - index: 0, - level: 0 - }, { - type: 'name', - subtype: 'variable', - value: 'x', - index: 1, - level: 0 - - }, { - type: 'operator', - subtype: 'dual', - value: '=', - index: 2, - level: 0 - }, { - type: 'delimiter', - subtype: 'left', - value: 'parenthesis', - index: 3, - level: 1 - }, { - type: 'number', - subtype: 'n/a', - value: '5', - index: 4, - level: 1 - }, { - type: 'operator', - subtype: 'dual', - value: '+', - index: 5, - level: 1 - }, { - type: 'delimiter', - subtype: 'left', - value: 'parenthesis', - index: 6, - level: 2 - }, { - type: 'number', - subtype: 'n/a', - value: '6', - index: 7, - level: 2 - }, { - type: 'operator', - subtype: 'dual', - value: '*', - index: 8, - level: 2 - }, { - type: 'number', - subtype: 'n/a', - value: '2', - index: 9, - level: 2 - }, { - type: 'delimiter', - subtype: 'right', - value: 'parenthesis', - index: 10, - level: 2 - }, { - type: 'delimiter', - subtype: 'right', - value: 'parenthesis', - index: 11, - level: 1 - }, { - type: 'operator', - subtype: 'none', - value: ';', - index: 12, - level: 0 - }]); - if (ast[3].type != 'group') - throw new Error('Incorrectly combined group.'); - if (ast[3].tokens[3].type != 'group') - throw new Error('Incorrectly combined group.'); - }); -});