diff --git a/Cargo.lock b/Cargo.lock index 052e15f..fe72258 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,6 +15,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "base64" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" + [[package]] name = "bitflags" version = "1.2.1" @@ -69,6 +75,26 @@ version = "0.1.0" dependencies = [ "nom", "regex", + "ron", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", ] [[package]] @@ -89,18 +115,60 @@ version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c" +[[package]] +name = "ron" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8a58080b7bb83b2ea28c3b7a9a994fd5e310330b7c8ca5258d99b98128ecfe4" +dependencies = [ + "base64", + "bitflags", + "serde", +] + [[package]] name = "ryu" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "serde" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "syn" +version = "1.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c1e438504729046a5cfae47f97c30d6d083c7d91d94603efdae3477fc070d4c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "thread_local" version = "1.0.1" @@ -110,6 +178,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + [[package]] name = "version_check" version = "0.9.2" diff --git a/Cargo.toml b/Cargo.toml index 5f855cb..4714f65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,3 +9,5 @@ edition = "2018" [dependencies] regex = "1.4.1" nom = "5.1.2" +ron = "0.6.2" +serde = "1.0.117" diff --git a/src/ast.rs b/src/ast.rs index d0a0bfc..8578d61 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,4 +1,5 @@ -#[derive(Clone, Debug, PartialEq)] +use serde::{Deserialize, Serialize}; +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum AstNodeKind { // Primitives Integer, @@ -23,20 +24,22 @@ pub enum AstNodeKind { FunctionDefinition, VariableDefinition, Assign, + // Blank node + Null, } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct AstNode { pub kind: AstNodeKind, pub value: String, - pub subnodes: Vec + pub subnodes: Vec, } impl AstNode { pub fn new(kind: AstNodeKind, value: String, subnodes: Vec) -> AstNode { AstNode { kind, value, - subnodes + subnodes, } } pub fn emit(&self, f: &mut dyn std::fmt::Write) -> std::fmt::Result { @@ -51,11 +54,11 @@ impl AstNode { Program => { write!(f, "(module\n")?; for node in &self.subnodes { - node.emit(f); + node.emit(f)?; } write!(f, ")") } - _ => Ok(()) + _ => Ok(()), } } @@ -87,42 +90,42 @@ impl AstNode { AstNode { kind: AstNodeKind::NotEqual, value: "not_equal".into(), - subnodes: vec![left, right] + subnodes: vec![left, right], } } pub fn equal(left: AstNode, right: AstNode) -> AstNode { AstNode { kind: AstNodeKind::Equal, value: "equal".into(), - subnodes: vec![left, right] + subnodes: vec![left, right], } } pub fn add(left: AstNode, right: AstNode) -> AstNode { AstNode { kind: AstNodeKind::Add, value: "add".into(), - subnodes: vec![left, right] + subnodes: vec![left, right], } } pub fn subtract(left: AstNode, right: AstNode) -> AstNode { AstNode { kind: AstNodeKind::Subtract, value: "subtract".into(), - subnodes: vec![left, right] + subnodes: vec![left, right], } } pub fn multiply(left: AstNode, right: AstNode) -> AstNode { AstNode { kind: AstNodeKind::Multiply, value: "multiply".into(), - subnodes: vec![left, right] + subnodes: vec![left, right], } } pub fn divide(left: AstNode, right: AstNode) -> AstNode { AstNode { kind: AstNodeKind::Divide, value: "divide".into(), - subnodes: vec![left, right] + subnodes: vec![left, right], } } // Control flow @@ -130,14 +133,18 @@ impl AstNode { AstNode { kind: AstNodeKind::Block, value: "block".into(), - subnodes: statements + subnodes: statements, } } - pub fn if_statement(conditional: AstNode, consequence: AstNode, alternative: AstNode) -> AstNode { + pub fn if_statement( + conditional: AstNode, + consequence: AstNode, + alternative: AstNode, + ) -> AstNode { AstNode { kind: AstNodeKind::IfStatement, value: "if_statement".into(), - subnodes: vec![conditional, consequence, alternative] + subnodes: vec![conditional, consequence, alternative], } } pub fn while_loop(conditional: AstNode, body: AstNode) -> AstNode { @@ -151,7 +158,7 @@ impl AstNode { AstNode { kind: AstNodeKind::Program, value: "program".into(), - subnodes: statements + subnodes: statements, } } // Functions and variables @@ -194,4 +201,51 @@ impl AstNode { subnodes: vec![value], } } + // Blank node + pub fn null() -> AstNode { + AstNode { + kind: AstNodeKind::Null, + value: "".into(), + subnodes: vec![], + } + } + + // Other + pub fn pretty_print(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result { + for _ in 0..indent { + write!(f, " ")?; + } + write!(f, "{{\n")?; + for _ in 0..indent + 2 { + write!(f, " ")?; + } + write!(f, "kind: {:?}\n", self.kind)?; + for _ in 0..indent + 2 { + write!(f, " ")?; + } + write!(f, "value: {:?}\n", self.value)?; + if self.subnodes.len() > 0 { + for _ in 0..indent + 2 { + write!(f, " ")?; + } + write!(f, "subnodes: [\n")?; + for subnode in &self.subnodes { + subnode.pretty_print(f, indent + 4)?; + write!(f, ",\n")?; + } + for _ in 0..indent + 2 { + write!(f, " ")?; + } + write!(f, "]\n")?; + } + for _ in 0..indent { + write!(f, " ")?; + } + write!(f, "}}") + } +} +impl std::fmt::Display for AstNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.pretty_print(f, 0) + } } diff --git a/src/ast_old.rs b/src/ast_old.rs deleted file mode 100644 index c1e9472..0000000 --- a/src/ast_old.rs +++ /dev/null @@ -1,235 +0,0 @@ -use std::fmt::Write; - -#[derive(Debug, PartialEq)] -pub struct AstNode { - pub value: Option, - pub kind: String, - pub subtokens: Option>, -} -impl std::fmt::Display for AstNode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self.kind[..] { - "number" => write!(f, "{}", self.value.as_ref().expect("number had blank value")), - "identifier" => write!(f, "{}", self.value.as_ref().expect("number had blank value")), - "not" => write!(f, "!({})", self.subtokens.as_ref().expect("not had blank subtokens")[0]), - "equal" => write!(f, "({} == ({}))", self.subtokens.as_ref().expect("equal had blank subtokens")[0], self.subtokens.as_ref().expect("equal had blank subtokens")[1]), - "not_equal" => write!(f, "({} != {})", self.subtokens.as_ref().expect("not_equal had blank subtokens")[0], self.subtokens.as_ref().expect("not_equal had blank subtokens")[1]), - "add" => write!(f, "({} + {})", self.subtokens.as_ref().expect("add had blank subtokens")[0], self.subtokens.as_ref().expect("add had blank subtokens")[1]), - "subtract" => write!(f, "({} - {})", self.subtokens.as_ref().expect("subtract had blank subtokens")[0], self.subtokens.as_ref().expect("subtract had blank subtokens")[1]), - "multiply" => write!(f, "({} * {})", self.subtokens.as_ref().expect("multiply had blank subtokens")[0], self.subtokens.as_ref().expect("multiply had blank subtokens")[1]), - "divide" => write!(f, "({} / {})", self.subtokens.as_ref().expect("divide had blank subtokens")[0], self.subtokens.as_ref().expect("divide had blank subtokens")[1]), - "call" => { - write!(f, "({}(", self.value.as_ref().expect("call had blank value"))?; - let args = self.subtokens.as_ref().expect("call had blank subtokens"); - if args.len() > 0 { - write!(f, "{}", args[0])?; - if args.len() > 1 { - for i in 1..args.len() { - write!(f, ", {}", args[i])?; - } - } - } - write!(f, "))") - }, - "return" => write!(f, "return {}", self.subtokens.as_ref().expect("return had blank subtokens")[0]), - "block" => { - write!(f, "{{\n")?; - let stmts = self.subtokens.as_ref().expect("block had blank subtokens"); - if stmts.len() > 0 { - write!(f, "{};", stmts[0])?; - if stmts.len() > 1 { - for i in 1..stmts.len() { - write!(f, "\n{};", stmts[i])?; - } - } - } - write!(f, "\n}}") - } - "if" => { - let parts = self.subtokens.as_ref().expect("if had blank subtokens"); - write!(f, "(if ({}) {{{}}} else {{{}}})", parts[0], parts[1], parts[2]) - } - "function" => { - let parts = self.subtokens.as_ref().expect("function had blank subtokens"); - write!(f, "function {}(", self.value.as_ref().expect("function had blank value"))?; - let params = &parts[1..]; - if params.len() > 0 { - write!(f, "{}", params[0])?; - if params.len() > 1 { - for i in 1..params.len() { - write!(f, ", {}", params[i])?; - } - } - } - write!(f, ") {}", parts[0]) - } - "variable" => write!(f, "var {} = {}", self.value.as_ref().expect("var had blank value"), self.subtokens.as_ref().expect("var had blank subtokens")[0]), - "assign" => write!(f, "{} = {}", self.value.as_ref().expect("assign had blank value"), self.subtokens.as_ref().expect("assign had blank subtokens")[0]), - "while" => { - let parts = self.subtokens.as_ref().expect("while had blank subtokens"); - write!(f, "while ({})\n{}\n", parts[0], parts[1]) - } - "program" => { - write!(f, "{{\n")?; - let stmts = self.subtokens.as_ref().expect("program had blank subtokens"); - if stmts.len() > 0 { - write!(f, "{};", stmts[0])?; - if stmts.len() > 1 { - for i in 1..stmts.len() { - write!(f, "\n{};", stmts[i])?; - } - } - } - write!(f, "\n}}") - } - _ => write!(f, "(unknown node type {})", self.kind), - } - } -} -impl AstNode { - pub fn emit(&self, f: &mut dyn Write) -> Result<(), std::fmt::Error> { - match &self.kind[..] { - "number" => write!(f, "i32.const {}\n", self.value.as_ref().expect("number had blank value")), - "add" => { - let subtokens = self.subtokens.as_ref().expect("add had blank subtokens"); - subtokens[0].emit(f)?; - subtokens[1].emit(f)?; - write!(f, "i32.add\n") - }, - _ => Ok(()) - } - } - - pub fn number(num: i32) -> AstNode { - AstNode { - value: Some(num.to_string()), - kind: "number".into(), - subtokens: None, - } - } - pub fn identifier>(id: T) -> AstNode { - AstNode { - value: Some(id.into()), - kind: "identifier".into(), - subtokens: None, - } - } - pub fn not(operand: AstNode) -> AstNode { - AstNode { - value: None, - kind: "not".into(), - subtokens: Some(vec![operand]), - } - } - pub fn equal(left: AstNode, right: AstNode) -> AstNode { - AstNode { - value: None, - kind: "equal".into(), - subtokens: Some(vec![left, right]), - } - } - pub fn not_equal(left: AstNode, right: AstNode) -> AstNode { - AstNode { - value: None, - kind: "not_equal".into(), - subtokens: Some(vec![left, right]), - } - } - pub fn add(left: AstNode, right: AstNode) -> AstNode { - AstNode { - value: None, - kind: "add".into(), - subtokens: Some(vec![left, right]), - } - } - pub fn subtract(left: AstNode, right: AstNode) -> AstNode { - AstNode { - value: None, - kind: "subtract".into(), - subtokens: Some(vec![left, right]), - } - } - pub fn multiply(left: AstNode, right: AstNode) -> AstNode { - AstNode { - value: None, - kind: "multiply".into(), - subtokens: Some(vec![left, right]), - } - } - pub fn divide(left: AstNode, right: AstNode) -> AstNode { - AstNode { - value: None, - kind: "divide".into(), - subtokens: Some(vec![left, right]), - } - } - pub fn call>(callee: T, args: Vec) -> AstNode { - AstNode { - value: Some(callee.into()), - kind: "call".into(), - subtokens: Some(args), - } - } - pub fn r#return(operand: AstNode) -> AstNode { - AstNode { - value: None, - kind: "return".into(), - subtokens: Some(vec![operand]), - } - } - pub fn block(statements: Vec) -> AstNode { - AstNode { - value: None, - kind: "block".into(), - subtokens: Some(statements), - } - } - pub fn r#if(conditional: AstNode, consequence: AstNode, alternative: AstNode) -> AstNode { - AstNode { - value: None, - kind: "if".into(), - subtokens: Some(vec![conditional, consequence, alternative]), - } - } - pub fn function>(name: T, parameters: Vec<(T, T)>, body: AstNode) -> AstNode { - // Turn the parameter strings into ids. - let mut params = vec![]; - params.push(body); // First one will always be the body. - for p in parameters { - params.push(AstNode::identifier(p)); - } - AstNode { - value: Some(name.into()), - kind: "function".into(), - subtokens: Some(params), - } - } - pub fn variable>(name: T, value: AstNode) -> AstNode { - AstNode { - value: Some(name.into()), - kind: "variable".into(), - subtokens: Some(vec![value]), - } - } - pub fn assign>(name: T, value: AstNode) -> AstNode { - AstNode { - value: Some(name.into()), - kind: "assign".into(), - subtokens: Some(vec![value]), - } - } - pub fn r#while(conditional: AstNode, body: AstNode) -> AstNode { - AstNode { - value: None, - kind: "while".into(), - subtokens: Some(vec![conditional, body]), - } - } - pub fn program(statements: Vec) -> AstNode { - AstNode { - value: None, - kind: "program".into(), - subtokens: Some(statements), - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 8858c9c..9c06b34 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ -#[macro_use] -extern crate nom; +#![allow(unused_variables)] +#![allow(non_snake_case)] +#![allow(dead_code)] + extern crate regex; pub mod ast; diff --git a/src/main.rs b/src/main.rs index aba00b9..b6f26e2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,61 +1,4 @@ -use pivot::ast::AstNode; -use regex::Regex; - fn main() { - pivot::parse::parse(); - // let src = AstNode::program(vec![ - // AstNode::add( - // AstNode::integer(2), - // AstNode::integer(3), - // ) - // ]); - // println!("{:?}", src); - // let mut out = String::new(); - // src.emit(&mut out); - // println!("{}", out); - // test(); + let ast = pivot::parse::parse(r"log(2)"); + println!("{}", ast); } - -// fn test() { -// use pivot::ast::AstNode; -// -// let src = r#" -// function factorial(n) { -// var result = 1; -// while (n != 1) { -// result = result * n; -// n = n - 1; -// } -// return result; -// } -// "#; -// -// let actual_ast = pivot::parse::parse(); -// let expected_ast = AstNode::function("factorial", vec!["n"], -// AstNode::block(vec![ -// AstNode::variable("result", AstNode::number(1)), -// AstNode::r#while( -// AstNode::not_equal( -// AstNode::identifier("n"), -// AstNode::number(1) -// ), -// AstNode::block(vec![ -// AstNode::assign("result", -// AstNode::multiply( -// AstNode::identifier("result"), -// AstNode::identifier("n") -// ) -// ), -// AstNode::assign("n", -// AstNode::subtract( -// AstNode::identifier("n"), -// AstNode::number(1) -// ) -// ) -// ]), -// ), -// AstNode::r#return(AstNode::identifier("result")) -// ]) -// ); -// println!("{}", expected_ast); -// } diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 6215219..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,371 +0,0 @@ -use crate::ast::AstNode; -use std::ops::Range; -use regex::Regex; - -pub fn parse() -> AstNode { - let src = "420"; - let mut n = String::new(); - - println!("src: {:?}", src); - println!("n: {:?}", n); - { - let mut num = Parser::regex(r"\d+(\.\d+)?").bind(&mut n); - - let mut full = num; - println!("full: {}", full); - println!("full.parse: {:?}", full.parse(src)); - println!("full: {}", full); - } - println!("src: {:?}", src); - println!("n: {:?}", n); - AstNode::block(vec![]) -} - -#[derive(Debug)] -pub enum ParserKind<'a> { - Literal(String), - Regex(Regex), - And, - Or, - Repeat(usize), - RepeatRange(Range), - Bind(&'a mut String), -} -impl<'a> std::fmt::Display for ParserKind<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use ParserKind::*; - match self { - Literal(s) => write!(f, "Literal \"{}\"", s), - Regex(r) => write!(f, "Regex /{}/", r.as_str()), - And => write!(f, "And"), - Or => write!(f, "Or"), - Repeat(num) => write!(f, "Repeat {}", num), - RepeatRange(range) => write!(f, "RepeatRange {:?}", range), - Bind(_) => write!(f, "Bind"), - } - } -} - -#[derive(Debug)] -pub struct Parser<'a> { - kind: ParserKind<'a>, - subparsers: Vec>, - // bind: Option<&'a mut String>, -} -impl<'a> std::fmt::Display for Parser<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.pretty_print(f, 0) - } -} -impl<'a> Parser<'a> { - pub fn parse>(&mut self, src: T) -> Result<(String, String), String> { - use ParserKind::*; - let s: String = src.into(); - match &mut self.kind { - Literal(literal) => { - if s.len() >= literal.len() && s[..literal.len()] == literal[..] { - Ok((s[..literal.len()].to_owned(), s[literal.len()..].to_owned())) - } else { - Err(s) - } - } - Regex(re) => { - if let Some(mat) = re.find(&s) { - if mat.start() == 0 { - Ok((s[mat.start()..mat.end()].to_owned(), s[mat.end()..].to_owned())) - } else { - Err(s) - } - } else { - Err(s) - } - } - And => { - let (lmatched, lrest) = self.subparsers[0].parse(s)?; - let (rmatched, rrest) = self.subparsers[1].parse(lrest)?; - Ok((lmatched + &rmatched, rrest)) - } - Or => { - if let Ok(lresult) = self.subparsers[0].parse(s.clone()) { - Ok(lresult) - } else { - self.subparsers[1].parse(s.clone()) - } - } - Repeat(num_repeats) => { - let mut matched = String::new(); - let mut rest = s.clone(); - for _ in 0..*num_repeats { - let (m, r) = self.subparsers[0].parse(rest)?; - matched += &m; - rest = r; - } - Ok((matched, rest)) - } - RepeatRange(range) => { - let mut matched = String::new(); - let mut rest = s.clone(); - - // Parse up to range.start - for _ in 0..range.start { - let (m, r) = self.subparsers[0].parse(rest)?; - matched += &m; - rest = r; - } - - // Parse optionally up to range.end - for _ in 0..(range.end - range.start) { - let parse_result = self.subparsers[0].parse(rest); - if let Err(r) = parse_result { - rest = r; - break; - } else { - let (m, r) = parse_result.unwrap(); - matched += &m; - rest = r; - } - } - - Ok((matched, rest)) - } - Bind(var) => { - let (matched, rest) = self.subparsers[0].parse(s)?; - **var = matched.clone(); - Ok((matched, rest)) - } - } - } - - // Static - pub fn literal>(s: T) -> Parser<'a> { - Parser { - kind: ParserKind::Literal(s.into()), - subparsers: vec![], - // bind: None, - } - } - pub fn regex>(s: T) -> Parser<'a> { - Parser { - kind: ParserKind::Regex(Regex::new(&s.into()).expect("could not compile regex")), - subparsers: vec![], - // bind: None, - } - } - - // Instance - pub fn and(self, r: Parser<'a>) -> Parser<'a> { - Parser { - kind: ParserKind::And, - subparsers: vec![self, r], - // bind: None, - } - } - pub fn or(self, r: Parser<'a>) -> Parser<'a> { - Parser { - kind: ParserKind::Or, - subparsers: vec![self, r], - // bind: None, - } - } - pub fn repeat(self, num_repeats: usize) -> Parser<'a> { - Parser { - kind: ParserKind::Repeat(num_repeats), - subparsers: vec![self], - // bind: None, - } - } - pub fn repeat_range(self, num_repeats: Range) -> Parser<'a> { - Parser { - kind: ParserKind::RepeatRange(num_repeats), - subparsers: vec![self], - // bind: None, - } - } - pub fn optional(self) -> Parser<'a> { - Parser { - kind: ParserKind::RepeatRange(0..1), - subparsers: vec![self], - // bind: None, - } - } - pub fn bind(self, s: &'a mut String) -> Parser<'a> { - Parser { - kind: ParserKind::Bind(s), - subparsers: vec![self], - } - } - - // Other - pub fn pretty_print(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result { - for _ in 0..indent { - write!(f, " "); - } - write!(f, "{}", self.kind)?; - if self.subparsers.len() > 0 { - write!(f, " [\n")?; - for subparser in &self.subparsers { - subparser.pretty_print(f, indent + 2)?; - write!(f, ",\n")?; - } - for _ in 0..indent { - write!(f, " ")?; - } - write!(f, "]") - } else { - write!(f, "") - } - } -} - -// use combinators::*; -// pub mod combinators { -// pub struct Parser<'a> { -// source: &'a str, -// subparsers: Vec>, -// pub parse: Box Result<(&'a str, &'a str), &'a str>>, -// } -// impl Parser { -// // pub type S = Into; -// pub fn literal<'a>(literal: &'a str) -> Parser { -// Parser { -// source: literal.into(), -// subparsers: vec![], -// parse: Box::new(|s: &'a str| -> Result<(&'a str, &'a str), &'a str> { -// if src.len() >= literal.len() { -// if src[..literal.len()] == literal[..] { -// return Ok((&src[..literal.len()], &src[literal.len()..])); -// } -// } -// Err(&src[..]) -// }) -// } -// } -// } - // pub fn literal<'a, T>(literal: &'a str) -> T where T: Fn(&str) -> Result<(&str, &str), &str> + 'a { - // move |src: &str| -> Result<(&str, &str), &str> { - // if src.len() >= literal.len() { - // if src[..literal.len()] == literal[..] { - // return Ok((&src[..literal.len()], &src[literal.len()..])); - // } - // } - // Err(&src[..]) - // } - // } - // pub fn and<'a>(left: T, right: T) -> T where T: Fn(&str) -> Result<(&str, &str), &str> + 'a { - // - // } -// } - -// // Whitespace and comments. -// let whitespace = Parser.regex(/[ \n\r\t]+/y); -// let comments = Parser.regex(/[/][/].*/y).or( -// Parser.regex(/[/][*].*[*][/]/sy) -// ); -// let ignored = Parser.zeroOrMore(whitespace.or(comments)); -// // Tokens -// let token = (pattern: RegExp) => Parser.regex(pattern).bind((value: any) => ignored.and(Parser.constant(value))); -// let FUNCTION = token(/function\b/y); -// let IF = token(/if\b/y); -// let ELSE = token(/else\b/y); -// let RETURN = token(/return\b/y); -// let ASSIGN = token(/=/y).map(_ => Assign); -// let VAR = token(/var\b/y); -// let WHILE = token(/while\b/y); -// let COMMA = token(/[,]/y); -// let SEMICOLON = token(/[;]/y); -// let LEFT_PAREN = token(/[(]/y); -// let RIGHT_PAREN = token(/[)]/y); -// let LEFT_BRACE = token(/[{]/y); -// let RIGHT_BRACE = token(/[}]/y); -// let NUMBER = token(/[0-9]/y).map((digits: any) => new Num(parseInt(digits))); -// let ID = token(/[a-zA-Z_][a-zA-Z0-9_]*/y).map((x: any) => new Id(x)); -// let NOT = token(/!/y).map(_ => Not); -// let EQUAL = token(/==/y).map(_ => Equal); -// let NOT_EQUAL = token(/!=/y).map(_ => NotEqual); -// let PLUS = token(/[+]/y).map(_ => Add); -// let MINUS = token(/[-]/y).map(_ => Subtract); -// let STAR = token(/[*]/y).map(_ => Multiply); -// let SLASH = token(/[/]/y).map(_ => Divide); -// // Expression parser -// let expression: Parser = Parser.error('expression parser used before definition'); -// // Call parser -// let args: Parser> = expression.bind((arg: any) => Parser.zeroOrMore(COMMA.and(expression)).bind((args: any) => Parser.constant([arg, ...args]))).or(Parser.constant([])); -// let functionCall: Parser = ID.bind((callee: any) => LEFT_PAREN.and(args.bind((args: any) => RIGHT_PAREN.and(Parser.constant(callee.equals(new Id('assert')) ? new Assert(args[0]) : new FunctionCall(callee, args)))))); -// // Atom -// let atom: Parser = functionCall.or(ID).or(NUMBER).or(LEFT_PAREN.and(expression).bind((e: any) => RIGHT_PAREN.and(Parser.constant(e)))); -// // Unary operators -// let unary: Parser = Parser.optional(NOT).bind((not: any) => atom.map((operand: any) => not ? new Not(operand) : operand)); -// // Infix operators -// let infix = (operatorParser: any, operandParser: any) => -// operandParser.bind((operand: any) => -// Parser.zeroOrMore( -// operatorParser.bind((operator: any) => -// operandParser.bind((operand: any) => -// Parser.constant({ operator, operand }) -// ) -// ) -// ).map((operatorTerms: any) => -// operatorTerms.reduce((left: any, { operator, operand }: { operator: any, operand: any }) => -// new operator(left, operand), operand) -// ) -// ); -// let product = infix(STAR.or(SLASH), unary); -// let sum = infix(PLUS.or(MINUS), product); -// let comparison = infix(EQUAL.or(NOT_EQUAL), sum); -// // Associativity -// // Closing the loop: expression -// expression.parse = comparison.parse; -// // Statement -// let statement: Parser = Parser.error('statement parser used before definition'); -// let returnStatement: Parser = RETURN.and(expression).bind((operand: any) => SEMICOLON.and(Parser.constant(new Return(operand)))); -// let expressionStatement: Parser = expression.bind((operand: any) => SEMICOLON.and(Parser.constant(operand))); -// let ifStatement: Parser = IF.and(LEFT_PAREN).and(expression).bind((conditional: any) => -// RIGHT_PAREN.and(statement).bind((consequence: any) => -// ELSE.and(statement).bind((alternative: any) => -// Parser.constant(new If(conditional, consequence, alternative)) -// ) -// ) -// ); -// let whileStatement: Parser = WHILE.and(LEFT_PAREN).and(expression).bind((conditional: any) => -// RIGHT_PAREN.and(statement).bind((body: any) => -// Parser.constant(new While(conditional, body)) -// ) -// ); -// let varStatement: Parser = VAR.and(ID).bind((name: any) => -// ASSIGN.and(expression).bind((value: any) => -// SEMICOLON.and(Parser.constant(new Var(name, value))) -// ) -// ); -// let assignmentStatement: Parser = ID.bind((name: any) => -// ASSIGN.and(expression).bind((value: any) => -// SEMICOLON.and(Parser.constant(new Assign(name, value))) -// ) -// ); -// let blockStatement: Parser = LEFT_BRACE.and(Parser.zeroOrMore(statement)).bind((statements: any) => -// RIGHT_BRACE.and(Parser.constant(new Block(statements))) -// ); -// let parameters: Parser> = ID.bind((param: any) => -// Parser.zeroOrMore(COMMA.and(ID)).bind((params: any) => -// Parser.constant([param, ...params]) -// ) -// ).or(Parser.constant([])); -// let functionStatement: Parser = FUNCTION.and(ID).bind((name: any) => -// LEFT_PAREN.and(parameters).bind((parameters: any) => -// RIGHT_PAREN.and(blockStatement).bind((block: any) => -// Parser.constant(name.equals(new Id('main')) ? new Main(block.statements) : new FunctionDefinition(name, parameters, block)) -// ) -// ) -// ); -// let statementParser: Parser = -// returnStatement -// .or(functionStatement) -// .or(ifStatement) -// .or(whileStatement) -// .or(varStatement) -// .or(assignmentStatement) -// .or(blockStatement) -// .or(expressionStatement); -// statement.parse = statementParser.parse; -// let parser: Parser = ignored.and(Parser.zeroOrMore(statement)).map((statements: any) => new Block(statements)); -// -// return parser.parseStringToCompletion(source); diff --git a/src/parse/combinators.rs b/src/parse/combinators.rs new file mode 100644 index 0000000..5072be5 --- /dev/null +++ b/src/parse/combinators.rs @@ -0,0 +1,267 @@ +use regex::Regex; +use ron::to_string; +use std::ops::Range; +use std::rc::Rc; + +pub enum ParserKind { + Literal(String), + Regex(Regex), + Constant(String), + And, + Ignore(bool), + Or, + Repeat(usize), + RepeatRange(Range), + Error(String), + Map(Rc Result>>), +} +impl std::fmt::Debug for ParserKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} +impl std::fmt::Display for ParserKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use ParserKind::*; + match self { + Literal(s) => write!(f, "Literal \"{}\"", s), + Regex(r) => write!(f, "Regex /{}/", r.as_str()), + Constant(c) => write!(f, "Constant \"{}\"", c), + And => write!(f, "And"), + Ignore(b) => write!(f, "Ignore{}", if *b { "Before" } else { "After" }), + Or => write!(f, "Or"), + Repeat(num) => write!(f, "Repeat {}", num), + RepeatRange(range) => write!(f, "RepeatRange {:?}", range), + Error(msg) => write!(f, "Error \"{}\"", msg), + Map(_) => write!(f, "Map"), + } + } +} +impl Clone for ParserKind { + fn clone(&self) -> Self { + use ParserKind::*; + match self { + Literal(s) => Literal(s.clone()), + Regex(r) => Regex(r.clone()), + Constant(c) => Constant(c.clone()), + And => And, + Ignore(b) => Ignore(*b), + Or => Or, + Repeat(num) => Repeat(num.clone()), + RepeatRange(range) => RepeatRange(range.clone()), + Error(msg) => Error(msg.clone()), + Map(cfn) => Map(Rc::clone(cfn)), + } + } +} + +#[derive(Debug, Clone)] +pub struct Parser { + kind: ParserKind, + subparsers: Vec, +} +impl std::fmt::Display for Parser { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.pretty_print(f, 0) + } +} +impl Parser { + pub fn parse>(&self, src: T) -> Result<(String, String), String> { + use ParserKind::*; + let s: String = src.into(); + match &self.kind { + Literal(literal) => { + if s.len() >= literal.len() && s[..literal.len()] == literal[..] { + Ok((s[..literal.len()].to_owned(), s[literal.len()..].to_owned())) + } else { + Err(s) + } + } + Regex(re) => { + if let Some(mat) = re.find(&s) { + if mat.start() == 0 { + Ok(( + s[mat.start()..mat.end()].to_owned(), + s[mat.end()..].to_owned(), + )) + } else { + Err(s) + } + } else { + Err(s) + } + } + Constant(constant) => Ok((constant.clone(), s)), + And => { + let (lmatched, lrest) = self.subparsers[0].parse(s)?; + let (rmatched, rrest) = self.subparsers[1].parse(lrest)?; + Ok(( + to_string(&vec![lmatched.clone(), rmatched.clone()]).unwrap(), + rrest, + )) + } + Ignore(before) => { + if *before { + let (_, rest) = self.subparsers[0].parse(s)?; + self.subparsers[1].parse(rest) + } else { + let (matched, rest) = self.subparsers[0].parse(s)?; + let (_, rest) = self.subparsers[1].parse(rest)?; + Ok((matched, rest)) + } + } + Or => { + if let Ok(lresult) = self.subparsers[0].parse(s.clone()) { + Ok(lresult) + } else { + self.subparsers[1].parse(s.clone()) + } + } + Repeat(num_repeats) => { + let mut matched = vec![]; + let mut rest = s.clone(); + for _ in 0..*num_repeats { + let (m, r) = self.subparsers[0].parse(rest)?; + matched.push(m); + rest = r; + } + Ok((to_string(&matched).unwrap(), rest)) + } + RepeatRange(range) => { + let mut matched = vec![]; + let mut rest = s.clone(); + + // Parse up to range.start + for _ in 0..range.start { + let (m, r) = self.subparsers[0].parse(rest)?; + matched.push(m); + rest = r; + } + + // Parse optionally up to range.end + for _ in 0..(range.end - range.start) { + let parse_result = self.subparsers[0].parse(rest); + if let Err(r) = parse_result { + rest = r; + break; + } else { + let (m, r) = parse_result.unwrap(); + matched.push(m); + rest = r; + } + } + + Ok((to_string(&matched).unwrap(), rest)) + } + Error(msg) => panic!(msg.clone()), + Map(cfn) => { + let (matched, rest) = self.subparsers[0].parse(s)?; + if let Ok(m) = cfn(matched) { + Ok((m, rest)) + } else { + Err(rest) + } + } + } + } + + // Static + pub fn literal>(s: T) -> Parser { + Parser { + kind: ParserKind::Literal(s.into()), + subparsers: vec![], + } + } + pub fn regex>(s: T) -> Parser { + Parser { + kind: ParserKind::Regex(Regex::new(&s.into()).expect("could not compile regex")), + subparsers: vec![], + } + } + pub fn constant>(s: T) -> Parser { + Parser { + kind: ParserKind::Constant(s.into()), + subparsers: vec![], + } + } + pub fn error>(s: T) -> Parser { + Parser { + kind: ParserKind::Error(s.into()), + subparsers: vec![], + } + } + + // Instance + pub fn and(self, r: Parser) -> Parser { + Parser { + kind: ParserKind::And, + subparsers: vec![self, r], + } + } + pub fn ignore_before(self, r: Parser) -> Parser { + Parser { + kind: ParserKind::Ignore(true), + subparsers: vec![self, r], + } + } + pub fn ignore_after(self, r: Parser) -> Parser { + Parser { + kind: ParserKind::Ignore(false), + subparsers: vec![self, r], + } + } + pub fn or(self, r: Parser) -> Parser { + Parser { + kind: ParserKind::Or, + subparsers: vec![self, r], + } + } + pub fn repeat(self, num_repeats: usize) -> Parser { + Parser { + kind: ParserKind::Repeat(num_repeats), + subparsers: vec![self], + } + } + pub fn repeat_range(self, num_repeats: Range) -> Parser { + Parser { + kind: ParserKind::RepeatRange(num_repeats), + subparsers: vec![self], + } + } + pub fn optional(self) -> Parser { + Parser { + kind: ParserKind::RepeatRange(0..1), + subparsers: vec![self], + } + } + pub fn map(self, cfn: F) -> Parser + where + F: Fn(String) -> Result, + { + Parser { + kind: ParserKind::Map(Rc::new(Box::new(cfn))), + subparsers: vec![self], + } + } + + // Other + pub fn pretty_print(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result { + for _ in 0..indent { + write!(f, " ")?; + } + write!(f, "{}", self.kind)?; + if self.subparsers.len() > 0 { + write!(f, " [\n")?; + for subparser in &self.subparsers { + subparser.pretty_print(f, indent + 2)?; + write!(f, ",\n")?; + } + for _ in 0..indent { + write!(f, " ")?; + } + write!(f, "]") + } else { + write!(f, "") + } + } +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..87d6501 --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,157 @@ +mod combinators; + +use crate::ast::AstNode; +use combinators::Parser; +use ron::{from_str, to_string}; + +pub fn parse>(src: T) -> AstNode { + let src: String = src.into(); + let whitespace = Parser::regex(r"[ \n\r\t]+"); + let comments = Parser::regex(r"[/][/].*").or(Parser::regex(r"[/][*].*[*][/]")); + let ignored = whitespace.or(comments).repeat_range(0..usize::MAX); + // Token parser constructor. + let i = ignored.clone(); + let token = move |pattern: &str| i.clone().ignore_before(Parser::regex(pattern)); + // Token helper parsers. + let FUNCTION = Parser::regex(r"function\b").or(ignored.clone()); + let IF = token(r"if\b"); + let ELSE = token(r"else\b"); + let RETURN = token(r"return\b"); + let VAR = token(r"var\b"); + let WHILE = token(r"while\b"); + let COMMA = token(r"[,]"); + let SEMICOLON = token(r"[;]"); + let LEFT_PAREN = token(r"[(]"); + let RIGHT_PAREN = token(r"[)]"); + let LEFT_BRACE = token(r"[{]"); + let RIGHT_BRACE = token(r"[}]"); + let NUMBER = token(r"[0-9]+").map(|matched| { + Ok(to_string(&AstNode::integer( + matched.parse::().unwrap(), + ))?) + }); + let IDENTIFIER = token(r"[a-zA-Z_][a-zA-Z0-9_]*") + .map(|matched| Ok(to_string(&AstNode::identifier(matched))?)); + let NOT = token(r"!").map(|matched| Ok(to_string(&AstNode::not(AstNode::null()))?)); + let ASSIGN = + token(r"=").map(|matched| Ok(to_string(&AstNode::assign("".into(), AstNode::null()))?)); + let EQUAL = token(r"==").map(|matched| { + Ok(to_string(&AstNode::equal( + AstNode::null(), + AstNode::null(), + ))?) + }); + let NOT_EQUAL = token(r"!=").map(|matched| { + Ok(to_string(&AstNode::not_equal( + AstNode::null(), + AstNode::null(), + ))?) + }); + let PLUS = token(r"[+]") + .map(|matched| Ok(to_string(&AstNode::add(AstNode::null(), AstNode::null()))?)); + let MINUS = token(r"[-]").map(|matched| { + Ok(to_string(&AstNode::subtract( + AstNode::null(), + AstNode::null(), + ))?) + }); + let STAR = token(r"[*]").map(|matched| { + Ok(to_string(&AstNode::multiply( + AstNode::null(), + AstNode::null(), + ))?) + }); + let SLASH = token(r"[/]").map(|matched| { + Ok(to_string(&AstNode::divide( + AstNode::null(), + AstNode::null(), + ))?) + }); + // Expression parser. + let mut expression = Parser::constant("").map(|matched| Ok(to_string(&AstNode::null())?)); + // Call parser. + let args = expression + .clone() + .and( + COMMA + .ignore_before(expression.clone()) + .repeat_range(0..usize::MAX), + ) + .map(|matched| { + println!("{}", matched); + let mut args = vec![]; + let data = from_str::>(&matched)?; + args.push(data[0].clone()); + let others = from_str::>(&data[1])?; + for o in others { + args.push(o.clone()); + } + Ok(to_string(&args)?) + }); + let call = IDENTIFIER + .clone() + .ignore_after(LEFT_PAREN.clone()) + .and(args.clone()) + .ignore_after(RIGHT_PAREN.clone()) + .map(|matched| { + let data = from_str::>(&matched)?; + let callee = data[0].clone(); + let args = from_str::>(&data[1])?; + for arg in args { + println!("{}", arg); + } + Ok(to_string( + &AstNode::function_call(callee, vec![]), // TODO: recursively make into AstNodes + )?) + }); + // Atom parser. + let atom = call + .clone() + .or(IDENTIFIER.clone()) + .or(NUMBER.clone()) + .or(LEFT_PAREN + .clone() + .ignore_before(expression.clone()) + .ignore_after(RIGHT_PAREN.clone())); + // Unary operator parsers. + let unary = NOT.clone().optional().and(atom.clone()).map(|matched| { + let data = from_str::>(&matched)?; + let atom_data = from_str::(&data[1])?; + Ok(to_string(&match &data[0][..] { + "!" => AstNode::not(atom_data), + _ => atom_data, + })?) + }); + // Infix operator parsers. + let infix = |operator_parser: Parser, term_parser: Parser| { + term_parser + .clone() + .and( + operator_parser + .and(term_parser.clone()) + .repeat_range(0..usize::MAX), + ) + .map(|matched| { + let data = from_str::>(&matched)?; + let others = from_str::>(&data[1])?; + let mut current = from_str::(&data[0])?; + for i in 0..others.len() { + let o = from_str::>(&others[i])?; // Parse the [operator, unary] + let mut op = from_str::(&o[0])?; // Pull the operator out. + let t = from_str::(&o[1])?; // Pull the term out. + op.subnodes[0] = current; // Put current on the left side. + op.subnodes[1] = t; // Put the term on the right side. + current = op; // Replace current with the operator. + } + Ok(to_string(¤t)?) + }) + }; + let product = infix(STAR.clone().or(SLASH.clone()), unary.clone()); + let sum = infix(PLUS.clone().or(MINUS.clone()), product.clone()); + let comparison = infix(EQUAL.clone().or(NOT_EQUAL.clone()), sum.clone()); + // Close the recursive definition. + // OR NOT! IT WORKS IN JS BECAUSE IT GETS HOISTED + expression = comparison.clone(); + from_str::(&expression.parse(src).unwrap().0).unwrap() + // AstNode::block(vec![]) +}