No idea how to fix the recursive definition, but working otherwise.

This commit is contained in:
Garen Tyler 2020-11-29 22:42:27 -07:00
parent fed2aef6e4
commit 16de6c4f75
9 changed files with 576 additions and 683 deletions

74
Cargo.lock generated
View File

@ -15,6 +15,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "base64"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.2.1" version = "1.2.1"
@ -69,6 +75,26 @@ version = "0.1.0"
dependencies = [ dependencies = [
"nom", "nom",
"regex", "regex",
"ron",
"serde",
]
[[package]]
name = "proc-macro2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
dependencies = [
"proc-macro2",
] ]
[[package]] [[package]]
@ -89,18 +115,60 @@ version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c" checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c"
[[package]]
name = "ron"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8a58080b7bb83b2ea28c3b7a9a994fd5e310330b7c8ca5258d99b98128ecfe4"
dependencies = [
"base64",
"bitflags",
"serde",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.5" version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "serde"
version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "static_assertions" name = "static_assertions"
version = "1.1.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c1e438504729046a5cfae47f97c30d6d083c7d91d94603efdae3477fc070d4c"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]] [[package]]
name = "thread_local" name = "thread_local"
version = "1.0.1" version = "1.0.1"
@ -110,6 +178,12 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "unicode-xid"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.2" version = "0.9.2"

View File

@ -9,3 +9,5 @@ edition = "2018"
[dependencies] [dependencies]
regex = "1.4.1" regex = "1.4.1"
nom = "5.1.2" nom = "5.1.2"
ron = "0.6.2"
serde = "1.0.117"

View File

@ -1,4 +1,5 @@
#[derive(Clone, Debug, PartialEq)] use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum AstNodeKind { pub enum AstNodeKind {
// Primitives // Primitives
Integer, Integer,
@ -23,20 +24,22 @@ pub enum AstNodeKind {
FunctionDefinition, FunctionDefinition,
VariableDefinition, VariableDefinition,
Assign, Assign,
// Blank node
Null,
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct AstNode { pub struct AstNode {
pub kind: AstNodeKind, pub kind: AstNodeKind,
pub value: String, pub value: String,
pub subnodes: Vec<AstNode> pub subnodes: Vec<AstNode>,
} }
impl AstNode { impl AstNode {
pub fn new(kind: AstNodeKind, value: String, subnodes: Vec<AstNode>) -> AstNode { pub fn new(kind: AstNodeKind, value: String, subnodes: Vec<AstNode>) -> AstNode {
AstNode { AstNode {
kind, kind,
value, value,
subnodes subnodes,
} }
} }
pub fn emit(&self, f: &mut dyn std::fmt::Write) -> std::fmt::Result { pub fn emit(&self, f: &mut dyn std::fmt::Write) -> std::fmt::Result {
@ -51,11 +54,11 @@ impl AstNode {
Program => { Program => {
write!(f, "(module\n")?; write!(f, "(module\n")?;
for node in &self.subnodes { for node in &self.subnodes {
node.emit(f); node.emit(f)?;
} }
write!(f, ")") write!(f, ")")
} }
_ => Ok(()) _ => Ok(()),
} }
} }
@ -87,42 +90,42 @@ impl AstNode {
AstNode { AstNode {
kind: AstNodeKind::NotEqual, kind: AstNodeKind::NotEqual,
value: "not_equal".into(), value: "not_equal".into(),
subnodes: vec![left, right] subnodes: vec![left, right],
} }
} }
pub fn equal(left: AstNode, right: AstNode) -> AstNode { pub fn equal(left: AstNode, right: AstNode) -> AstNode {
AstNode { AstNode {
kind: AstNodeKind::Equal, kind: AstNodeKind::Equal,
value: "equal".into(), value: "equal".into(),
subnodes: vec![left, right] subnodes: vec![left, right],
} }
} }
pub fn add(left: AstNode, right: AstNode) -> AstNode { pub fn add(left: AstNode, right: AstNode) -> AstNode {
AstNode { AstNode {
kind: AstNodeKind::Add, kind: AstNodeKind::Add,
value: "add".into(), value: "add".into(),
subnodes: vec![left, right] subnodes: vec![left, right],
} }
} }
pub fn subtract(left: AstNode, right: AstNode) -> AstNode { pub fn subtract(left: AstNode, right: AstNode) -> AstNode {
AstNode { AstNode {
kind: AstNodeKind::Subtract, kind: AstNodeKind::Subtract,
value: "subtract".into(), value: "subtract".into(),
subnodes: vec![left, right] subnodes: vec![left, right],
} }
} }
pub fn multiply(left: AstNode, right: AstNode) -> AstNode { pub fn multiply(left: AstNode, right: AstNode) -> AstNode {
AstNode { AstNode {
kind: AstNodeKind::Multiply, kind: AstNodeKind::Multiply,
value: "multiply".into(), value: "multiply".into(),
subnodes: vec![left, right] subnodes: vec![left, right],
} }
} }
pub fn divide(left: AstNode, right: AstNode) -> AstNode { pub fn divide(left: AstNode, right: AstNode) -> AstNode {
AstNode { AstNode {
kind: AstNodeKind::Divide, kind: AstNodeKind::Divide,
value: "divide".into(), value: "divide".into(),
subnodes: vec![left, right] subnodes: vec![left, right],
} }
} }
// Control flow // Control flow
@ -130,14 +133,18 @@ impl AstNode {
AstNode { AstNode {
kind: AstNodeKind::Block, kind: AstNodeKind::Block,
value: "block".into(), value: "block".into(),
subnodes: statements subnodes: statements,
} }
} }
pub fn if_statement(conditional: AstNode, consequence: AstNode, alternative: AstNode) -> AstNode { pub fn if_statement(
conditional: AstNode,
consequence: AstNode,
alternative: AstNode,
) -> AstNode {
AstNode { AstNode {
kind: AstNodeKind::IfStatement, kind: AstNodeKind::IfStatement,
value: "if_statement".into(), value: "if_statement".into(),
subnodes: vec![conditional, consequence, alternative] subnodes: vec![conditional, consequence, alternative],
} }
} }
pub fn while_loop(conditional: AstNode, body: AstNode) -> AstNode { pub fn while_loop(conditional: AstNode, body: AstNode) -> AstNode {
@ -151,7 +158,7 @@ impl AstNode {
AstNode { AstNode {
kind: AstNodeKind::Program, kind: AstNodeKind::Program,
value: "program".into(), value: "program".into(),
subnodes: statements subnodes: statements,
} }
} }
// Functions and variables // Functions and variables
@ -194,4 +201,51 @@ impl AstNode {
subnodes: vec![value], subnodes: vec![value],
} }
} }
// Blank node
/// Creates a blank node: kind `Null`, an empty value and no subnodes.
pub fn null() -> AstNode {
    let value = String::new();
    let subnodes = Vec::new();
    AstNode {
        kind: AstNodeKind::Null,
        value,
        subnodes,
    }
}
// Other
/// Writes this node and, recursively, its subnodes as an indented brace-delimited listing.
///
/// `indent` is the number of padding characters written before the opening and
/// closing braces; the fields are padded by `indent + 2` and each subnode is
/// printed with `indent + 4`. The `subnodes` section is omitted for leaf nodes.
pub fn pretty_print(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result {
    let brace_pad = " ".repeat(indent);
    let field_pad = " ".repeat(indent + 2);

    write!(f, "{}{{\n", brace_pad)?;
    write!(f, "{}kind: {:?}\n", field_pad, self.kind)?;
    write!(f, "{}value: {:?}\n", field_pad, self.value)?;

    if !self.subnodes.is_empty() {
        write!(f, "{}subnodes: [\n", field_pad)?;
        for child in self.subnodes.iter() {
            child.pretty_print(f, indent + 4)?;
            write!(f, ",\n")?;
        }
        write!(f, "{}]\n", field_pad)?;
    }

    write!(f, "{}}}", brace_pad)
}
}
impl std::fmt::Display for AstNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.pretty_print(f, 0)
}
} }

View File

@ -1,235 +0,0 @@
use std::fmt::Write;
/// A node of the abstract syntax tree, identified by a string `kind` tag.
#[derive(Debug, PartialEq)]
pub struct AstNode {
/// Textual payload (e.g. a number literal, identifier or callee name); `None` when the node has none.
pub value: Option<String>,
/// String tag such as `"number"` or `"add"` that `Display` and `emit` dispatch on.
pub kind: String,
/// Child nodes (operands, arguments, statements); `None` for leaf nodes.
pub subtokens: Option<Vec<AstNode>>,
}
impl std::fmt::Display for AstNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match &self.kind[..] {
"number" => write!(f, "{}", self.value.as_ref().expect("number had blank value")),
"identifier" => write!(f, "{}", self.value.as_ref().expect("number had blank value")),
"not" => write!(f, "!({})", self.subtokens.as_ref().expect("not had blank subtokens")[0]),
"equal" => write!(f, "({} == ({}))", self.subtokens.as_ref().expect("equal had blank subtokens")[0], self.subtokens.as_ref().expect("equal had blank subtokens")[1]),
"not_equal" => write!(f, "({} != {})", self.subtokens.as_ref().expect("not_equal had blank subtokens")[0], self.subtokens.as_ref().expect("not_equal had blank subtokens")[1]),
"add" => write!(f, "({} + {})", self.subtokens.as_ref().expect("add had blank subtokens")[0], self.subtokens.as_ref().expect("add had blank subtokens")[1]),
"subtract" => write!(f, "({} - {})", self.subtokens.as_ref().expect("subtract had blank subtokens")[0], self.subtokens.as_ref().expect("subtract had blank subtokens")[1]),
"multiply" => write!(f, "({} * {})", self.subtokens.as_ref().expect("multiply had blank subtokens")[0], self.subtokens.as_ref().expect("multiply had blank subtokens")[1]),
"divide" => write!(f, "({} / {})", self.subtokens.as_ref().expect("divide had blank subtokens")[0], self.subtokens.as_ref().expect("divide had blank subtokens")[1]),
"call" => {
write!(f, "({}(", self.value.as_ref().expect("call had blank value"))?;
let args = self.subtokens.as_ref().expect("call had blank subtokens");
if args.len() > 0 {
write!(f, "{}", args[0])?;
if args.len() > 1 {
for i in 1..args.len() {
write!(f, ", {}", args[i])?;
}
}
}
write!(f, "))")
},
"return" => write!(f, "return {}", self.subtokens.as_ref().expect("return had blank subtokens")[0]),
"block" => {
write!(f, "{{\n")?;
let stmts = self.subtokens.as_ref().expect("block had blank subtokens");
if stmts.len() > 0 {
write!(f, "{};", stmts[0])?;
if stmts.len() > 1 {
for i in 1..stmts.len() {
write!(f, "\n{};", stmts[i])?;
}
}
}
write!(f, "\n}}")
}
"if" => {
let parts = self.subtokens.as_ref().expect("if had blank subtokens");
write!(f, "(if ({}) {{{}}} else {{{}}})", parts[0], parts[1], parts[2])
}
"function" => {
let parts = self.subtokens.as_ref().expect("function had blank subtokens");
write!(f, "function {}(", self.value.as_ref().expect("function had blank value"))?;
let params = &parts[1..];
if params.len() > 0 {
write!(f, "{}", params[0])?;
if params.len() > 1 {
for i in 1..params.len() {
write!(f, ", {}", params[i])?;
}
}
}
write!(f, ") {}", parts[0])
}
"variable" => write!(f, "var {} = {}", self.value.as_ref().expect("var had blank value"), self.subtokens.as_ref().expect("var had blank subtokens")[0]),
"assign" => write!(f, "{} = {}", self.value.as_ref().expect("assign had blank value"), self.subtokens.as_ref().expect("assign had blank subtokens")[0]),
"while" => {
let parts = self.subtokens.as_ref().expect("while had blank subtokens");
write!(f, "while ({})\n{}\n", parts[0], parts[1])
}
"program" => {
write!(f, "{{\n")?;
let stmts = self.subtokens.as_ref().expect("program had blank subtokens");
if stmts.len() > 0 {
write!(f, "{};", stmts[0])?;
if stmts.len() > 1 {
for i in 1..stmts.len() {
write!(f, "\n{};", stmts[i])?;
}
}
}
write!(f, "\n}}")
}
_ => write!(f, "(unknown node type {})", self.kind),
}
}
}
impl AstNode {
    /// Assembles a node from its parts; shared by the public constructors below.
    fn build(kind: &str, value: Option<String>, subtokens: Option<Vec<AstNode>>) -> AstNode {
        AstNode {
            value,
            kind: kind.into(),
            subtokens,
        }
    }

    /// Writes the instructions for this node to `f`.
    ///
    /// Only `"number"` (`i32.const N`) and `"add"` (both operands, then `i32.add`)
    /// produce output; every other kind writes nothing and returns `Ok(())`.
    ///
    /// # Panics
    /// Panics if a `"number"` node has no value or an `"add"` node lacks two children.
    pub fn emit(&self, f: &mut dyn Write) -> Result<(), std::fmt::Error> {
        match self.kind.as_str() {
            "number" => writeln!(
                f,
                "i32.const {}",
                self.value.as_ref().expect("number had blank value")
            ),
            "add" => {
                let operands = self.subtokens.as_ref().expect("add had blank subtokens");
                operands[0].emit(f)?;
                operands[1].emit(f)?;
                writeln!(f, "i32.add")
            }
            _ => Ok(()),
        }
    }

    /// Leaf node holding the decimal text of `num`.
    pub fn number(num: i32) -> AstNode {
        AstNode::build("number", Some(num.to_string()), None)
    }

    /// Leaf node holding an identifier name.
    pub fn identifier<T: Into<String>>(id: T) -> AstNode {
        AstNode::build("identifier", Some(id.into()), None)
    }

    /// Logical negation of `operand`.
    pub fn not(operand: AstNode) -> AstNode {
        AstNode::build("not", None, Some(vec![operand]))
    }

    /// Equality comparison `left == right`.
    pub fn equal(left: AstNode, right: AstNode) -> AstNode {
        AstNode::build("equal", None, Some(vec![left, right]))
    }

    /// Inequality comparison `left != right`.
    pub fn not_equal(left: AstNode, right: AstNode) -> AstNode {
        AstNode::build("not_equal", None, Some(vec![left, right]))
    }

    /// Addition `left + right`.
    pub fn add(left: AstNode, right: AstNode) -> AstNode {
        AstNode::build("add", None, Some(vec![left, right]))
    }

    /// Subtraction `left - right`.
    pub fn subtract(left: AstNode, right: AstNode) -> AstNode {
        AstNode::build("subtract", None, Some(vec![left, right]))
    }

    /// Multiplication `left * right`.
    pub fn multiply(left: AstNode, right: AstNode) -> AstNode {
        AstNode::build("multiply", None, Some(vec![left, right]))
    }

    /// Division `left / right`.
    pub fn divide(left: AstNode, right: AstNode) -> AstNode {
        AstNode::build("divide", None, Some(vec![left, right]))
    }

    /// Call of `callee` with `args` as the children.
    pub fn call<T: Into<String>>(callee: T, args: Vec<AstNode>) -> AstNode {
        AstNode::build("call", Some(callee.into()), Some(args))
    }

    /// Return statement yielding `operand`.
    pub fn r#return(operand: AstNode) -> AstNode {
        AstNode::build("return", None, Some(vec![operand]))
    }

    /// Block containing `statements` in order.
    pub fn block(statements: Vec<AstNode>) -> AstNode {
        AstNode::build("block", None, Some(statements))
    }

    /// Conditional with children `[conditional, consequence, alternative]`.
    pub fn r#if(conditional: AstNode, consequence: AstNode, alternative: AstNode) -> AstNode {
        AstNode::build("if", None, Some(vec![conditional, consequence, alternative]))
    }

    /// Function definition: child 0 is `body`, followed by one identifier node per parameter.
    ///
    /// The original passed the whole `(T, T)` tuple to `identifier`, which does not
    /// satisfy `Into<String>` and therefore failed to compile. The first tuple
    /// element is used as the parameter name.
    pub fn function<T: Into<String>>(name: T, parameters: Vec<(T, T)>, body: AstNode) -> AstNode {
        let mut parts = Vec::with_capacity(parameters.len() + 1);
        parts.push(body); // First one will always be the body.
        // NOTE(review): the second tuple element is dropped; presumably it is a
        // type annotation — confirm against the callers.
        parts.extend(
            parameters
                .into_iter()
                .map(|(param_name, _)| AstNode::identifier(param_name)),
        );
        AstNode::build("function", Some(name.into()), Some(parts))
    }

    /// Variable declaration `var name = value`.
    pub fn variable<T: Into<String>>(name: T, value: AstNode) -> AstNode {
        AstNode::build("variable", Some(name.into()), Some(vec![value]))
    }

    /// Assignment `name = value`.
    pub fn assign<T: Into<String>>(name: T, value: AstNode) -> AstNode {
        AstNode::build("assign", Some(name.into()), Some(vec![value]))
    }

    /// While loop with children `[conditional, body]`.
    pub fn r#while(conditional: AstNode, body: AstNode) -> AstNode {
        AstNode::build("while", None, Some(vec![conditional, body]))
    }

    /// Top-level program containing `statements` in order.
    pub fn program(statements: Vec<AstNode>) -> AstNode {
        AstNode::build("program", None, Some(statements))
    }
}

View File

@ -1,5 +1,7 @@
#[macro_use] #![allow(unused_variables)]
extern crate nom; #![allow(non_snake_case)]
#![allow(dead_code)]
extern crate regex; extern crate regex;
pub mod ast; pub mod ast;

View File

@ -1,61 +1,4 @@
use pivot::ast::AstNode;
use regex::Regex;
fn main() { fn main() {
pivot::parse::parse(); let ast = pivot::parse::parse(r"log(2)");
// let src = AstNode::program(vec![ println!("{}", ast);
// AstNode::add(
// AstNode::integer(2),
// AstNode::integer(3),
// )
// ]);
// println!("{:?}", src);
// let mut out = String::new();
// src.emit(&mut out);
// println!("{}", out);
// test();
} }
// fn test() {
// use pivot::ast::AstNode;
//
// let src = r#"
// function factorial(n) {
// var result = 1;
// while (n != 1) {
// result = result * n;
// n = n - 1;
// }
// return result;
// }
// "#;
//
// let actual_ast = pivot::parse::parse();
// let expected_ast = AstNode::function("factorial", vec!["n"],
// AstNode::block(vec![
// AstNode::variable("result", AstNode::number(1)),
// AstNode::r#while(
// AstNode::not_equal(
// AstNode::identifier("n"),
// AstNode::number(1)
// ),
// AstNode::block(vec![
// AstNode::assign("result",
// AstNode::multiply(
// AstNode::identifier("result"),
// AstNode::identifier("n")
// )
// ),
// AstNode::assign("n",
// AstNode::subtract(
// AstNode::identifier("n"),
// AstNode::number(1)
// )
// )
// ]),
// ),
// AstNode::r#return(AstNode::identifier("result"))
// ])
// );
// println!("{}", expected_ast);
// }

View File

@ -1,371 +0,0 @@
use crate::ast::AstNode;
use std::ops::Range;
use regex::Regex;
/// Debug driver: runs a number regex bound to a local string against the fixed
/// input `"420"`, printing the parser and the bound string before and after,
/// then returns an empty block node.
pub fn parse() -> AstNode {
    let source = "420";
    let mut bound = String::new();
    println!("src: {:?}", source);
    println!("n: {:?}", bound);
    {
        // The parser holds `&mut bound`, so it must be dropped before `bound` is read again.
        let mut parser = Parser::regex(r"\d+(\.\d+)?").bind(&mut bound);
        println!("full: {}", parser);
        let outcome = parser.parse(source);
        println!("full.parse: {:?}", outcome);
        println!("full: {}", parser);
    }
    println!("src: {:?}", source);
    println!("n: {:?}", bound);
    AstNode::block(Vec::new())
}
/// The operation a `Parser` node performs; composite kinds act on the node's `subparsers`.
#[derive(Debug)]
pub enum ParserKind<'a> {
/// Succeeds when the input begins with this exact string.
Literal(String),
/// Succeeds when the regex's first match starts at index 0 of the input.
Regex(Regex),
/// Runs subparser 0, then subparser 1 on the remainder, concatenating the matched text.
And,
/// Tries subparser 0 and falls back to subparser 1 on failure.
Or,
/// Applies subparser 0 exactly this many times.
Repeat(usize),
/// Applies subparser 0 `start` times, then up to `end - start` further optional times.
RepeatRange(Range<usize>),
/// Runs subparser 0 and stores a copy of the matched text in the referenced string.
Bind(&'a mut String),
}
impl<'a> std::fmt::Display for ParserKind<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use ParserKind::*;
match self {
Literal(s) => write!(f, "Literal \"{}\"", s),
Regex(r) => write!(f, "Regex /{}/", r.as_str()),
And => write!(f, "And"),
Or => write!(f, "Or"),
Repeat(num) => write!(f, "Repeat {}", num),
RepeatRange(range) => write!(f, "RepeatRange {:?}", range),
Bind(_) => write!(f, "Bind"),
}
}
}
/// A node in a tree of parser combinators.
#[derive(Debug)]
pub struct Parser<'a> {
// What this node does when `parse` is called.
kind: ParserKind<'a>,
// Operand parsers; leaf kinds (literal, regex) are built with none.
subparsers: Vec<Parser<'a>>,
// bind: Option<&'a mut String>,
}
// Displays the parser tree by delegating to `pretty_print` with an indent of 0.
impl<'a> std::fmt::Display for Parser<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.pretty_print(f, 0)
}
}
impl<'a> Parser<'a> {
pub fn parse<T: Into<String>>(&mut self, src: T) -> Result<(String, String), String> {
use ParserKind::*;
let s: String = src.into();
match &mut self.kind {
Literal(literal) => {
if s.len() >= literal.len() && s[..literal.len()] == literal[..] {
Ok((s[..literal.len()].to_owned(), s[literal.len()..].to_owned()))
} else {
Err(s)
}
}
Regex(re) => {
if let Some(mat) = re.find(&s) {
if mat.start() == 0 {
Ok((s[mat.start()..mat.end()].to_owned(), s[mat.end()..].to_owned()))
} else {
Err(s)
}
} else {
Err(s)
}
}
And => {
let (lmatched, lrest) = self.subparsers[0].parse(s)?;
let (rmatched, rrest) = self.subparsers[1].parse(lrest)?;
Ok((lmatched + &rmatched, rrest))
}
Or => {
if let Ok(lresult) = self.subparsers[0].parse(s.clone()) {
Ok(lresult)
} else {
self.subparsers[1].parse(s.clone())
}
}
Repeat(num_repeats) => {
let mut matched = String::new();
let mut rest = s.clone();
for _ in 0..*num_repeats {
let (m, r) = self.subparsers[0].parse(rest)?;
matched += &m;
rest = r;
}
Ok((matched, rest))
}
RepeatRange(range) => {
let mut matched = String::new();
let mut rest = s.clone();
// Parse up to range.start
for _ in 0..range.start {
let (m, r) = self.subparsers[0].parse(rest)?;
matched += &m;
rest = r;
}
// Parse optionally up to range.end
for _ in 0..(range.end - range.start) {
let parse_result = self.subparsers[0].parse(rest);
if let Err(r) = parse_result {
rest = r;
break;
} else {
let (m, r) = parse_result.unwrap();
matched += &m;
rest = r;
}
}
Ok((matched, rest))
}
Bind(var) => {
let (matched, rest) = self.subparsers[0].parse(s)?;
**var = matched.clone();
Ok((matched, rest))
}
}
}
// Static
pub fn literal<T: Into<String>>(s: T) -> Parser<'a> {
Parser {
kind: ParserKind::Literal(s.into()),
subparsers: vec![],
// bind: None,
}
}
pub fn regex<T: Into<String>>(s: T) -> Parser<'a> {
Parser {
kind: ParserKind::Regex(Regex::new(&s.into()).expect("could not compile regex")),
subparsers: vec![],
// bind: None,
}
}
// Instance
pub fn and(self, r: Parser<'a>) -> Parser<'a> {
Parser {
kind: ParserKind::And,
subparsers: vec![self, r],
// bind: None,
}
}
pub fn or(self, r: Parser<'a>) -> Parser<'a> {
Parser {
kind: ParserKind::Or,
subparsers: vec![self, r],
// bind: None,
}
}
pub fn repeat(self, num_repeats: usize) -> Parser<'a> {
Parser {
kind: ParserKind::Repeat(num_repeats),
subparsers: vec![self],
// bind: None,
}
}
pub fn repeat_range(self, num_repeats: Range<usize>) -> Parser<'a> {
Parser {
kind: ParserKind::RepeatRange(num_repeats),
subparsers: vec![self],
// bind: None,
}
}
pub fn optional(self) -> Parser<'a> {
Parser {
kind: ParserKind::RepeatRange(0..1),
subparsers: vec![self],
// bind: None,
}
}
pub fn bind(self, s: &'a mut String) -> Parser<'a> {
Parser {
kind: ParserKind::Bind(s),
subparsers: vec![self],
}
}
// Other
pub fn pretty_print(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result {
for _ in 0..indent {
write!(f, " ");
}
write!(f, "{}", self.kind)?;
if self.subparsers.len() > 0 {
write!(f, " [\n")?;
for subparser in &self.subparsers {
subparser.pretty_print(f, indent + 2)?;
write!(f, ",\n")?;
}
for _ in 0..indent {
write!(f, " ")?;
}
write!(f, "]")
} else {
write!(f, "")
}
}
}
// use combinators::*;
// pub mod combinators {
// pub struct Parser<'a> {
// source: &'a str,
// subparsers: Vec<Parser<'a>>,
// pub parse: Box<Fn(&'a str) -> Result<(&'a str, &'a str), &'a str>>,
// }
// impl Parser {
// // pub type S = Into<String>;
// pub fn literal<'a>(literal: &'a str) -> Parser {
// Parser {
// source: literal.into(),
// subparsers: vec![],
// parse: Box::new(|s: &'a str| -> Result<(&'a str, &'a str), &'a str> {
// if src.len() >= literal.len() {
// if src[..literal.len()] == literal[..] {
// return Ok((&src[..literal.len()], &src[literal.len()..]));
// }
// }
// Err(&src[..])
// })
// }
// }
// }
// pub fn literal<'a, T>(literal: &'a str) -> T where T: Fn(&str) -> Result<(&str, &str), &str> + 'a {
// move |src: &str| -> Result<(&str, &str), &str> {
// if src.len() >= literal.len() {
// if src[..literal.len()] == literal[..] {
// return Ok((&src[..literal.len()], &src[literal.len()..]));
// }
// }
// Err(&src[..])
// }
// }
// pub fn and<'a>(left: T, right: T) -> T where T: Fn(&str) -> Result<(&str, &str), &str> + 'a {
//
// }
// }
// // Whitespace and comments.
// let whitespace = Parser.regex(/[ \n\r\t]+/y);
// let comments = Parser.regex(/[/][/].*/y).or(
// Parser.regex(/[/][*].*[*][/]/sy)
// );
// let ignored = Parser.zeroOrMore(whitespace.or(comments));
// // Tokens
// let token = (pattern: RegExp) => Parser.regex(pattern).bind((value: any) => ignored.and(Parser.constant(value)));
// let FUNCTION = token(/function\b/y);
// let IF = token(/if\b/y);
// let ELSE = token(/else\b/y);
// let RETURN = token(/return\b/y);
// let ASSIGN = token(/=/y).map(_ => Assign);
// let VAR = token(/var\b/y);
// let WHILE = token(/while\b/y);
// let COMMA = token(/[,]/y);
// let SEMICOLON = token(/[;]/y);
// let LEFT_PAREN = token(/[(]/y);
// let RIGHT_PAREN = token(/[)]/y);
// let LEFT_BRACE = token(/[{]/y);
// let RIGHT_BRACE = token(/[}]/y);
// let NUMBER = token(/[0-9]/y).map((digits: any) => new Num(parseInt(digits)));
// let ID = token(/[a-zA-Z_][a-zA-Z0-9_]*/y).map((x: any) => new Id(x));
// let NOT = token(/!/y).map(_ => Not);
// let EQUAL = token(/==/y).map(_ => Equal);
// let NOT_EQUAL = token(/!=/y).map(_ => NotEqual);
// let PLUS = token(/[+]/y).map(_ => Add);
// let MINUS = token(/[-]/y).map(_ => Subtract);
// let STAR = token(/[*]/y).map(_ => Multiply);
// let SLASH = token(/[/]/y).map(_ => Divide);
// // Expression parser
// let expression: Parser<AST> = Parser.error('expression parser used before definition');
// // Call parser
// let args: Parser<Array<AST>> = expression.bind((arg: any) => Parser.zeroOrMore(COMMA.and(expression)).bind((args: any) => Parser.constant([arg, ...args]))).or(Parser.constant([]));
// let functionCall: Parser<AST> = ID.bind((callee: any) => LEFT_PAREN.and(args.bind((args: any) => RIGHT_PAREN.and(Parser.constant(callee.equals(new Id('assert')) ? new Assert(args[0]) : new FunctionCall(callee, args))))));
// // Atom
// let atom: Parser<AST> = functionCall.or(ID).or(NUMBER).or(LEFT_PAREN.and(expression).bind((e: any) => RIGHT_PAREN.and(Parser.constant(e))));
// // Unary operators
// let unary: Parser<AST> = Parser.optional(NOT).bind((not: any) => atom.map((operand: any) => not ? new Not(operand) : operand));
// // Infix operators
// let infix = (operatorParser: any, operandParser: any) =>
// operandParser.bind((operand: any) =>
// Parser.zeroOrMore(
// operatorParser.bind((operator: any) =>
// operandParser.bind((operand: any) =>
// Parser.constant({ operator, operand })
// )
// )
// ).map((operatorTerms: any) =>
// operatorTerms.reduce((left: any, { operator, operand }: { operator: any, operand: any }) =>
// new operator(left, operand), operand)
// )
// );
// let product = infix(STAR.or(SLASH), unary);
// let sum = infix(PLUS.or(MINUS), product);
// let comparison = infix(EQUAL.or(NOT_EQUAL), sum);
// // Associativity
// // Closing the loop: expression
// expression.parse = comparison.parse;
// // Statement
// let statement: Parser<AST> = Parser.error('statement parser used before definition');
// let returnStatement: Parser<AST> = RETURN.and(expression).bind((operand: any) => SEMICOLON.and(Parser.constant(new Return(operand))));
// let expressionStatement: Parser<AST> = expression.bind((operand: any) => SEMICOLON.and(Parser.constant(operand)));
// let ifStatement: Parser<AST> = IF.and(LEFT_PAREN).and(expression).bind((conditional: any) =>
// RIGHT_PAREN.and(statement).bind((consequence: any) =>
// ELSE.and(statement).bind((alternative: any) =>
// Parser.constant(new If(conditional, consequence, alternative))
// )
// )
// );
// let whileStatement: Parser<AST> = WHILE.and(LEFT_PAREN).and(expression).bind((conditional: any) =>
// RIGHT_PAREN.and(statement).bind((body: any) =>
// Parser.constant(new While(conditional, body))
// )
// );
// let varStatement: Parser<AST> = VAR.and(ID).bind((name: any) =>
// ASSIGN.and(expression).bind((value: any) =>
// SEMICOLON.and(Parser.constant(new Var(name, value)))
// )
// );
// let assignmentStatement: Parser<AST> = ID.bind((name: any) =>
// ASSIGN.and(expression).bind((value: any) =>
// SEMICOLON.and(Parser.constant(new Assign(name, value)))
// )
// );
// let blockStatement: Parser<AST> = LEFT_BRACE.and(Parser.zeroOrMore(statement)).bind((statements: any) =>
// RIGHT_BRACE.and(Parser.constant(new Block(statements)))
// );
// let parameters: Parser<Array<string>> = ID.bind((param: any) =>
// Parser.zeroOrMore(COMMA.and(ID)).bind((params: any) =>
// Parser.constant([param, ...params])
// )
// ).or(Parser.constant([]));
// let functionStatement: Parser<AST> = FUNCTION.and(ID).bind((name: any) =>
// LEFT_PAREN.and(parameters).bind((parameters: any) =>
// RIGHT_PAREN.and(blockStatement).bind((block: any) =>
// Parser.constant(name.equals(new Id('main')) ? new Main(block.statements) : new FunctionDefinition(name, parameters, block))
// )
// )
// );
// let statementParser: Parser<AST> =
// returnStatement
// .or(functionStatement)
// .or(ifStatement)
// .or(whileStatement)
// .or(varStatement)
// .or(assignmentStatement)
// .or(blockStatement)
// .or(expressionStatement);
// statement.parse = statementParser.parse;
// let parser: Parser<AST> = ignored.and(Parser.zeroOrMore(statement)).map((statements: any) => new Block(statements));
//
// return parser.parseStringToCompletion(source);

267
src/parse/combinators.rs Normal file
View File

@ -0,0 +1,267 @@
use regex::Regex;
use ron::to_string;
use std::ops::Range;
use std::rc::Rc;
/// The operation a `Parser` node performs; composite kinds act on the node's `subparsers`.
pub enum ParserKind {
/// Succeeds when the input begins with this exact string.
Literal(String),
/// Succeeds when the regex's first match starts at index 0 of the input.
Regex(Regex),
/// Consumes nothing and yields this string as the match.
Constant(String),
/// Runs subparser 0 then subparser 1; the match is the RON-serialized list of both matches.
And,
/// Runs subparser 0 then subparser 1; `true` keeps only the second match, `false` only the first.
Ignore(bool),
/// Tries subparser 0 and falls back to subparser 1 on failure.
Or,
/// Applies subparser 0 exactly this many times.
Repeat(usize),
/// Applies subparser 0 `start` times, then up to `end - start` further optional times.
RepeatRange(Range<usize>),
/// Panics with this message when parsed.
Error(String),
/// Runs subparser 0 and passes the matched text through this function; an `Err` from it fails the parse.
Map(Rc<Box<dyn Fn(String) -> Result<String, ron::Error>>>),
}
// Debug output is identical to the Display output. It is written by hand,
// presumably because the boxed closure in `Map` has no `Debug` impl to derive from.
impl std::fmt::Debug for ParserKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self)
}
}
impl std::fmt::Display for ParserKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use ParserKind::*;
match self {
Literal(s) => write!(f, "Literal \"{}\"", s),
Regex(r) => write!(f, "Regex /{}/", r.as_str()),
Constant(c) => write!(f, "Constant \"{}\"", c),
And => write!(f, "And"),
Ignore(b) => write!(f, "Ignore{}", if *b { "Before" } else { "After" }),
Or => write!(f, "Or"),
Repeat(num) => write!(f, "Repeat {}", num),
RepeatRange(range) => write!(f, "RepeatRange {:?}", range),
Error(msg) => write!(f, "Error \"{}\"", msg),
Map(_) => write!(f, "Map"),
}
}
}
impl Clone for ParserKind {
fn clone(&self) -> Self {
use ParserKind::*;
match self {
Literal(s) => Literal(s.clone()),
Regex(r) => Regex(r.clone()),
Constant(c) => Constant(c.clone()),
And => And,
Ignore(b) => Ignore(*b),
Or => Or,
Repeat(num) => Repeat(num.clone()),
RepeatRange(range) => RepeatRange(range.clone()),
Error(msg) => Error(msg.clone()),
Map(cfn) => Map(Rc::clone(cfn)),
}
}
}
/// A node in a tree of parser combinators.
#[derive(Debug, Clone)]
pub struct Parser {
// What this node does when `parse` is called.
kind: ParserKind,
// Operand parsers; leaf kinds (literal, regex, constant, error) are built with none.
subparsers: Vec<Parser>,
}
// Displays the parser tree by delegating to `pretty_print` with an indent of 0.
impl std::fmt::Display for Parser {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.pretty_print(f, 0)
}
}
impl Parser {
pub fn parse<T: Into<String>>(&self, src: T) -> Result<(String, String), String> {
use ParserKind::*;
let s: String = src.into();
match &self.kind {
Literal(literal) => {
if s.len() >= literal.len() && s[..literal.len()] == literal[..] {
Ok((s[..literal.len()].to_owned(), s[literal.len()..].to_owned()))
} else {
Err(s)
}
}
Regex(re) => {
if let Some(mat) = re.find(&s) {
if mat.start() == 0 {
Ok((
s[mat.start()..mat.end()].to_owned(),
s[mat.end()..].to_owned(),
))
} else {
Err(s)
}
} else {
Err(s)
}
}
Constant(constant) => Ok((constant.clone(), s)),
And => {
let (lmatched, lrest) = self.subparsers[0].parse(s)?;
let (rmatched, rrest) = self.subparsers[1].parse(lrest)?;
Ok((
to_string(&vec![lmatched.clone(), rmatched.clone()]).unwrap(),
rrest,
))
}
Ignore(before) => {
if *before {
let (_, rest) = self.subparsers[0].parse(s)?;
self.subparsers[1].parse(rest)
} else {
let (matched, rest) = self.subparsers[0].parse(s)?;
let (_, rest) = self.subparsers[1].parse(rest)?;
Ok((matched, rest))
}
}
Or => {
if let Ok(lresult) = self.subparsers[0].parse(s.clone()) {
Ok(lresult)
} else {
self.subparsers[1].parse(s.clone())
}
}
Repeat(num_repeats) => {
let mut matched = vec![];
let mut rest = s.clone();
for _ in 0..*num_repeats {
let (m, r) = self.subparsers[0].parse(rest)?;
matched.push(m);
rest = r;
}
Ok((to_string(&matched).unwrap(), rest))
}
RepeatRange(range) => {
let mut matched = vec![];
let mut rest = s.clone();
// Parse up to range.start
for _ in 0..range.start {
let (m, r) = self.subparsers[0].parse(rest)?;
matched.push(m);
rest = r;
}
// Parse optionally up to range.end
for _ in 0..(range.end - range.start) {
let parse_result = self.subparsers[0].parse(rest);
if let Err(r) = parse_result {
rest = r;
break;
} else {
let (m, r) = parse_result.unwrap();
matched.push(m);
rest = r;
}
}
Ok((to_string(&matched).unwrap(), rest))
}
Error(msg) => panic!(msg.clone()),
Map(cfn) => {
let (matched, rest) = self.subparsers[0].parse(s)?;
if let Ok(m) = cfn(matched) {
Ok((m, rest))
} else {
Err(rest)
}
}
}
}
// Static
pub fn literal<T: Into<String>>(s: T) -> Parser {
Parser {
kind: ParserKind::Literal(s.into()),
subparsers: vec![],
}
}
pub fn regex<T: Into<String>>(s: T) -> Parser {
Parser {
kind: ParserKind::Regex(Regex::new(&s.into()).expect("could not compile regex")),
subparsers: vec![],
}
}
pub fn constant<T: Into<String>>(s: T) -> Parser {
Parser {
kind: ParserKind::Constant(s.into()),
subparsers: vec![],
}
}
pub fn error<T: Into<String>>(s: T) -> Parser {
Parser {
kind: ParserKind::Error(s.into()),
subparsers: vec![],
}
}
// Instance
pub fn and(self, r: Parser) -> Parser {
Parser {
kind: ParserKind::And,
subparsers: vec![self, r],
}
}
pub fn ignore_before(self, r: Parser) -> Parser {
Parser {
kind: ParserKind::Ignore(true),
subparsers: vec![self, r],
}
}
pub fn ignore_after(self, r: Parser) -> Parser {
Parser {
kind: ParserKind::Ignore(false),
subparsers: vec![self, r],
}
}
pub fn or(self, r: Parser) -> Parser {
Parser {
kind: ParserKind::Or,
subparsers: vec![self, r],
}
}
pub fn repeat(self, num_repeats: usize) -> Parser {
Parser {
kind: ParserKind::Repeat(num_repeats),
subparsers: vec![self],
}
}
pub fn repeat_range(self, num_repeats: Range<usize>) -> Parser {
Parser {
kind: ParserKind::RepeatRange(num_repeats),
subparsers: vec![self],
}
}
pub fn optional(self) -> Parser {
Parser {
kind: ParserKind::RepeatRange(0..1),
subparsers: vec![self],
}
}
pub fn map<F: 'static>(self, cfn: F) -> Parser
where
F: Fn(String) -> Result<String, ron::Error>,
{
Parser {
kind: ParserKind::Map(Rc::new(Box::new(cfn))),
subparsers: vec![self],
}
}
// Other
pub fn pretty_print(&self, f: &mut std::fmt::Formatter<'_>, indent: usize) -> std::fmt::Result {
for _ in 0..indent {
write!(f, " ")?;
}
write!(f, "{}", self.kind)?;
if self.subparsers.len() > 0 {
write!(f, " [\n")?;
for subparser in &self.subparsers {
subparser.pretty_print(f, indent + 2)?;
write!(f, ",\n")?;
}
for _ in 0..indent {
write!(f, " ")?;
}
write!(f, "]")
} else {
write!(f, "")
}
}
}

157
src/parse/mod.rs Normal file
View File

@ -0,0 +1,157 @@
mod combinators;
use crate::ast::AstNode;
use combinators::Parser;
use ron::{from_str, to_string};
/// Parses `src` as a single expression and returns its syntax tree.
///
/// The grammar is assembled from `Parser` combinators. Intermediate results
/// travel between combinators as RON-serialized strings and are deserialized
/// into an `AstNode` at the end. Input left over after the expression is
/// ignored.
///
/// Known limitations:
/// * `args` and `atom` capture the placeholder `expression` parser, so call
///   arguments and parenthesized sub-expressions are not parsed recursively.
/// * The arguments of a call are validated but not yet stored in the node.
///
/// # Panics
///
/// Panics if `src` does not start with a valid expression, if a number
/// literal does not fit in an `i64`, or if the parser output cannot be
/// deserialized into an `AstNode`.
// The token bindings mirror the grammar's terminals; lowercase names such as
// `if` or `while` would collide with Rust keywords.
#[allow(non_snake_case)]
pub fn parse<T: Into<String>>(src: T) -> AstNode {
    let src: String = src.into();
    let whitespace = Parser::regex(r"[ \n\r\t]+");
    // Block comments use `(?s)` so `.` crosses newlines, and a lazy `.*?` so
    // the match stops at the first `*/` rather than the last one.
    let comments = Parser::regex(r"[/][/].*").or(Parser::regex(r"(?s)[/][*].*?[*][/]"));
    let ignored = whitespace.or(comments).repeat_range(0..usize::MAX);
    // Token parser constructor: skips ignorable text, then matches `pattern`.
    let i = ignored.clone();
    let token = move |pattern: &str| i.clone().ignore_before(Parser::regex(pattern));
    // Token helper parsers.
    let FUNCTION = token(r"function\b");
    let IF = token(r"if\b");
    let ELSE = token(r"else\b");
    let RETURN = token(r"return\b");
    let VAR = token(r"var\b");
    let WHILE = token(r"while\b");
    let COMMA = token(r"[,]");
    let SEMICOLON = token(r"[;]");
    let LEFT_PAREN = token(r"[(]");
    let RIGHT_PAREN = token(r"[)]");
    let LEFT_BRACE = token(r"[{]");
    let RIGHT_BRACE = token(r"[}]");
    let NUMBER = token(r"[0-9]+").map(|matched| {
        Ok(to_string(&AstNode::integer(
            matched.parse::<i64>().unwrap(),
        ))?)
    });
    let IDENTIFIER = token(r"[a-zA-Z_][a-zA-Z0-9_]*")
        .map(|matched| Ok(to_string(&AstNode::identifier(matched))?));
    // Operator tokens map to operator nodes with placeholder operands; the
    // real operands are filled in by `unary` and `infix` below.
    let NOT = token(r"!").map(|_matched| Ok(to_string(&AstNode::not(AstNode::null()))?));
    let ASSIGN =
        token(r"=").map(|_matched| Ok(to_string(&AstNode::assign("".into(), AstNode::null()))?));
    let EQUAL = token(r"==").map(|_matched| {
        Ok(to_string(&AstNode::equal(
            AstNode::null(),
            AstNode::null(),
        ))?)
    });
    let NOT_EQUAL = token(r"!=").map(|_matched| {
        Ok(to_string(&AstNode::not_equal(
            AstNode::null(),
            AstNode::null(),
        ))?)
    });
    let PLUS = token(r"[+]")
        .map(|_matched| Ok(to_string(&AstNode::add(AstNode::null(), AstNode::null()))?));
    let MINUS = token(r"[-]").map(|_matched| {
        Ok(to_string(&AstNode::subtract(
            AstNode::null(),
            AstNode::null(),
        ))?)
    });
    let STAR = token(r"[*]").map(|_matched| {
        Ok(to_string(&AstNode::multiply(
            AstNode::null(),
            AstNode::null(),
        ))?)
    });
    let SLASH = token(r"[/]").map(|_matched| {
        Ok(to_string(&AstNode::divide(
            AstNode::null(),
            AstNode::null(),
        ))?)
    });
    // Expression parser.
    // Placeholder only: the clones taken by `args` and `atom` below keep
    // pointing at this null-producing parser. Closing the recursion needs a
    // parser that is resolved lazily, which is not available in this module.
    let expression = Parser::constant("").map(|_matched| Ok(to_string(&AstNode::null())?));
    // Call parser.
    let args = expression
        .clone()
        .and(
            COMMA
                .ignore_before(expression.clone())
                .repeat_range(0..usize::MAX),
        )
        .map(|matched| {
            // `matched` is [first, [rest...]]; flatten it into one list.
            let data = from_str::<Vec<String>>(&matched)?;
            let mut args = vec![data[0].clone()];
            args.extend(from_str::<Vec<String>>(&data[1])?);
            Ok(to_string(&args)?)
        });
    let call = IDENTIFIER
        .clone()
        .ignore_after(LEFT_PAREN.clone())
        .and(args.clone())
        .ignore_after(RIGHT_PAREN.clone())
        .map(|matched| {
            let data = from_str::<Vec<String>>(&matched)?;
            let callee = data[0].clone();
            // Validated but not used yet.
            let _args = from_str::<Vec<String>>(&data[1])?;
            Ok(to_string(
                &AstNode::function_call(callee, vec![]), // TODO: recursively make into AstNodes
            )?)
        });
    // Atom parser.
    let atom = call
        .clone()
        .or(IDENTIFIER.clone())
        .or(NUMBER.clone())
        .or(LEFT_PAREN
            .clone()
            .ignore_before(expression.clone())
            .ignore_after(RIGHT_PAREN.clone()));
    // Unary operator parsers.
    let unary = NOT.clone().optional().and(atom.clone()).map(|matched| {
        let data = from_str::<Vec<String>>(&matched)?;
        // `optional()` yields a serialized list: empty when `!` was absent,
        // one element when it was present. Comparing the raw text with "!"
        // can never match, because the token was already mapped to a node.
        let negations = from_str::<Vec<String>>(&data[0])?;
        let atom_data = from_str::<AstNode>(&data[1])?;
        Ok(to_string(&if negations.is_empty() {
            atom_data
        } else {
            AstNode::not(atom_data)
        })?)
    });
    // Infix operator parsers: `term (operator term)*`, folded left-associatively.
    let infix = |operator_parser: Parser, term_parser: Parser| {
        term_parser
            .clone()
            .and(
                operator_parser
                    .and(term_parser.clone())
                    .repeat_range(0..usize::MAX),
            )
            .map(|matched| {
                let data = from_str::<Vec<String>>(&matched)?;
                let others = from_str::<Vec<String>>(&data[1])?;
                let mut current = from_str::<AstNode>(&data[0])?;
                for pair in &others {
                    let parts = from_str::<Vec<String>>(pair)?; // Parse the [operator, term].
                    let mut op = from_str::<AstNode>(&parts[0])?; // Pull the operator out.
                    let term = from_str::<AstNode>(&parts[1])?; // Pull the term out.
                    op.subnodes[0] = current; // Put current on the left side.
                    op.subnodes[1] = term; // Put the term on the right side.
                    current = op; // Replace current with the operator.
                }
                Ok(to_string(&current)?)
            })
    };
    let product = infix(STAR.clone().or(SLASH.clone()), unary.clone());
    let sum = infix(PLUS.clone().or(MINUS.clone()), product.clone());
    let comparison = infix(EQUAL.clone().or(NOT_EQUAL.clone()), sum.clone());
    // The full expression grammar is the lowest-precedence level. This does
    // NOT update the placeholder clones captured above (see the limitations).
    let expression = comparison;
    let (serialized, _rest) = expression
        .parse(src)
        .expect("source does not start with a valid expression");
    from_str::<AstNode>(&serialized).expect("parser produced an invalid serialized AstNode")
}