From fcbb9d9bf4d8066fa0537b50748b0d1b05cf2d78 Mon Sep 17 00:00:00 2001 From: Nicolas Polomack Date: Tue, 7 Jul 2020 13:27:10 +0200 Subject: [PATCH] Brought parsers to use a common abstraction Improved `Parser` trait definition --- .gitignore | 1 + Cargo.lock | 6 + Cargo.toml | 3 +- som-interpreter/src/main.rs | 9 +- som-interpreter/src/shell.rs | 7 +- som-parser-core/Cargo.toml | 10 + som-parser-core/README.md | 4 + .../src/combinators.rs | 127 ++++++------ .../parser.rs => som-parser-core/src/lib.rs | 66 +++--- som-parser-symbols/Cargo.toml | 1 + som-parser-symbols/src/lang.rs | 79 ++++---- som-parser-symbols/src/lib.rs | 18 +- som-parser-symbols/src/parser.rs | 161 --------------- som-parser-symbols/tests/tests.rs | 20 +- som-parser-text/Cargo.toml | 1 + som-parser-text/src/combinators.rs | 191 ------------------ som-parser-text/src/lang.rs | 98 +++++---- som-parser-text/src/lib.rs | 18 +- som-parser-text/tests/tests.rs | 20 +- 19 files changed, 261 insertions(+), 579 deletions(-) create mode 100644 som-parser-core/Cargo.toml create mode 100644 som-parser-core/README.md rename {som-parser-symbols => som-parser-core}/src/combinators.rs (57%) rename som-parser-text/src/parser.rs => som-parser-core/src/lib.rs (65%) delete mode 100644 som-parser-symbols/src/parser.rs delete mode 100644 som-parser-text/src/combinators.rs diff --git a/.gitignore b/.gitignore index ab347ef6..31a2bafb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target .vscode /*.som +.DS_Store diff --git a/Cargo.lock b/Cargo.lock index f5ae9b41..4b36cd25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -228,12 +228,17 @@ dependencies = [ name = "som-lexer" version = "0.1.0" +[[package]] +name = "som-parser-core" +version = "0.1.0" + [[package]] name = "som-parser-symbols" version = "0.1.0" dependencies = [ "som-core 0.1.0", "som-lexer 0.1.0", + "som-parser-core 0.1.0", ] [[package]] @@ -241,6 +246,7 @@ name = "som-parser-text" version = "0.1.0" dependencies = [ "som-core 0.1.0", + "som-parser-core 0.1.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 03fccdb2..026defbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "som-core", "som-interpreter", "som-lexer", - "som-parser-text", + "som-parser-core", "som-parser-symbols", + "som-parser-text", ] diff --git a/som-interpreter/src/main.rs b/som-interpreter/src/main.rs index 6164f7b5..e47801d2 100644 --- a/som-interpreter/src/main.rs +++ b/som-interpreter/src/main.rs @@ -29,9 +29,6 @@ struct Options { #[structopt(short, long)] classpath: Vec, - // /// enable disassembling - // #[structopt(short = "d")] - // disassembling: bool, /// Enable verbose output (with timing information). #[structopt(short = "v")] verbose: bool, @@ -71,11 +68,11 @@ fn main() -> anyhow::Result<()> { }); // let class = universe.load_class_from_path(file)?; - // let instance = som_interpreter::instance::Instance::from_class(class); - // let instance = Value::Instance(Rc::new(std::cell::RefCell::new(instance))); + // let instance = Instance::from_class(class); + // let instance = Value::Instance(Rc::new(RefCell::new(instance))); // let invokable = instance.lookup_method(&universe, "run").unwrap(); - // let output = som_interpreter::invokable::Invoke::invoke(invokable.as_ref(), &mut universe, vec![instance]); + // let output = invokable.invoke(&mut universe, vec![instance]); match output { Return::Exception(message) => println!("ERROR: {}", message), diff --git a/som-interpreter/src/shell.rs b/som-interpreter/src/shell.rs index 46399285..c5af0691 100644 --- a/som-interpreter/src/shell.rs +++ b/som-interpreter/src/shell.rs @@ -6,7 +6,6 @@ use anyhow::Error; use som_lexer::{Lexer, Token}; use som_parser::lang; -use som_parser::Parser; use som_interpreter::evaluate::Evaluate; use som_interpreter::frame::FrameKind; @@ -57,9 +56,9 @@ pub fn interactive(universe: &mut Universe, verbose: bool) -> Result<(), Error> } let start = Instant::now(); - let expr = match lang::expression().parse(tokens.as_slice()) { - Some((expr, rest)) if rest.is_empty() => expr, - Some(_) | None => { + let expr = match som_parser::apply(lang::expression(), tokens.as_slice()) { + Some(expr) => expr, + None => { println!("ERROR: could not fully parse the given expression"); continue; } diff --git a/som-parser-core/Cargo.toml b/som-parser-core/Cargo.toml new file mode 100644 index 00000000..22834025 --- /dev/null +++ b/som-parser-core/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "som-parser-core" +version = "0.1.0" +description = "The parsing core library for the Simple Object Machine" +authors = ["Nicolas Polomack "] +edition = "2018" +publish = false +license = "MIT OR Apache-2.0" + +[dependencies] diff --git a/som-parser-core/README.md b/som-parser-core/README.md new file mode 100644 index 00000000..c157f4ec --- /dev/null +++ b/som-parser-core/README.md @@ -0,0 +1,4 @@ +The SOM Parser Core Library +=========================== + +This crate serves as the collection of parser primitives shared between the various SOM parsers. diff --git a/som-parser-symbols/src/combinators.rs b/som-parser-core/src/combinators.rs similarity index 57% rename from som-parser-symbols/src/combinators.rs rename to som-parser-core/src/combinators.rs index d5056baa..926d9698 100644 --- a/som-parser-symbols/src/combinators.rs +++ b/som-parser-core/src/combinators.rs @@ -1,6 +1,4 @@ -use som_lexer::Token; - -use crate::parser::Parser; +use crate::Parser; /// Represents a value of either type A (Left) or type B (Right). #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -10,28 +8,28 @@ pub enum Either { } /// Transforms a parser into a non-consuming one, allowing to parse ahead without consuming anything. -pub fn peek<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, A> { - move |input: &'a [Token]| { - let (value, _) = parser.parse(input)?; +pub fn peek(mut parser: impl Parser) -> impl Parser { + move |input: I| { + let (value, _) = parser.parse(input.clone())?; Some((value, input)) } } /// Runs the given parser, fails if it succeeded, and succeeds otherwise. -pub fn not<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, ()> { - move |input: &'a [Token]| match parser.parse(input) { +pub fn not(mut parser: impl Parser) -> impl Parser<(), I> { + move |input: I| match parser.parse(input.clone()) { Some(_) => None, None => Some(((), input)), } } /// Sequences two parsers, one after the other, collecting both results. -pub fn sequence<'a, A, B>( - fst: impl Parser<'a, A>, - snd: impl Parser<'a, B>, -) -> impl Parser<'a, (A, B)> { +pub fn sequence( + mut fst: impl Parser, + mut snd: impl Parser, +) -> impl Parser<(A, B), I> { // equivalent to: `fst.and(snd)` - move |input: &'a [Token]| { + move |input: I| { let (a, input) = fst.parse(input)?; let (b, input) = snd.parse(input)?; Some(((a, b), input)) @@ -39,17 +37,20 @@ pub fn sequence<'a, A, B>( } /// Tries to apply the first parser, if it fails, it tries to apply the second parser. -pub fn alternative<'a, A>(fst: impl Parser<'a, A>, snd: impl Parser<'a, A>) -> impl Parser<'a, A> { - move |input: &'a [Token]| fst.parse(input).or_else(|| snd.parse(input)) +pub fn alternative( + mut fst: impl Parser, + mut snd: impl Parser, +) -> impl Parser { + move |input: I| fst.parse(input.clone()).or_else(|| snd.parse(input)) } /// Same as `either`, but allows for different output types for the parsers. -pub fn either<'a, A, B>( - fst: impl Parser<'a, A>, - snd: impl Parser<'a, B>, -) -> impl Parser<'a, Either> { - move |input: &'a [Token]| { - if let Some((a, input)) = fst.parse(input) { +pub fn either( + mut fst: impl Parser, + mut snd: impl Parser, +) -> impl Parser, I> { + move |input: I| { + if let Some((a, input)) = fst.parse(input.clone()) { Some((Either::Left(a), input)) } else if let Some((b, input)) = snd.parse(input) { Some((Either::Right(b), input)) @@ -60,27 +61,35 @@ pub fn either<'a, A, B>( } /// Tries to apply a parser, or fallback to a constant value (making it an always-succeeding parser). -pub fn fallback<'a, A: Clone>(def: A, parser: impl Parser<'a, A>) -> impl Parser<'a, A> { - move |input: &'a [Token]| parser.parse(input).or_else(|| Some((def.clone(), input))) +pub fn fallback(def: A, mut parser: impl Parser) -> impl Parser { + move |input: I| { + parser + .parse(input.clone()) + .or_else(|| Some((def.clone(), input))) + } } /// Tries to apply a parser, or fallback to its default value (making it an always-succeeding parser). -pub fn default<'a, A: Default>(parser: impl Parser<'a, A>) -> impl Parser<'a, A> { +pub fn default(parser: impl Parser) -> impl Parser { optional(parser).map(Option::unwrap_or_default) } /// Tries every parser in a slice, from left to right, and returns the output of the first succeeding one. -pub fn any<'a: 'b, 'b, A>(parsers: &'b [impl Parser<'a, A>]) -> impl Parser<'a, A> + 'b { - move |input: &'a [Token]| parsers.iter().find_map(|parser| parser.parse(input)) +pub fn any<'a, A, I: Clone>(parsers: &'a mut [impl Parser]) -> impl Parser + 'a { + move |input: I| { + parsers + .iter_mut() + .find_map(|parser| parser.parse(input.clone())) + } } /// Applies every parser in a slice, from left to right, and returns the output from all of them. /// If one parser fails, the whole sequence is considered failed. -pub fn all<'a: 'b, 'b, A>(parsers: &'b [impl Parser<'a, A>]) -> impl Parser<'a, Vec> + 'b { - move |input: &'a [Token]| { +pub fn all<'a, A, I>(parsers: &'a mut [impl Parser]) -> impl Parser, I> + 'a { + move |input: I| { let output = Vec::::with_capacity(parsers.len()); parsers - .iter() + .iter_mut() .try_fold((output, input), |(mut output, input), parser| { let (value, input) = parser.parse(input)?; output.push(value); @@ -90,9 +99,9 @@ pub fn all<'a: 'b, 'b, A>(parsers: &'b [impl Parser<'a, A>]) -> impl Parser<'a, } /// Tries to apply a parser, but fails gracefully (with an `Option` output). -pub fn optional<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Option> { - move |input: &'a [Token]| { - if let Some((value, input)) = parser.parse(input) { +pub fn optional(mut parser: impl Parser) -> impl Parser, I> { + move |input: I| { + if let Some((value, input)) = parser.parse(input.clone()) { Some((Some(value), input)) } else { Some((None, input)) @@ -101,10 +110,10 @@ pub fn optional<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Option> } /// Applies a parser zero or more times. -pub fn many<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Vec> { - move |mut input: &'a [Token]| { +pub fn many(mut parser: impl Parser) -> impl Parser, I> { + move |mut input: I| { let mut output = Vec::::new(); - while let Some((value, next)) = parser.parse(input) { + while let Some((value, next)) = parser.parse(input.clone()) { input = next; output.push(value); } @@ -113,11 +122,11 @@ pub fn many<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Vec> { } /// Applies a parser one or more times. -pub fn some<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Vec> { - move |input: &'a [Token]| { +pub fn some(mut parser: impl Parser) -> impl Parser, I> { + move |input: I| { let (value, mut input) = parser.parse(input)?; let mut output = vec![value]; - while let Some((value, next)) = parser.parse(input) { + while let Some((value, next)) = parser.parse(input.clone()) { input = next; output.push(value); } @@ -126,12 +135,12 @@ pub fn some<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Vec> { } /// Parses something that is enclosed between two other things. -pub fn between<'a, A, B, C>( - before: impl Parser<'a, A>, - within: impl Parser<'a, B>, - after: impl Parser<'a, C>, -) -> impl Parser<'a, B> { - move |input: &'a [Token]| { +pub fn between( + mut before: impl Parser, + mut within: impl Parser, + mut after: impl Parser, +) -> impl Parser { + move |input: I| { let (_, input) = before.parse(input)?; let (value, input) = within.parse(input)?; let (_, input) = after.parse(input)?; @@ -140,16 +149,16 @@ pub fn between<'a, A, B, C>( } /// Parses zero or more things, separated by an arbitrary delimiter. -pub fn sep_by<'a, A, B>( - delim: impl Parser<'a, A>, - within: impl Parser<'a, B>, -) -> impl Parser<'a, Vec> { - move |input: &'a [Token]| { +pub fn sep_by( + mut delim: impl Parser, + mut within: impl Parser, +) -> impl Parser, I> { + move |input: I| { let mut output = Vec::::new(); - if let Some((value, mut input)) = within.parse(input) { + if let Some((value, mut input)) = within.parse(input.clone()) { output.push(value); while let Some((value, next)) = delim - .parse(input) + .parse(input.clone()) .and_then(|(_, input)| within.parse(input)) { input = next; @@ -163,16 +172,16 @@ pub fn sep_by<'a, A, B>( } /// Parses one or more things, separated by an arbitrary delimiter. -pub fn sep_by1<'a, A, B>( - delim: impl Parser<'a, A>, - within: impl Parser<'a, B>, -) -> impl Parser<'a, Vec> { - move |input: &'a [Token]| { +pub fn sep_by1( + mut delim: impl Parser, + mut within: impl Parser, +) -> impl Parser, I> { + move |input: I| { let mut output = Vec::::new(); let (value, mut input) = within.parse(input)?; output.push(value); while let Some((value, next)) = delim - .parse(input) + .parse(input.clone()) .and_then(|(_, input)| within.parse(input)) { input = next; @@ -183,8 +192,8 @@ pub fn sep_by1<'a, A, B>( } /// Transforms the output value of a parser. -pub fn map<'a, A, B>(parser: impl Parser<'a, A>, func: impl Fn(A) -> B) -> impl Parser<'a, B> { - move |input: &'a [Token]| { +pub fn map(mut parser: impl Parser, func: impl Fn(A) -> B) -> impl Parser { + move |input: I| { let (value, input) = parser.parse(input)?; Some((func(value), input)) } diff --git a/som-parser-text/src/parser.rs b/som-parser-core/src/lib.rs similarity index 65% rename from som-parser-text/src/parser.rs rename to som-parser-core/src/lib.rs index d24b61f3..d517323f 100644 --- a/som-parser-text/src/parser.rs +++ b/som-parser-core/src/lib.rs @@ -1,17 +1,20 @@ use std::marker::PhantomData; +/// Generic parser combinators. +pub mod combinators; + /// Defines a parser. /// /// It is basically a function that takes an input and returns a parsed result along with the rest of input (which can be parsed further). -pub trait Parser<'a, T>: Sized { +pub trait Parser: Sized { /// Applies the parser on some input. /// /// It returns the parsed value and the rest of the unparsed input as `Some`, if successful. /// Failing that, it returns `None`. - fn parse(&self, input: &'a [char]) -> Option<(T, &'a [char])>; + fn parse(&mut self, input: I) -> Option<(T, I)>; /// Sequences two parsers, one after the other, collecting both results. - fn and>(self, parser: P) -> And { + fn and>(self, parser: P) -> And { And { p1: self, p2: parser, @@ -19,7 +22,7 @@ pub trait Parser<'a, T>: Sized { } /// Tries to apply the first parser, if it fails, it tries to apply the second parser. - fn or>(self, parser: P) -> Or { + fn or>(self, parser: P) -> Or { Or { p1: self, p2: parser, @@ -36,7 +39,7 @@ pub trait Parser<'a, T>: Sized { } /// Sequences two parsers, one after the other, but discards the output of the second one. - fn and_left, U>(self, parser: P) -> AndLeft { + fn and_left, U>(self, parser: P) -> AndLeft { AndLeft { p1: self, p2: parser, @@ -45,7 +48,7 @@ pub trait Parser<'a, T>: Sized { } /// Sequences two parsers, one after the other, but discards the output of the first one. - fn and_right, U>(self, parser: P) -> AndRight { + fn and_right, U>(self, parser: P) -> AndRight { AndRight { p1: self, p2: parser, @@ -60,12 +63,12 @@ pub struct And { p2: B, } -impl<'a, T1, T2, A, B> Parser<'a, (T1, T2)> for And +impl Parser<(T1, T2), I> for And where - A: Parser<'a, T1>, - B: Parser<'a, T2>, + A: Parser, + B: Parser, { - fn parse(&self, input: &'a [char]) -> Option<((T1, T2), &'a [char])> { + fn parse<'a>(&mut self, input: I) -> Option<((T1, T2), I)> { let (v1, input) = self.p1.parse(input)?; let (v2, input) = self.p2.parse(input)?; Some(((v1, v2), input)) @@ -78,13 +81,14 @@ pub struct Or { p2: B, } -impl<'a, T, A, B> Parser<'a, T> for Or +impl Parser for Or where - A: Parser<'a, T>, - B: Parser<'a, T>, + I: Clone, + A: Parser, + B: Parser, { - fn parse(&self, input: &'a [char]) -> Option<(T, &'a [char])> { - self.p1.parse(input).or_else(|| self.p2.parse(input)) + fn parse(&mut self, input: I) -> Option<(T, I)> { + self.p1.parse(input.clone()).or_else(|| self.p2.parse(input)) } } @@ -95,12 +99,12 @@ pub struct Map { _phantom: PhantomData, } -impl<'a, P, T, F, U> Parser<'a, U> for Map +impl Parser for Map where - P: Parser<'a, T>, + P: Parser, F: Fn(T) -> U, { - fn parse(&self, input: &'a [char]) -> Option<(U, &'a [char])> { + fn parse<'a>(&mut self, input: I) -> Option<(U, I)> { let (value, input) = self.parser.parse(input)?; Some(((self.func)(value), input)) } @@ -113,12 +117,12 @@ pub struct AndLeft { _phantom: PhantomData, } -impl<'a, A, B, T, U> Parser<'a, T> for AndLeft +impl Parser for AndLeft where - A: Parser<'a, T>, - B: Parser<'a, U>, + A: Parser, + B: Parser, { - fn parse(&self, input: &'a [char]) -> Option<(T, &'a [char])> { + fn parse(&mut self, input: I) -> Option<(T, I)> { let (value, input) = self.p1.parse(input)?; let (_, input) = self.p2.parse(input)?; Some((value, input)) @@ -132,12 +136,12 @@ pub struct AndRight { _phantom: PhantomData, } -impl<'a, A, B, T, U> Parser<'a, U> for AndRight +impl Parser for AndRight where - A: Parser<'a, T>, - B: Parser<'a, U>, + A: Parser, + B: Parser, { - fn parse(&self, input: &'a [char]) -> Option<(U, &'a [char])> { + fn parse(&mut self, input: I) -> Option<(U, I)> { let (_, input) = self.p1.parse(input)?; let (value, input) = self.p2.parse(input)?; Some((value, input)) @@ -146,14 +150,14 @@ where /// Because a `Parser` is basically a function of the following signature. /// ```text -/// (&[char]) -> (T, &[char]) +/// (I) -> (T, I) /// ``` -/// We can implement it for any `Fn(&[char]) -> (T, &[char])`. -impl<'a, T, F> Parser<'a, T> for F +/// We can implement it for any bare `Fn(I) -> (T, I)`. +impl Parser for F where - F: Fn(&'a [char]) -> Option<(T, &'a [char])>, + F: FnMut(I) -> Option<(T, I)>, { - fn parse(&self, input: &'a [char]) -> Option<(T, &'a [char])> { + fn parse(&mut self, input: I) -> Option<(T, I)> { (self)(input) } } diff --git a/som-parser-symbols/Cargo.toml b/som-parser-symbols/Cargo.toml index 3bfa1ac3..ea554a11 100644 --- a/som-parser-symbols/Cargo.toml +++ b/som-parser-symbols/Cargo.toml @@ -10,4 +10,5 @@ license = "MIT OR Apache-2.0" [dependencies] # internal som-core = { path = "../som-core", version = "0.1.0" } +som-parser-core = { path = "../som-parser-core", version = "0.1.0" } som-lexer = { path = "../som-lexer", version = "0.1.0" } diff --git a/som-parser-symbols/src/lang.rs b/som-parser-symbols/src/lang.rs index 3cbf0212..fa993c86 100644 --- a/som-parser-symbols/src/lang.rs +++ b/som-parser-symbols/src/lang.rs @@ -1,8 +1,7 @@ use som_core::ast::*; use som_lexer::Token; - -use crate::combinators::*; -use crate::parser::Parser; +use som_parser_core::combinators::*; +use som_parser_core::Parser; macro_rules! opaque { ($expr:expr) => {{ @@ -11,7 +10,7 @@ macro_rules! opaque { } /// A parser that expects to be nothing left in its input. -pub fn eof<'a>() -> impl Parser<'a, ()> { +pub fn eof<'a>() -> impl Parser<(), &'a [Token]> { move |input: &'a [Token]| { if input.is_empty() { Some(((), input)) @@ -21,7 +20,7 @@ pub fn eof<'a>() -> impl Parser<'a, ()> { } } -pub fn exact<'a>(ch: Token) -> impl Parser<'a, ()> { +pub fn exact<'a>(ch: Token) -> impl Parser<(), &'a [Token]> { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; if *head == ch { @@ -32,7 +31,7 @@ pub fn exact<'a>(ch: Token) -> impl Parser<'a, ()> { } } -pub fn exact_ident<'a, 'b: 'a>(string: &'b str) -> impl Parser<'a, ()> { +pub fn exact_ident<'a, 'b: 'a>(string: &'b str) -> impl Parser<(), &'a [Token]> { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -42,7 +41,7 @@ pub fn exact_ident<'a, 'b: 'a>(string: &'b str) -> impl Parser<'a, ()> { } } -pub fn big_integer<'a>() -> impl Parser<'a, String> { +pub fn big_integer<'a>() -> impl Parser { move |input: &'a [Token]| { let (sign, input) = optional(exact(Token::Minus)).parse(input)?; let sign = if sign.is_some() { "-" } else { "" }; @@ -55,7 +54,7 @@ pub fn big_integer<'a>() -> impl Parser<'a, String> { } } -pub fn integer<'a>() -> impl Parser<'a, i64> { +pub fn integer<'a>() -> impl Parser { move |input: &'a [Token]| { let (sign, input) = optional(exact(Token::Minus)).parse(input)?; let sign = if sign.is_some() { -1 } else { 1 }; @@ -68,7 +67,7 @@ pub fn integer<'a>() -> impl Parser<'a, i64> { } } -pub fn double<'a>() -> impl Parser<'a, f64> { +pub fn double<'a>() -> impl Parser { move |input: &'a [Token]| { let (sign, input) = optional(exact(Token::Minus)).parse(input)?; let sign = if sign.is_some() { -1.0 } else { 1.0 }; @@ -81,7 +80,7 @@ pub fn double<'a>() -> impl Parser<'a, f64> { } } -pub fn single_operator<'a>() -> impl Parser<'a, &'static str> { +pub fn single_operator<'a>() -> impl Parser<&'static str, &'a [Token]> { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -104,7 +103,7 @@ pub fn single_operator<'a>() -> impl Parser<'a, &'static str> { } } -pub fn operator_sequence<'a>() -> impl Parser<'a, String> { +pub fn operator_sequence<'a>() -> impl Parser { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -114,11 +113,11 @@ pub fn operator_sequence<'a>() -> impl Parser<'a, String> { } } -pub fn operator<'a>() -> impl Parser<'a, String> { +pub fn operator<'a>() -> impl Parser { single_operator().map(String::from).or(operator_sequence()) } -pub fn identifier<'a>() -> impl Parser<'a, String> { +pub fn identifier<'a>() -> impl Parser { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -128,7 +127,7 @@ pub fn identifier<'a>() -> impl Parser<'a, String> { } } -pub fn string<'a>() -> impl Parser<'a, String> { +pub fn string<'a>() -> impl Parser { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -138,7 +137,7 @@ pub fn string<'a>() -> impl Parser<'a, String> { } } -pub fn symbol<'a>() -> impl Parser<'a, String> { +pub fn symbol<'a>() -> impl Parser { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -148,7 +147,7 @@ pub fn symbol<'a>() -> impl Parser<'a, String> { } } -pub fn array<'a>() -> impl Parser<'a, Vec> { +pub fn array<'a>() -> impl Parser, &'a [Token]> { move |input: &'a [Token]| { between( exact(Token::NewArray), @@ -159,7 +158,7 @@ pub fn array<'a>() -> impl Parser<'a, Vec> { } } -pub fn literal<'a>() -> impl Parser<'a, Literal> { +pub fn literal<'a>() -> impl Parser { (double().map(Literal::Double)) .or(integer().map(Literal::Integer)) .or(big_integer().map(Literal::BigInteger)) @@ -168,7 +167,7 @@ pub fn literal<'a>() -> impl Parser<'a, Literal> { .or(array().map(Literal::Array)) } -pub fn keyword<'a>() -> impl Parser<'a, String> { +pub fn keyword<'a>() -> impl Parser { move |input: &'a [Token]| { let (head, tail) = input.split_first()?; match head { @@ -178,7 +177,7 @@ pub fn keyword<'a>() -> impl Parser<'a, String> { } } -pub fn unary_send<'a>() -> impl Parser<'a, Expression> { +pub fn unary_send<'a>() -> impl Parser { opaque!(primary()) .and(many(identifier())) .map(|(receiver, signatures)| { @@ -194,7 +193,7 @@ pub fn unary_send<'a>() -> impl Parser<'a, Expression> { }) } -pub fn binary_send<'a>() -> impl Parser<'a, Expression> { +pub fn binary_send<'a>() -> impl Parser { unary_send() .and(many(operator().and(unary_send().map(Box::new)))) .map(|(lhs, operands)| { @@ -208,7 +207,7 @@ pub fn binary_send<'a>() -> impl Parser<'a, Expression> { }) } -pub fn positional_send<'a>() -> impl Parser<'a, Expression> { +pub fn positional_send<'a>() -> impl Parser { binary_send() .and(many(keyword().and(binary_send()))) .map(|(receiver, pairs)| { @@ -226,7 +225,7 @@ pub fn positional_send<'a>() -> impl Parser<'a, Expression> { }) } -pub fn body<'a>() -> impl Parser<'a, Body> { +pub fn body<'a>() -> impl Parser { sep_by(exact(Token::Period), exit().or(statement())) .and(optional(exact(Token::Period))) .map(|(exprs, stopped)| Body { @@ -235,19 +234,19 @@ pub fn body<'a>() -> impl Parser<'a, Body> { }) } -pub fn locals<'a>() -> impl Parser<'a, Vec> { +pub fn locals<'a>() -> impl Parser, &'a [Token]> { between(exact(Token::Or), many(identifier()), exact(Token::Or)) } -pub fn parameter<'a>() -> impl Parser<'a, String> { +pub fn parameter<'a>() -> impl Parser { exact(Token::Colon).and_right(identifier()) } -pub fn parameters<'a>() -> impl Parser<'a, Vec> { +pub fn parameters<'a>() -> impl Parser, &'a [Token]> { some(parameter()).and_left(exact(Token::Or)) } -pub fn block<'a>() -> impl Parser<'a, Expression> { +pub fn block<'a>() -> impl Parser { between( exact(Token::NewBlock), default(parameters()).and(default(locals())).and(body()), @@ -262,44 +261,44 @@ pub fn block<'a>() -> impl Parser<'a, Expression> { }) } -pub fn term<'a>() -> impl Parser<'a, Expression> { +pub fn term<'a>() -> impl Parser { between(exact(Token::NewTerm), body(), exact(Token::EndTerm)) .map(|body| Expression::Term(Term { body })) } -pub fn exit<'a>() -> impl Parser<'a, Expression> { +pub fn exit<'a>() -> impl Parser { exact(Token::Exit) .and_right(statement()) .map(|expr| Expression::Exit(Box::new(expr))) } -pub fn expression<'a>() -> impl Parser<'a, Expression> { +pub fn expression<'a>() -> impl Parser { positional_send() } -pub fn primary<'a>() -> impl Parser<'a, Expression> { +pub fn primary<'a>() -> impl Parser { (identifier().map(Expression::Reference)) .or(term()) .or(block()) .or(literal().map(Expression::Literal)) } -pub fn assignment<'a>() -> impl Parser<'a, Expression> { +pub fn assignment<'a>() -> impl Parser { identifier() .and_left(exact(Token::Assign)) .and(opaque!(statement())) .map(|(name, expr)| Expression::Assignment(name, Box::new(expr))) } -pub fn statement<'a>() -> impl Parser<'a, Expression> { +pub fn statement<'a>() -> impl Parser { assignment().or(expression()) } -pub fn primitive<'a>() -> impl Parser<'a, MethodBody> { +pub fn primitive<'a>() -> impl Parser { exact(Token::Primitive).map(|_| MethodBody::Primitive) } -pub fn method_body<'a>() -> impl Parser<'a, MethodBody> { +pub fn method_body<'a>() -> impl Parser { between( exact(Token::NewTerm), default(locals()).and(body()), @@ -308,7 +307,7 @@ pub fn method_body<'a>() -> impl Parser<'a, MethodBody> { .map(|(locals, body)| MethodBody::Body { locals, body }) } -pub fn unary_method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn unary_method_def<'a>() -> impl Parser { identifier() .and_left(exact(Token::Equal)) .and(primitive().or(method_body())) @@ -319,7 +318,7 @@ pub fn unary_method_def<'a>() -> impl Parser<'a, MethodDef> { }) } -pub fn positional_method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn positional_method_def<'a>() -> impl Parser { some(keyword().and(identifier())) .and_left(exact(Token::Equal)) .and(primitive().or(method_body())) @@ -334,7 +333,7 @@ pub fn positional_method_def<'a>() -> impl Parser<'a, MethodDef> { }) } -pub fn operator_method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn operator_method_def<'a>() -> impl Parser { operator() .and(identifier()) .and_left(exact(Token::Equal)) @@ -346,13 +345,13 @@ pub fn operator_method_def<'a>() -> impl Parser<'a, MethodDef> { }) } -pub fn method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn method_def<'a>() -> impl Parser { unary_method_def() .or(positional_method_def()) .or(operator_method_def()) } -pub fn class_def<'a>() -> impl Parser<'a, ClassDef> { +pub fn class_def<'a>() -> impl Parser { identifier() .and_left(exact(Token::Equal)) .and(optional(identifier())) @@ -378,6 +377,6 @@ pub fn class_def<'a>() -> impl Parser<'a, ClassDef> { }) } -pub fn file<'a>() -> impl Parser<'a, ClassDef> { +pub fn file<'a>() -> impl Parser { class_def().and_left(eof()) } diff --git a/som-parser-symbols/src/lib.rs b/som-parser-symbols/src/lib.rs index 646aec6b..f586c2f3 100644 --- a/som-parser-symbols/src/lib.rs +++ b/som-parser-symbols/src/lib.rs @@ -4,20 +4,26 @@ //! This particular version of the parser works with the tokens outputted by the lexical analyser, instead of directly reading text. //! -/// Generic parser combinators. -pub mod combinators; /// SOM-specific parser combinators. pub mod lang; -/// Parser combinator primitives. -pub mod parser; - -pub use crate::parser::Parser; use som_core::ast::ClassDef; use som_lexer::Token; +use som_parser_core::Parser; /// Parses the input of an entire file into an AST. pub fn parse_file(input: &[Token]) -> Option { let (class, _) = lang::file().parse(input)?; Some(class) } + +/// Applies a parser and returns the output value if the entirety of the input has been parsed successfully. +pub fn apply<'a, A, P>(mut parser: P, input: &'a [Token]) -> Option +where + P: Parser, +{ + match parser.parse(input) { + Some((output, tail)) if tail.is_empty() => Some(output), + Some(_) | None => None, + } +} diff --git a/som-parser-symbols/src/parser.rs b/som-parser-symbols/src/parser.rs deleted file mode 100644 index 66ebdd11..00000000 --- a/som-parser-symbols/src/parser.rs +++ /dev/null @@ -1,161 +0,0 @@ -use std::marker::PhantomData; - -use som_lexer::Token; - -/// Defines a parser. -/// -/// It is basically a function that takes an input and returns a parsed result along with the rest of input (which can be parsed further). -pub trait Parser<'a, T>: Sized { - /// Applies the parser on some input. - /// - /// It returns the parsed value and the rest of the unparsed input as `Some`, if successful. - /// Failing that, it returns `None`. - fn parse(&self, input: &'a [Token]) -> Option<(T, &'a [Token])>; - - /// Sequences two parsers, one after the other, collecting both results. - fn and>(self, parser: P) -> And { - And { - p1: self, - p2: parser, - } - } - - /// Tries to apply the first parser, if it fails, it tries to apply the second parser. - fn or>(self, parser: P) -> Or { - Or { - p1: self, - p2: parser, - } - } - - /// Maps a function over the output value of the parser. - fn map U, U>(self, func: F) -> Map { - Map { - parser: self, - func, - _phantom: PhantomData, - } - } - - /// Sequences two parsers, one after the other, but discards the output of the second one. - fn and_left, U>(self, parser: P) -> AndLeft { - AndLeft { - p1: self, - p2: parser, - _phantom: PhantomData, - } - } - - /// Sequences two parsers, one after the other, but discards the output of the first one. - fn and_right, U>(self, parser: P) -> AndRight { - AndRight { - p1: self, - p2: parser, - _phantom: PhantomData, - } - } -} - -/// Sequences two parsers, one after the other, collecting both results. -pub struct And { - p1: A, - p2: B, -} - -impl<'a, T1, T2, A, B> Parser<'a, (T1, T2)> for And -where - A: Parser<'a, T1>, - B: Parser<'a, T2>, -{ - fn parse(&self, input: &'a [Token]) -> Option<((T1, T2), &'a [Token])> { - let (v1, input) = self.p1.parse(input)?; - let (v2, input) = self.p2.parse(input)?; - Some(((v1, v2), input)) - } -} - -/// Tries to apply the first parser, if it fails, it tries to apply the second parser. -pub struct Or { - p1: A, - p2: B, -} - -impl<'a, T, A, B> Parser<'a, T> for Or -where - A: Parser<'a, T>, - B: Parser<'a, T>, -{ - fn parse(&self, input: &'a [Token]) -> Option<(T, &'a [Token])> { - self.p1.parse(input).or_else(|| self.p2.parse(input)) - } -} - -/// Maps a function over the output value of the parser. -pub struct Map { - parser: P, - func: F, - _phantom: PhantomData, -} - -impl<'a, P, T, F, U> Parser<'a, U> for Map -where - P: Parser<'a, T>, - F: Fn(T) -> U, -{ - fn parse(&self, input: &'a [Token]) -> Option<(U, &'a [Token])> { - let (value, input) = self.parser.parse(input)?; - Some(((self.func)(value), input)) - } -} - -/// Sequences two parsers, one after the other, but discards the output of the second one. -pub struct AndLeft { - p1: A, - p2: B, - _phantom: PhantomData, -} - -impl<'a, A, B, T, U> Parser<'a, T> for AndLeft -where - A: Parser<'a, T>, - B: Parser<'a, U>, -{ - fn parse(&self, input: &'a [Token]) -> Option<(T, &'a [Token])> { - let (value, input) = self.p1.parse(input)?; - let (_, input) = self.p2.parse(input)?; - Some((value, input)) - } -} - -/// Sequences two parsers, one after the other, but discards the output of the first one. -pub struct AndRight { - p1: A, - p2: B, - _phantom: PhantomData, -} - -impl<'a, A, B, T, U> Parser<'a, U> for AndRight -where - A: Parser<'a, T>, - B: Parser<'a, U>, -{ - fn parse(&self, input: &'a [Token]) -> Option<(U, &'a [Token])> { - let (_, input) = self.p1.parse(input)?; - let (value, input) = self.p2.parse(input)?; - Some((value, input)) - } -} - -/// Because a `Parser` is basically a function of the following signature. -/// ```text -/// (&[Token]) -> (T, &[Token]) -/// ``` -/// We can implement it for any `Fn(&[Token]) -> (T, &[Token])`. -impl<'a, T, F> Parser<'a, T> for F -where - F: Fn(&'a [Token]) -> Option<(T, &'a [Token])>, -{ - fn parse(&self, input: &'a [Token]) -> Option<(T, &'a [Token])> { - (self)(input) - } -} diff --git a/som-parser-symbols/tests/tests.rs b/som-parser-symbols/tests/tests.rs index cc2416bb..920fe233 100644 --- a/som-parser-symbols/tests/tests.rs +++ b/som-parser-symbols/tests/tests.rs @@ -1,8 +1,8 @@ use som_core::ast::*; use som_lexer::{Lexer, Token}; -use som_parser_symbols::combinators::*; +use som_parser_core::combinators::*; +use som_parser_core::Parser; use som_parser_symbols::lang::*; -use som_parser_symbols::Parser; #[test] fn literal_tests() { @@ -10,8 +10,7 @@ fn literal_tests() { .skip_whitespace(true) .collect(); - let parser = many(literal()); - let result = parser.parse(tokens.as_slice()); + let result = many(literal()).parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (literals, rest) = result.unwrap(); @@ -31,8 +30,7 @@ fn expression_test_1() { .skip_whitespace(true) .collect(); - let parser = expression(); - let result = parser.parse(tokens.as_slice()); + let result = expression().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (expression, rest) = result.unwrap(); @@ -59,8 +57,7 @@ fn block_test() { .skip_whitespace(true) .collect(); - let parser = block(); - let result = parser.parse(tokens.as_slice()); + let result = block().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (block, rest) = result.unwrap(); @@ -99,8 +96,7 @@ fn expression_test_2() { .skip_whitespace(true) .collect(); - let parser = expression(); - let result = parser.parse(tokens.as_slice()); + let result = expression().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (expression, rest) = result.unwrap(); @@ -159,8 +155,8 @@ fn primary_test() { let tokens: Vec = Lexer::new("[ self fib: (n - 1) + (self fib: (n - 2)) ]") .skip_whitespace(true) .collect(); - let parser = primary(); - let result = parser.parse(tokens.as_slice()); + + let result = primary().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (primary, rest) = result.unwrap(); diff --git a/som-parser-text/Cargo.toml b/som-parser-text/Cargo.toml index 935e22ee..88b0c321 100644 --- a/som-parser-text/Cargo.toml +++ b/som-parser-text/Cargo.toml @@ -10,3 +10,4 @@ license = "MIT OR Apache-2.0" [dependencies] # internal som-core = { path = "../som-core", version = "0.1.0" } +som-parser-core = { path = "../som-parser-core", version = "0.1.0" } diff --git a/som-parser-text/src/combinators.rs b/som-parser-text/src/combinators.rs deleted file mode 100644 index 33b41dac..00000000 --- a/som-parser-text/src/combinators.rs +++ /dev/null @@ -1,191 +0,0 @@ -use crate::parser::Parser; - -/// Represents a value of either type A (Left) or type B (Right). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Either { - /// Variant of type A. - Left(A), - /// Variant of type B. - Right(B), -} - -/// Transforms a parser into a non-consuming one, allowing to parse ahead without consuming anything. -pub fn peek<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, A> { - move |input: &'a [char]| { - let (value, _) = parser.parse(input)?; - Some((value, input)) - } -} - -/// Runs the given parser, fails if it succeeded, and succeeds otherwise. -pub fn not<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, ()> { - move |input: &'a [char]| match parser.parse(input) { - Some(_) => None, - None => Some(((), input)), - } -} - -/// Sequences two parsers, one after the other, collecting both results. -pub fn sequence<'a, A, B>( - fst: impl Parser<'a, A>, - snd: impl Parser<'a, B>, -) -> impl Parser<'a, (A, B)> { - // equivalent to: `fst.and(snd)` - move |input: &'a [char]| { - let (a, input) = fst.parse(input)?; - let (b, input) = snd.parse(input)?; - Some(((a, b), input)) - } -} - -/// Tries to apply the first parser, if it fails, it tries to apply the second parser. -pub fn alternative<'a, A>(fst: impl Parser<'a, A>, snd: impl Parser<'a, A>) -> impl Parser<'a, A> { - move |input: &'a [char]| fst.parse(input).or_else(|| snd.parse(input)) -} - -/// Same as `either`, but allows for different output types for the parsers. -pub fn either<'a, A, B>( - fst: impl Parser<'a, A>, - snd: impl Parser<'a, B>, -) -> impl Parser<'a, Either> { - move |input: &'a [char]| { - if let Some((a, input)) = fst.parse(input) { - Some((Either::Left(a), input)) - } else if let Some((b, input)) = snd.parse(input) { - Some((Either::Right(b), input)) - } else { - None - } - } -} - -/// Tries to apply a parser, or fallback to a constant value (making it an always-succeeding parser). -pub fn fallback<'a, A: Clone>(def: A, parser: impl Parser<'a, A>) -> impl Parser<'a, A> { - move |input: &'a [char]| parser.parse(input).or_else(|| Some((def.clone(), input))) -} - -/// Tries to apply a parser, or fallback to its default value (making it an always-succeeding parser). -pub fn default<'a, A: Default>(parser: impl Parser<'a, A>) -> impl Parser<'a, A> { - optional(parser).map(Option::unwrap_or_default) -} - -/// Tries every parser in a slice, from left to right, and returns the output of the first succeeding one. -pub fn any<'a: 'b, 'b, A>(parsers: &'b [impl Parser<'a, A>]) -> impl Parser<'a, A> + 'b { - move |input: &'a [char]| parsers.iter().find_map(|parser| parser.parse(input)) -} - -/// Applies every parser in a slice, from left to right, and returns the output from all of them. -/// If one parser fails, the whole sequence is considered failed. -pub fn all<'a: 'b, 'b, A>(parsers: &'b [impl Parser<'a, A>]) -> impl Parser<'a, Vec> + 'b { - move |input: &'a [char]| { - let output = Vec::::with_capacity(parsers.len()); - parsers - .iter() - .try_fold((output, input), |(mut output, input), parser| { - let (value, input) = parser.parse(input)?; - output.push(value); - Some((output, input)) - }) - } -} - -/// Tries to apply a parser, but fails gracefully (with an `Option` output). -pub fn optional<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Option> { - move |input: &'a [char]| { - if let Some((value, input)) = parser.parse(input) { - Some((Some(value), input)) - } else { - Some((None, input)) - } - } -} - -/// Applies a parser zero or more times. -pub fn many<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Vec> { - move |mut input: &'a [char]| { - let mut output = Vec::::new(); - while let Some((value, next)) = parser.parse(input) { - input = next; - output.push(value); - } - Some((output, input)) - } -} - -/// Applies a parser one or more times. -pub fn some<'a, A>(parser: impl Parser<'a, A>) -> impl Parser<'a, Vec> { - move |input: &'a [char]| { - let (value, mut input) = parser.parse(input)?; - let mut output = vec![value]; - while let Some((value, next)) = parser.parse(input) { - input = next; - output.push(value); - } - Some((output, input)) - } -} - -/// Parses something that is enclosed between two other things. -pub fn between<'a, A, B, C>( - before: impl Parser<'a, A>, - within: impl Parser<'a, B>, - after: impl Parser<'a, C>, -) -> impl Parser<'a, B> { - move |input: &'a [char]| { - let (_, input) = before.parse(input)?; - let (value, input) = within.parse(input)?; - let (_, input) = after.parse(input)?; - Some((value, input)) - } -} - -/// Parses zero or more things, separated by an arbitrary delimiter. -pub fn sep_by<'a, A, B>( - delim: impl Parser<'a, A>, - within: impl Parser<'a, B>, -) -> impl Parser<'a, Vec> { - move |input: &'a [char]| { - let mut output = Vec::::new(); - if let Some((value, mut input)) = within.parse(input) { - output.push(value); - while let Some((value, next)) = delim - .parse(input) - .and_then(|(_, input)| within.parse(input)) - { - input = next; - output.push(value); - } - Some((output, input)) - } else { - Some((output, input)) - } - } -} - -/// Parses one or more things, separated by an arbitrary delimiter. -pub fn sep_by1<'a, A, B>( - delim: impl Parser<'a, A>, - within: impl Parser<'a, B>, -) -> impl Parser<'a, Vec> { - move |input: &'a [char]| { - let mut output = Vec::::new(); - let (value, mut input) = within.parse(input)?; - output.push(value); - while let Some((value, next)) = delim - .parse(input) - .and_then(|(_, input)| within.parse(input)) - { - input = next; - output.push(value); - } - Some((output, input)) - } -} - -/// Transforms the output value of a parser. -pub fn map<'a, A, B>(parser: impl Parser<'a, A>, func: impl Fn(A) -> B) -> impl Parser<'a, B> { - move |input: &'a [char]| { - let (value, input) = parser.parse(input)?; - Some((func(value), input)) - } -} diff --git a/som-parser-text/src/lang.rs b/som-parser-text/src/lang.rs index ba0219a4..96d0e122 100644 --- a/som-parser-text/src/lang.rs +++ b/som-parser-text/src/lang.rs @@ -1,9 +1,8 @@ use som_core::ast::*; +use som_parser_core::combinators::*; +use som_parser_core::Parser; -use crate::combinators::*; -use crate::parser::Parser; - -pub fn eof<'a>() -> impl Parser<'a, ()> { +pub fn eof<'a>() -> impl Parser<(), &'a [char]> { move |input: &'a [char]| { if input.is_empty() { Some(((), input)) @@ -13,7 +12,7 @@ pub fn eof<'a>() -> impl Parser<'a, ()> { } } -pub fn exact<'a>(ch: char) -> impl Parser<'a, char> { +pub fn exact<'a>(ch: char) -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; if *head == ch { @@ -24,9 +23,9 @@ pub fn exact<'a>(ch: char) -> impl Parser<'a, char> { } } -pub fn exact_str<'a, 'b: 'a>(string: &'b str) -> impl Parser<'a, ()> { +pub fn exact_str<'a, 'b: 'a>(string: &'b str) -> impl Parser<(), &'a [char]> { move |mut input: &'a [char]| { - for parser in string.chars().map(exact) { + for mut parser in string.chars().map(exact) { let (_, new_input) = parser.parse(input)?; input = new_input; } @@ -34,7 +33,7 @@ pub fn exact_str<'a, 'b: 'a>(string: &'b str) -> impl Parser<'a, ()> { } } -pub fn one_of<'a, 'b: 'a>(string: &'b str) -> impl Parser<'a, char> { +pub fn one_of<'a, 'b: 'a>(string: &'b str) -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; if let Some(ch) = string.chars().find(|ch| ch == head) { @@ -45,7 +44,7 @@ pub fn one_of<'a, 'b: 'a>(string: &'b str) -> impl Parser<'a, char> { } } -pub fn not_exact<'a>(ch: char) -> impl Parser<'a, char> { +pub fn not_exact<'a>(ch: char) -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; if *head != ch { @@ -56,7 +55,7 @@ pub fn not_exact<'a>(ch: char) -> impl Parser<'a, char> { } } -pub fn whitespace<'a>() -> impl Parser<'a, char> { +pub fn whitespace<'a>() -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; if head.is_whitespace() { @@ -67,19 +66,19 @@ pub fn whitespace<'a>() -> impl Parser<'a, char> { } } -pub fn comment<'a>() -> impl Parser<'a, String> { +pub fn comment<'a>() -> impl Parser { between(exact('"'), many(not_exact('"')), exact('"')).map(|chars| chars.into_iter().collect()) } -pub fn separator<'a>() -> impl Parser<'a, ()> { +pub fn separator<'a>() -> impl Parser<(), &'a [char]> { exact_str("----").and(many(exact('-'))).map(|_| ()) } -pub fn spacing<'a>() -> impl Parser<'a, ()> { +pub fn spacing<'a>() -> impl Parser<(), &'a [char]> { whitespace().map(|_| ()).or(comment().map(|_| ())) } -pub fn digit<'a>() -> impl Parser<'a, i64> { +pub fn digit<'a>() -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; match head { @@ -98,7 +97,7 @@ pub fn digit<'a>() -> impl Parser<'a, i64> { } } -pub fn integer<'a>() -> impl Parser<'a, i64> { +pub fn integer<'a>() -> impl Parser { optional(exact('-')) .and(some(digit())) .map(|(sign, digits)| { @@ -107,7 +106,7 @@ pub fn integer<'a>() -> impl Parser<'a, i64> { }) } -pub fn double<'a>() -> impl Parser<'a, f64> { +pub fn double<'a>() -> impl Parser { move |input: &'a [char]| { let (sign, input) = optional(exact('-')).parse(input)?; let sign = if sign.is_some() { "-" } else { "" }; @@ -120,7 +119,7 @@ pub fn double<'a>() -> impl Parser<'a, f64> { } } -pub fn lower<'a>() -> impl Parser<'a, char> { +pub fn lower<'a>() -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; if head.is_lowercase() { @@ -131,7 +130,7 @@ pub fn lower<'a>() -> impl Parser<'a, char> { } } -pub fn upper<'a>() -> impl Parser<'a, char> { +pub fn upper<'a>() -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; if head.is_uppercase() { @@ -142,7 +141,7 @@ pub fn upper<'a>() -> impl Parser<'a, char> { } } -pub fn digitc<'a>() -> impl Parser<'a, char> { +pub fn digitc<'a>() -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; match head { @@ -152,7 +151,7 @@ pub fn digitc<'a>() -> impl Parser<'a, char> { } } -pub fn single_operator<'a>() -> impl Parser<'a, char> { +pub fn single_operator<'a>() -> impl Parser { move |input: &'a [char]| { let (head, tail) = input.split_first()?; match head { @@ -164,17 +163,17 @@ pub fn single_operator<'a>() -> impl Parser<'a, char> { } } -pub fn operator<'a>() -> impl Parser<'a, String> { +pub fn operator<'a>() -> impl Parser { some(single_operator()).map(|chars| chars.into_iter().collect()) } -pub fn identifier<'a>() -> impl Parser<'a, String> { +pub fn identifier<'a>() -> impl Parser { (lower().or(upper())) .and(many(lower().or(upper()).or(digitc()).or(exact('_')))) .map(|(fst, tail)| std::iter::once(fst).chain(tail).collect()) } -pub fn string<'a>() -> impl Parser<'a, String> { +pub fn string<'a>() -> impl Parser { move |input: &'a [char]| { let single_char = exact('\\') .and(one_of("tbnrf\'\\")) @@ -190,14 +189,13 @@ pub fn string<'a>() -> impl Parser<'a, String> { _ => vec![a, b], }) .or(not_exact('\'').map(|a| vec![a])); - let parser = between(exact('\''), many(single_char), exact('\'')); - let (value, input) = parser.parse(input)?; + let (value, input) = between(exact('\''), many(single_char), exact('\'')).parse(input)?; let value: String = value.into_iter().flatten().collect(); Some((value, input)) } } -pub fn symbol<'a>() -> impl Parser<'a, String> { +pub fn symbol<'a>() -> impl Parser { exact('#').and_right( (some(keyword()).map(|words| words.into_iter().collect())) .or(identifier()) @@ -206,7 +204,7 @@ pub fn symbol<'a>() -> impl Parser<'a, String> { ) } -pub fn array<'a>() -> impl Parser<'a, Vec> { +pub fn array<'a>() -> impl Parser, &'a [char]> { move |input: &'a [char]| { let (_, input) = exact('#').parse(input)?; let (_, input) = exact('(').parse(input)?; @@ -218,7 +216,7 @@ pub fn array<'a>() -> impl Parser<'a, Vec> { } } -pub fn literal<'a>() -> impl Parser<'a, Literal> { +pub fn literal<'a>() -> impl Parser { (double().map(Literal::Double)) .or(integer().map(Literal::Integer)) .or(string().map(Literal::String)) @@ -226,7 +224,7 @@ pub fn literal<'a>() -> impl Parser<'a, Literal> { .or(array().map(Literal::Array)) } -pub fn keyword<'a>() -> impl Parser<'a, String> { +pub fn keyword<'a>() -> impl Parser { (lower().or(upper())) .and(many(lower().or(upper()).or(digitc()).or(exact('_')))) .and(exact(':')) @@ -238,7 +236,7 @@ pub fn keyword<'a>() -> impl Parser<'a, String> { }) } -pub fn unary_send<'a>() -> impl Parser<'a, Expression> { +pub fn unary_send<'a>() -> impl Parser { move |input: &'a [char]| { let (receiver, input) = primary().parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -262,7 +260,7 @@ pub fn unary_send<'a>() -> impl Parser<'a, Expression> { } } -pub fn binary_send<'a>() -> impl Parser<'a, Expression> { +pub fn binary_send<'a>() -> impl Parser { move |input: &'a [char]| { let (lhs, input) = unary_send().parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -285,7 +283,7 @@ pub fn binary_send<'a>() -> impl Parser<'a, Expression> { } } -pub fn positional_send<'a>() -> impl Parser<'a, Expression> { +pub fn positional_send<'a>() -> impl Parser { let parameters = move |input: &'a [char]| { let (_, input) = many(spacing()).parse(input)?; let (keyword, input) = keyword().parse(input)?; @@ -312,7 +310,7 @@ pub fn positional_send<'a>() -> impl Parser<'a, Expression> { } } -pub fn locals<'a>() -> impl Parser<'a, Vec> { +pub fn locals<'a>() -> impl Parser, &'a [char]> { move |input: &'a [char]| { let (_, input) = exact('|').parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -323,7 +321,7 @@ pub fn locals<'a>() -> impl Parser<'a, Vec> { } } -pub fn body<'a>() -> impl Parser<'a, Body> { +pub fn body<'a>() -> impl Parser { move |input: &'a [char]| { let (exprs, input) = sep_by( exact('.').and(many(spacing())), @@ -341,7 +339,7 @@ pub fn body<'a>() -> impl Parser<'a, Body> { } } -pub fn block<'a>() -> impl Parser<'a, Expression> { +pub fn block<'a>() -> impl Parser { let parameters = move |input: &'a [char]| { let parameter = move |input: &'a [char]| { let (_, input) = exact(':').parse(input)?; @@ -372,7 +370,7 @@ pub fn block<'a>() -> impl Parser<'a, Expression> { } } -pub fn term<'a>() -> impl Parser<'a, Expression> { +pub fn term<'a>() -> impl Parser { move |input: &'a [char]| { let (_, input) = exact('(').parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -386,7 +384,7 @@ pub fn term<'a>() -> impl Parser<'a, Expression> { } } -pub fn exit<'a>() -> impl Parser<'a, Expression> { +pub fn exit<'a>() -> impl Parser { move |input: &'a [char]| { let (_, input) = exact('^').parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -397,18 +395,18 @@ pub fn exit<'a>() -> impl Parser<'a, Expression> { } } -pub fn expression<'a>() -> impl Parser<'a, Expression> { +pub fn expression<'a>() -> impl Parser { positional_send() } -pub fn primary<'a>() -> impl Parser<'a, Expression> { +pub fn primary<'a>() -> impl Parser { (identifier().map(Expression::Reference)) .or(term()) .or(block()) .or(literal().map(Expression::Literal)) } -pub fn assignment<'a>() -> impl Parser<'a, Expression> { +pub fn assignment<'a>() -> impl Parser { move |input: &'a [char]| { let (name, input) = identifier().parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -420,15 +418,15 @@ pub fn assignment<'a>() -> impl Parser<'a, Expression> { } } -pub fn statement<'a>() -> impl Parser<'a, Expression> { +pub fn statement<'a>() -> impl Parser { assignment().or(expression()) } -pub fn primitive<'a>() -> impl Parser<'a, MethodBody> { +pub fn primitive<'a>() -> impl Parser { exact_str("primitive").map(|_| MethodBody::Primitive) } -pub fn method_body<'a>() -> impl Parser<'a, MethodBody> { +pub fn method_body<'a>() -> impl Parser { move |input: &'a [char]| { let (_, input) = exact('(').parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -442,7 +440,7 @@ pub fn method_body<'a>() -> impl Parser<'a, MethodBody> { } } -pub fn unary_method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn unary_method_def<'a>() -> impl Parser { move |input: &'a [char]| { let (signature, input) = identifier().parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -459,7 +457,7 @@ pub fn unary_method_def<'a>() -> impl Parser<'a, MethodDef> { } } -pub fn positional_method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn positional_method_def<'a>() -> impl Parser { let parameter = move |input: &'a [char]| { let (keyword, input) = keyword().parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -483,7 +481,7 @@ pub fn positional_method_def<'a>() -> impl Parser<'a, MethodDef> { } } -pub fn operator_method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn operator_method_def<'a>() -> impl Parser { move |input: &'a [char]| { let (signature, input) = operator().parse(input)?; let (_, input) = many(spacing()).parse(input)?; @@ -502,14 +500,14 @@ pub fn operator_method_def<'a>() -> impl Parser<'a, MethodDef> { } } -pub fn method_def<'a>() -> impl Parser<'a, MethodDef> { +pub fn method_def<'a>() -> impl Parser { unary_method_def() .or(positional_method_def()) .or(operator_method_def()) } -pub fn class_def<'a>() -> impl Parser<'a, ClassDef> { - let class_section = move |input: &'a [char]| { +pub fn class_def<'a>() -> impl Parser { + let mut class_section = move |input: &'a [char]| { let (locals, input) = default(locals().and_left(many(spacing()))).parse(input)?; let (methods, input) = sep_by(many(spacing()), method_def()).parse(input)?; Some(((locals, methods), input)) @@ -547,7 +545,7 @@ pub fn class_def<'a>() -> impl Parser<'a, ClassDef> { } } -pub fn file<'a>() -> impl Parser<'a, ClassDef> { +pub fn file<'a>() -> impl Parser { move |input: &'a [char]| { let (_, input) = many(spacing()).parse(input)?; let (class_def, input) = class_def().parse(input)?; diff --git a/som-parser-text/src/lib.rs b/som-parser-text/src/lib.rs index 271db73f..957a4fb5 100644 --- a/som-parser-text/src/lib.rs +++ b/som-parser-text/src/lib.rs @@ -4,19 +4,25 @@ //! This particular version of the parser works with by directly reading the source code text, thus it does not need any lexical analysis stage before it. //! -/// Generic parser combinators. -pub mod combinators; /// SOM-specific parser combinators. pub mod lang; -/// Parser combinator primitives. -pub mod parser; - -pub use crate::parser::Parser; use som_core::ast::ClassDef; +use som_parser_core::Parser; /// Parses the input of an entire file into an AST. pub fn parse_file(input: &[char]) -> Option { let (class, _) = lang::file().parse(input)?; Some(class) } + +/// Applies a parser and returns the output value if the entirety of the input has been parsed successfully. +pub fn apply<'a, A, P>(mut parser: P, input: &'a [char]) -> Option +where + P: Parser, +{ + match parser.parse(input) { + Some((output, tail)) if tail.is_empty() => Some(output), + Some(_) | None => None, + } +} diff --git a/som-parser-text/tests/tests.rs b/som-parser-text/tests/tests.rs index 9d19ef70..aa4c6c51 100644 --- a/som-parser-text/tests/tests.rs +++ b/som-parser-text/tests/tests.rs @@ -1,14 +1,13 @@ use som_core::ast::*; -use som_parser_text::combinators::*; +use som_parser_core::combinators::*; +use som_parser_core::Parser; use som_parser_text::lang::*; -use som_parser_text::Parser; #[test] fn literal_tests() { let tokens: Vec = "1.2 5 #foo 'test'".chars().collect(); - let parser = sep_by(spacing(), literal()); - let result = parser.parse(tokens.as_slice()); + let result = sep_by(spacing(), literal()).parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (literals, rest) = result.unwrap(); @@ -26,8 +25,7 @@ fn literal_tests() { fn expression_test_1() { let tokens: Vec = "3 + counter get".chars().collect(); - let parser = expression(); - let result = parser.parse(tokens.as_slice()); + let result = expression().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (expression, rest) = result.unwrap(); @@ -53,8 +51,7 @@ fn block_test() { .chars() .collect(); - let parser = block(); - let result = parser.parse(tokens.as_slice()); + let result = block().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (block, rest) = result.unwrap(); @@ -92,8 +89,7 @@ fn expression_test_2() { .chars() .collect(); - let parser = expression(); - let result = parser.parse(tokens.as_slice()); + let result = expression().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (expression, rest) = result.unwrap(); @@ -152,8 +148,8 @@ fn primary_test() { let tokens: Vec = "[ self fib: (n - 1) + (self fib: (n - 2)) ]" .chars() .collect(); - let parser = primary(); - let result = parser.parse(tokens.as_slice()); + + let result = primary().parse(tokens.as_slice()); assert!(result.is_some(), "input did not parse successfully"); let (primary, rest) = result.unwrap();