// msp430-repl/src/parser/parsable.rs

// © 2023 John Breaux
//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
use super::*;
/// An AST node type that can be built from the tokens of a [stream](TokenStream)
pub trait Parsable {
    /// Reads tokens from the [stream](TokenStream) and builds a `Self` node out of them
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>;

    /// Runs [`parse`](Parsable::parse), treating a lex error as "nothing to parse here".
    ///
    /// A [`ParseError::LexError`] becomes `Ok(None)`; a successful parse becomes
    /// `Ok(Some(node))`; every other error is returned unchanged.
    fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        match Self::parse(p, stream) {
            // The only error variant that is masked
            Err(ParseError::LexError(_)) => Ok(None),
            outcome => outcome.map(Some),
        }
    }

    /// Parses a `Self` node, then calls `f` with the same parser and stream.
    ///
    /// Returns the parsed node paired with whatever `f` returned. When parsing fails the
    /// error is returned immediately and `f` is never called.
    fn parse_and<'text, T, R>(
        p: &Parser,
        stream: &mut T,
        f: fn(p: &Parser, &mut T) -> R,
    ) -> Result<(Self, R), ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        let node = Self::parse(p, stream)?;
        let extra = f(p, stream);
        Ok((node, extra))
    }

    /// Runs [`parse`](Parsable::parse), discarding any error.
    ///
    /// Returns [`Self::default()`](Default::default()) whenever parsing fails
    fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
    where
        Self: Sized + Default,
        T: TokenStream<'text>,
    {
        Self::parse(p, stream).ok().unwrap_or_default()
    }
}

/// Implements [`Parsable`] for each listed type by converting the contents of a
/// [`Type::String`] token into it.
///
/// Each type must be constructible from a `&str` via [`Into`]. The generated `parse`
/// fails with whatever error `stream.expect(Type::String)` reports.
macro_rules! parsable_str_types {
    ($($t:ty),*$(,)?) => {$(
        impl Parsable for $t {
            fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>
            where T: TokenStream<'text> {
                let token = stream.expect(Type::String)?;
                // Assumes the lexeme is the raw token text, delimiting quotes included.
                let lexeme = token.lexeme();
                // Remove at most ONE quote from each end. `trim_matches('"')` would strip
                // every leading/trailing quote and eat into content that itself starts
                // or ends with a quote character (e.g. a trailing `\"`).
                let inner = lexeme.strip_prefix('"').unwrap_or(lexeme);
                let inner = inner.strip_suffix('"').unwrap_or(inner);
                Ok(inner.into())
            }
        }
    )*};
}
use std::{path::PathBuf, rc::Rc};
// Generate `Parsable` impls for the owned string-like types listed here
parsable_str_types![String, Rc<str>, Box<str>, PathBuf];

/// A bracketed list of any [`Parsable`] parses into a [`Vec`] of that node type
impl<P> Parsable for Vec<P>
where P: Parsable
{
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // Shapes this is meant to accept:
        //   [dead beef]
        //   [A, B,]
        //   [c d e f]
        //   [ something
        //     else      ]

        // Opening bracket is mandatory; a line break right after it is tolerated
        stream.require(Type::LBracket)?;
        stream.allow(Type::Endl);

        let mut items = Vec::new();
        loop {
            // `try_parse` yields `None` once the next tokens are not an element;
            // any other error aborts the whole list
            match P::try_parse(p, stream)? {
                Some(item) => items.push(item),
                None => break,
            }
            // After each element: an optional separator, then an optional line break
            stream.allow(Type::Separator);
            stream.allow(Type::Endl);
        }

        // Whatever stopped the element loop has to be the closing bracket
        stream.require(Type::RBracket)?;
        Ok(items)
    }
}
msp430-asm: init repo with proof-of-concept code 2023-08-19 23:02:24 -05:00			`// © 2023 John Breaux`
docs: Improve documentation comments somewhat 2023-08-25 03:05:42 -05:00			//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
msp430-asm: init repo with proof-of-concept code 2023-08-19 23:02:24 -05:00			`use super::*;`
			`/// Parses tokens from [stream](TokenStream) into Self node`
			`pub trait Parsable {`
			`/// Parses tokens from [TokenStream](TokenStream) into Self nodes`
0.2.0: Feature update and Refactor - Each major module (lexer, parser, assembler) has its own error type - These error types are somewhat interconnected, but their dependency relationships are one-way and well defined - The AST is no longer responsible for assembling itself - The Assembler (assembler::Assembler) will now visit every AST node and accumulate words - Words are assumed to be little-endian. - There are now a set of assembler directives that affect the generated output: - .word <Number>: inserts a single word in the output - .words [<Number>,]: inserts multiple words in the output - .byte <Number>: Alias for .word - .bytes [<Number>,]: Alias for .words - .string "String": inserts a null-terminated UTF-8 encoded string - .strings ["String",]: "" multiple strings - Data is always word-aligned at the moment. - There are now assembler directives that affect the AST during parsing: - .include "path/to/file": Parses the contents of a file directly into the AST - Included files have their own defines, but share* labels. This is because .defines are a tokenizer construct, and including a file creates a new buffer and tokenizer. - Circular includes are NOT checked for at the moment. It is very easy to exhaust the stack. - General cleanup of several functions, comments, TODOs, etc. - main.rs was moved to make room for upcoming improvements to the UI TODO: - REPL mode is only partially compatible with .define directive - Branching to a label will branch to the data AT the label, not the label itself. I doubt this is correct behavior. - In case br <label> is meant to use the absolute address, I've created a .org directive (currently unimplemented) for specifying the load address of the program. 2023-09-05 01:54:50 -05:00			`fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>`
msp430-asm: init repo with proof-of-concept code 2023-08-19 23:02:24 -05:00			`where`
			`Self: Sized,`
			`T: TokenStream<'text>;`

			`/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.`
			`///`
			`/// Masks failed expectations.`
0.2.0: Feature update and Refactor - Each major module (lexer, parser, assembler) has its own error type - These error types are somewhat interconnected, but their dependency relationships are one-way and well defined - The AST is no longer responsible for assembling itself - The Assembler (assembler::Assembler) will now visit every AST node and accumulate words - Words are assumed to be little-endian. - There are now a set of assembler directives that affect the generated output: - .word <Number>: inserts a single word in the output - .words [<Number>,]: inserts multiple words in the output - .byte <Number>: Alias for .word - .bytes [<Number>,]: Alias for .words - .string "String": inserts a null-terminated UTF-8 encoded string - .strings ["String",]: "" multiple strings - Data is always word-aligned at the moment. - There are now assembler directives that affect the AST during parsing: - .include "path/to/file": Parses the contents of a file directly into the AST - Included files have their own defines, but share* labels. This is because .defines are a tokenizer construct, and including a file creates a new buffer and tokenizer. - Circular includes are NOT checked for at the moment. It is very easy to exhaust the stack. - General cleanup of several functions, comments, TODOs, etc. - main.rs was moved to make room for upcoming improvements to the UI TODO: - REPL mode is only partially compatible with .define directive - Branching to a label will branch to the data AT the label, not the label itself. I doubt this is correct behavior. - In case br <label> is meant to use the absolute address, I've created a .org directive (currently unimplemented) for specifying the load address of the program. 2023-09-05 01:54:50 -05:00			`fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>`
msp430-asm: init repo with proof-of-concept code 2023-08-19 23:02:24 -05:00			`where`
			`Self: Sized,`
			`T: TokenStream<'text>,`
			`{`
0.2.0: Feature update and Refactor - Each major module (lexer, parser, assembler) has its own error type - These error types are somewhat interconnected, but their dependency relationships are one-way and well defined - The AST is no longer responsible for assembling itself - The Assembler (assembler::Assembler) will now visit every AST node and accumulate words - Words are assumed to be little-endian. - There are now a set of assembler directives that affect the generated output: - .word <Number>: inserts a single word in the output - .words [<Number>,]: inserts multiple words in the output - .byte <Number>: Alias for .word - .bytes [<Number>,]: Alias for .words - .string "String": inserts a null-terminated UTF-8 encoded string - .strings ["String",]: "" multiple strings - Data is always word-aligned at the moment. - There are now assembler directives that affect the AST during parsing: - .include "path/to/file": Parses the contents of a file directly into the AST - Included files have their own defines, but share* labels. This is because .defines are a tokenizer construct, and including a file creates a new buffer and tokenizer. - Circular includes are NOT checked for at the moment. It is very easy to exhaust the stack. - General cleanup of several functions, comments, TODOs, etc. - main.rs was moved to make room for upcoming improvements to the UI TODO: - REPL mode is only partially compatible with .define directive - Branching to a label will branch to the data AT the label, not the label itself. I doubt this is correct behavior. - In case br <label> is meant to use the absolute address, I've created a .org directive (currently unimplemented) for specifying the load address of the program. 2023-09-05 01:54:50 -05:00			`match Self::parse(p, stream) {`
			`Ok(some) => Ok(Some(some)),`
			`Err(ParseError::LexError(_)) => Ok(None),`
			`Err(e) => Err(e),`
msp430-asm: init repo with proof-of-concept code 2023-08-19 23:02:24 -05:00			`}`
			`}`

0.2.0: Feature update and Refactor - Each major module (lexer, parser, assembler) has its own error type - These error types are somewhat interconnected, but their dependency relationships are one-way and well defined - The AST is no longer responsible for assembling itself - The Assembler (assembler::Assembler) will now visit every AST node and accumulate words - Words are assumed to be little-endian. - There are now a set of assembler directives that affect the generated output: - .word <Number>: inserts a single word in the output - .words [<Number>,]: inserts multiple words in the output - .byte <Number>: Alias for .word - .bytes [<Number>,]: Alias for .words - .string "String": inserts a null-terminated UTF-8 encoded string - .strings ["String",]: "" multiple strings - Data is always word-aligned at the moment. - There are now assembler directives that affect the AST during parsing: - .include "path/to/file": Parses the contents of a file directly into the AST - Included files have their own defines, but share* labels. This is because .defines are a tokenizer construct, and including a file creates a new buffer and tokenizer. - Circular includes are NOT checked for at the moment. It is very easy to exhaust the stack. - General cleanup of several functions, comments, TODOs, etc. - main.rs was moved to make room for upcoming improvements to the UI TODO: - REPL mode is only partially compatible with .define directive - Branching to a label will branch to the data AT the label, not the label itself. I doubt this is correct behavior. - In case br <label> is meant to use the absolute address, I've created a .org directive (currently unimplemented) for specifying the load address of the program. 2023-09-05 01:54:50 -05:00			`fn parse_and<'text, T, R>(`
			`p: &Parser,`
			`stream: &mut T,`
			`f: fn(p: &Parser, &mut T) -> R,`
			`) -> Result<(Self, R), ParseError>`
msp430-asm: init repo with proof-of-concept code 2023-08-19 23:02:24 -05:00			`where`
			`Self: Sized,`
			`T: TokenStream<'text>,`
			`{`
			`Ok((Self::parse(p, stream)?, f(p, stream)))`
			`}`

			`/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.`
			`///`
			/// Returns [`Self::default()`](Default::default()) on error
			`fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self`
			`where`
			`Self: Sized + Default,`
			`T: TokenStream<'text>,`
			`{`
			`Self::parse(p, stream).unwrap_or_default()`
			`}`
			`}`
0.2.0: Feature update and Refactor - Each major module (lexer, parser, assembler) has its own error type - These error types are somewhat interconnected, but their dependency relationships are one-way and well defined - The AST is no longer responsible for assembling itself - The Assembler (assembler::Assembler) will now visit every AST node and accumulate words - Words are assumed to be little-endian. - There are now a set of assembler directives that affect the generated output: - .word <Number>: inserts a single word in the output - .words [<Number>,]: inserts multiple words in the output - .byte <Number>: Alias for .word - .bytes [<Number>,]: Alias for .words - .string "String": inserts a null-terminated UTF-8 encoded string - .strings ["String",]: "" multiple strings - Data is always word-aligned at the moment. - There are now assembler directives that affect the AST during parsing: - .include "path/to/file": Parses the contents of a file directly into the AST - Included files have their own defines, but share* labels. This is because .defines are a tokenizer construct, and including a file creates a new buffer and tokenizer. - Circular includes are NOT checked for at the moment. It is very easy to exhaust the stack. - General cleanup of several functions, comments, TODOs, etc. - main.rs was moved to make room for upcoming improvements to the UI TODO: - REPL mode is only partially compatible with .define directive - Branching to a label will branch to the data AT the label, not the label itself. I doubt this is correct behavior. - In case br <label> is meant to use the absolute address, I've created a .org directive (currently unimplemented) for specifying the load address of the program. 2023-09-05 01:54:50 -05:00
			`macro_rules! parsable_str_types {`
			`($($t:ty),*$(,)?) => {$(`
			`impl Parsable for $t {`
			`fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>`
			`where T: TokenStream<'text> {`
			`Ok(stream.expect(Type::String)?.lexeme().trim_matches('"').into())`
			`}`
			`}`
			`)*};`
			`}`
			`use std::{path::PathBuf, rc::Rc};`
			`parsable_str_types![String, Rc<str>, Box<str>, PathBuf];`

			`/// Vectors of arbitrary parsables are cool`
			`impl<P: Parsable> Parsable for Vec<P> {`
			`fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>`
			`where T: TokenStream<'text> {`
			`// [dead beef]`
			`// [A, B,]`
			`// [c d e f]`
			`// [ something`
			`// else ]`

			`stream.require(Type::LBracket)?;`
			`stream.allow(Type::Endl);`
			`let mut out = vec![];`
			`while let Some(t) = P::try_parse(p, stream)? {`
			`out.push(t);`
			`stream.allow(Type::Separator);`
			`stream.allow(Type::Endl);`
			`}`
			`stream.require(Type::RBracket)?;`
			`Ok(out)`
			`}`
			`}`