From 7b05da13344747a49d09434bbc17d93e8e4f9f77 Mon Sep 17 00:00:00 2001 From: John Date: Tue, 16 Sep 2025 04:18:12 -0400 Subject: [PATCH] do: Elaborate on pattern syntax, add binary `as` operator --- src/ast.rs | 41 ++++++++-- src/ast/macro_matcher.rs | 16 ++++ src/lexer.rs | 1 + src/main.rs | 1 - src/parser.rs | 168 ++++++++++++++++++++++----------------- src/token.rs | 1 + 6 files changed, 150 insertions(+), 78 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 6cb55e1..0410d02 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -42,6 +42,8 @@ pub enum Pat { Slice(Vec), /// Matches one of the provided alternates Alt(Vec), + /// Matches a typed pattern + Typed(Box, Ty), } /// The arms of a make expression @@ -70,7 +72,7 @@ pub enum Ty { /// `[Ty]` Slice(Box), /// `[Ty; _]` - Array(Box, usize), + Array(Box, Box), /// `[Rety, ..Args]` Fn(Vec), } @@ -113,12 +115,21 @@ impl Expr { | Self::Op(Op::Deref, _) ) } + + #[allow(clippy::type_complexity)] + pub fn as_slice(&self) -> Option<(Op, &[Anno, A>])> { + match self { + Expr::Op(op, args) => Some((*op, args.as_slice())), + _ => None, + } + } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Op { // -- true operators Do, // Expr ; Expr + As, // Expr as Expr Macro, // macro Expr => Expr Block, // { Expr } Array, // [ Expr,* ] @@ -201,13 +212,13 @@ impl Display for Expr { Self::MetId(id) => write!(f, "`{id}"), Self::Lit(literal) => literal.fmt(f), Self::Let(pat, Some(expr)) => write!(f, "let {pat} = {expr}"), - Self::Let(pat, None) => write!(f, "let {pat}"), + Self::Let(pat, None) => write!(f, "let ({pat})"), Self::Const(pat, expr) => write!(f, "const {pat} = {expr}"), Self::Make(expr, make_arms) => { f.delimit(fmt!("({expr} {{"), "})").list(make_arms, ", ") } Self::Match(expr, match_arms) => f - .delimit_indented(fmt!("match {expr} {{\n"), "}") + .delimit_indented(fmt!("match {expr} {{"), "}") .list_wrap("\n", match_arms, ",\n", ",\n"), Self::Fn(pat, expr) => write!(f, "fn {pat} {expr}"), @@ -246,6 +257,7 @@ impl Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Op::Do => "; ".fmt(f), + Op::As => " as ".fmt(f), Op::Macro => "macro ".fmt(f), Op::Block => "{}".fmt(f), Op::Array => "[]".fmt(f), @@ -261,8 +273,8 @@ impl Display for Op { Op::Return => "return ".fmt(f), Op::Dot => ".".fmt(f), Op::Path => "::".fmt(f), - Op::RangeEx => " .. ".fmt(f), - Op::RangeIn => " ..= ".fmt(f), + Op::RangeEx => "..".fmt(f), + Op::RangeIn => "..=".fmt(f), Op::Neg => "-".fmt(f), Op::Not => "!".fmt(f), Op::Identity => "!!".fmt(f), @@ -320,6 +332,25 @@ impl Display for Pat { Self::Tuple(pats) => f.delimit("(", ")").list(pats, ", "), Self::Slice(pats) => f.delimit("[", "]").list(pats, ", "), Self::Alt(pats) => f.delimit("<", ">").list(pats, " | "), + Self::Typed(pat, ty) => write!(f, "{pat}: {ty}"), + } + } +} + +impl Display for Ty { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Infer => "_".fmt(f), + Self::Named(name) => name.fmt(f), + Self::Tuple(items) => f.delimit('(', ')').list(items, ", "), + Self::Slice(ty) => write!(f, "[{ty}]"), + Self::Array(ty, n) => write!(f, "[{ty}; {n}]"), + Self::Fn(items) => match items.as_slice() { + [] => write!(f, "fn ()"), + [rety, args @ ..] => f + .delimit(fmt!("fn ("), fmt!(") -> {rety}")) + .list(args, ", "), + }, } } } diff --git a/src/ast/macro_matcher.rs b/src/ast/macro_matcher.rs index e6d7764..25dce0e 100644 --- a/src/ast/macro_matcher.rs +++ b/src/ast/macro_matcher.rs @@ -193,6 +193,8 @@ impl Match for Pat { (Pat::Slice(_), _) => false, (Pat::Alt(pat), Pat::Alt(expr)) => Match::recurse(sub, pat, expr), (Pat::Alt(_), _) => false, + (Pat::Typed(pat, _), Pat::Typed(expr, _)) => Match::recurse(sub, pat, expr), + (Pat::Typed(..), _) => false, } } @@ -208,10 +210,24 @@ impl Match for Pat { Pat::Tuple(pats) => pats.apply(sub), Pat::Slice(pats) => pats.apply(sub), Pat::Alt(pats) => pats.apply(sub), + Pat::Typed(pat, ty) => { + pat.apply(sub); + ty.apply(sub); + } } } } +impl Match for Ty { + fn apply(&mut self, sub: &Subst) { + todo!("Apply subst {sub:?} for {self}.") + } + + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + todo!("Construct subst {sub:?} from {pat} and {expr}.") + } +} + impl Match for Op { fn recurse(_: &mut Subst, pat: &Self, expr: &Self) -> bool { pat == expr diff --git a/src/lexer.rs b/src/lexer.rs index 9b22d8f..40a4d85 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -220,6 +220,7 @@ impl<'t> Lexer<'t> { let token = self.produce(TKind::Identifier); Ok(Token { kind: match token.lexeme.as_str() { + "as" => TKind::As, "break" => TKind::Break, "const" => TKind::Const, "do" => TKind::Do, diff --git a/src/main.rs b/src/main.rs index 3efe9af..724ae62 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,7 +32,6 @@ fn main() -> Result<(), Box> { Ok(Response::Deny) } _ => { - lex(line); parse(line); Ok(Response::Accept) } diff --git a/src/parser.rs b/src/parser.rs index cea44c1..4ef071e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,6 +15,7 @@ pub enum ParseError { Expected(TKind, Span), NotLiteral(TKind, Span), NotPattern(TKind, Span), + NotType(TKind, Span), NotPrefix(TKind, Span), NotInfix(TKind, Span), NotPostfix(TKind, Span), @@ -27,6 +28,7 @@ impl Display for ParseError { Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."), Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."), Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), + Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."), Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."), Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."), Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."), @@ -155,7 +157,7 @@ impl<'t> Parser<'t> { }) } - /// Parses an expression into a vec unless the next token is `end` + /// Parses a P unless the next token is `end` pub fn opt>(&mut self, level: P::Prec, end: TKind) -> PResult> { let out = match self.peek_if(end) { None => Some(self.parse(level)?), @@ -179,8 +181,8 @@ pub trait Parse<'t> { } impl<'t> Parse<'t> for Literal { - type Prec = usize; - fn parse(p: &mut Parser<'t>, _level: usize) -> PResult { + type Prec = (); + fn parse(p: &mut Parser<'t>, _level: ()) -> PResult { let tok = p.peek()?; Ok(match tok.kind { TKind::True => p.consume().then(Literal::Bool(true)), @@ -209,9 +211,10 @@ impl<'t> Parse<'t> for Literal { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum PPrec { Min, + Typed, Tuple, Alt, - NoTopAlt, + Max, } impl<'t> Parse<'t> for Pat { @@ -223,7 +226,7 @@ impl<'t> Parse<'t> for Pat { // Prefix let mut head = match tok.kind { TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { - Pat::Lit(p.parse(0)?) + Pat::Lit(p.parse(())?) } TKind::Identifier => match tok.lexeme.as_str() { "_" => p.consume().then(Pat::Ignore), @@ -254,12 +257,15 @@ impl<'t> Parse<'t> for Pat { let kind = tok.kind; head = match kind { - TKind::Bar if level < PPrec::Alt => { - Pat::Alt(p.consume().list_bare(vec![head], PPrec::Alt, kind)?) + TKind::Colon if level > PPrec::Typed => { + Pat::Typed(head.into(), p.consume().parse(())?) } - TKind::Comma if level < PPrec::Tuple => { + TKind::Comma if level > PPrec::Tuple => { Pat::Tuple(p.consume().list_bare(vec![head], PPrec::Tuple, kind)?) } + TKind::Bar if level > PPrec::Alt => { + Pat::Alt(p.consume().list_bare(vec![head], PPrec::Alt, kind)?) + } _ => break, } } @@ -268,13 +274,44 @@ impl<'t> Parse<'t> for Pat { } } +impl<'t> Parse<'t> for Ty { + type Prec = (); + + fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult + where Self: Sized { + let tok = p.peek()?; + + let head = match tok.kind { + TKind::Identifier => match tok.lexeme.as_str() { + "_" => p.consume().then(Ty::Infer), + _ => Ty::Named(p.take_lexeme().expect("should have Token")), + }, + TKind::LBrack => { + let ty = p.consume().parse(level)?; + match p.next()? { + Token { kind: TKind::Semi, .. } => { + let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?); + p.next_if(TKind::RBrack)?; + ty + } + Token { kind: TKind::RBrack, .. } => Ty::Slice(ty), + tok => Err(ParseError::NotType(tok.kind, tok.span))?, + } + } + _ => Err(ParseError::NotType(tok.kind, tok.span))?, + }; + + Ok(head) + } +} + impl<'t> Parse<'t> for MatchArm { type Prec = usize; - fn parse(p: &mut Parser<'t>, _level: usize) -> PResult { + fn parse(p: &mut Parser<'t>, level: usize) -> PResult { p.next_if(TKind::Bar).ok(); Ok(MatchArm( - p.list(vec![], PPrec::Min, TKind::Bar, TKind::FatArrow)?, - p.parse(0)?, + p.list(vec![], PPrec::Max, TKind::Bar, TKind::FatArrow)?, + p.parse(level)?, )) } } @@ -301,11 +338,11 @@ enum Prec { Assign, /// Constructor for a tuple Tuple, - /// Constructor for a struct - Make, /// The body of a function, conditional, etc. Body, - /// The short-circuiting logical operators [Prec::LogOr], [Prec::LogAnd] + /// Constructor for a struct + Make, + /// The conditional of an `if` or `while` (which is really an `if`) Logical, /// The short-circuiting "boolean or" operator LogOr, @@ -372,17 +409,18 @@ pub enum Ps { fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { TKind::Do => (Ps::Op(Op::Do), Prec::Do), + + TKind::Identifier => (Ps::Id, Prec::Max), + TKind::Grave => (Ps::Mid, Prec::Max), + TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max), TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { (Ps::Lit, Prec::Max) } - TKind::Identifier => (Ps::Id, Prec::Max), - TKind::Grave => (Ps::Mid, Prec::Max), TKind::Fn => (Ps::Fn, Prec::Body), - TKind::Match => (Ps::Match, Prec::Body), TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign), - TKind::Let => (Ps::Let, Prec::Body), + TKind::Let => (Ps::Let, Prec::Tuple), TKind::Const => (Ps::Const, Prec::Body), TKind::Loop => (Ps::Op(Op::Loop), Prec::Body), TKind::If => (Ps::Op(Op::If), Prec::Body), @@ -414,6 +452,17 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { + TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration + TKind::As => (Ps::Op(Op::As), Prec::Body), + TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), + TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), + TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max), + TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), + TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), + TKind::Question => (Ps::Op(Op::Try), Prec::Unary), + TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), + TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), + // TKind::LCurly => (Ps::Make, Prec::Make), TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max), TKind::Eq => (Ps::Op(Op::Set), Prec::Assign), TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical), @@ -435,45 +484,19 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { TKind::Star => (Ps::Op(Op::Mul), Prec::Term), TKind::Slash => (Ps::Op(Op::Div), Prec::Term), TKind::Rem => (Ps::Op(Op::Rem), Prec::Term), - TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max), - TKind::Question => (Ps::End, Prec::Extend), kind => Err(ParseError::NotInfix(kind, token.span))?, }) } +#[allow(clippy::match_single_binding, unused)] fn from_postfix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { - TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration - TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), - TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), - TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max), - TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), - TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), - TKind::Question => (Ps::Op(Op::Try), Prec::Unary), - TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), - TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), TKind::LCurly => (Ps::Make, Prec::Make), kind => Err(ParseError::NotPostfix(kind, token.span))?, + // _ => (Ps::End, Prec::Max), }) } -#[rustfmt::skip] -fn should_coagulate(prev: Op, op: Op) -> bool { - prev == op && match prev { - Op::LogAnd => true, - Op::LogOr => true, - Op::Dot => false, - Op::Path => true, - Op::Lt => false, - Op::Leq => false, - Op::Eq => false, - Op::Neq => false, - Op::Geq => false, - Op::Gt => false, - _ => false, - } -} - impl<'t> Parse<'t> for Expr { type Prec = usize; @@ -496,12 +519,12 @@ impl<'t> Parse<'t> for Expr { Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident")), Ps::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme), - Ps::Lit => Expr::Lit(p.parse(MIN)?), + Ps::Lit => Expr::Lit(p.parse(())?), Ps::Let => Expr::Let( - p.consume().parse(PPrec::NoTopAlt)?, - p.opt_if(prec.next(), TKind::Eq)?, + p.consume().parse(PPrec::Alt)?, + p.opt_if(prec.value(), TKind::Eq)?, ), - Ps::Const => Expr::Const(p.consume().parse(PPrec::NoTopAlt)?, { + Ps::Const => Expr::Const(p.consume().parse(PPrec::Tuple)?, { p.next_if(TKind::Eq)?; p.parse(prec.next())? }), @@ -514,7 +537,7 @@ impl<'t> Parse<'t> for Expr { ), Ps::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, { p.next_if(TKind::LCurly)?; - p.list(vec![], 0, TKind::Comma, TKind::RCurly)? + p.list(vec![], prec.next(), TKind::Comma, TKind::RCurly)? }), Ps::Op(Op::Block) => Expr::Op( Op::Block, @@ -576,10 +599,30 @@ impl<'t> Parse<'t> for Expr { && let Ok((op, prec)) = from_postfix(tok) && level <= prec.prev() && op != Ps::End + { + // let kind = tok.kind; + let span = span.merge(p.span()); + // p.consume(); + head = match (op, &head) { + (Ps::Make, Expr::Op(Op::Path, _) | Expr::Id(_) | Expr::MetId(_)) => Expr::Make( + head.anno(span).into(), + p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?, + ), + _ => break, + }; + } + + // Infix + while let Ok(tok) = p.peek() + && let Ok((op, prec)) = from_infix(tok) + && level <= prec.prev() + && op != Ps::End { let kind = tok.kind; let span = span.merge(p.span()); + p.consume(); + head = match op { Ps::Make => Expr::Make( head.anno(span).into(), @@ -596,27 +639,8 @@ impl<'t> Parse<'t> for Expr { Ps::Op(op @ (Op::Do | Op::Tuple | Op::Dot | Op::Path | Op::LogAnd | Op::LogOr)) => { Expr::Op(op, p.list_bare(vec![head.anno(span)], prec.next(), kind)?) } - Ps::Op(op) => Expr::Op(op, vec![head.anno(span)]), - _ => unimplemented!("postfix {op:?}"), - }; - } - - // Infix - while let Ok(tok) = p.peek() - && let Ok((op, prec)) = from_infix(tok) - && level <= prec.prev() - && op != Ps::End - { - let span = span.merge(p.span()); - p.consume(); - - head = match (op, head) { - // controls expression chaining vs coagulating - (Ps::Op(op), Expr::Op(prev, mut args)) if should_coagulate(prev, op) => { - args.push(p.parse(prec.next())?); - Expr::Op(op, args) - } - (Ps::Op(op), head) => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]), + Ps::Op(op @ Op::Try) => Expr::Op(op, vec![head.anno(span)]), + Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]), _ => unimplemented!("infix {op:?}"), } } diff --git a/src/token.rs b/src/token.rs index 497743c..b3511e4 100644 --- a/src/token.rs +++ b/src/token.rs @@ -12,6 +12,7 @@ pub struct Token { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TKind { Comment, + As, Break, Const, Do,