doughlang: "fix" semi elision, add "fully qualified" paths, add proper pattern prec parsing.

This actually gets some old code parsing!
This commit is contained in:
2025-10-16 05:49:02 -04:00
parent 03d9682409
commit 1998558468
5 changed files with 314 additions and 172 deletions

View File

@@ -43,12 +43,13 @@ pub struct Parser<'t> {
pub lexer: Lexer<'t>,
pub next_tok: Option<Token>,
pub last_loc: Span,
pub elide_do: bool,
}
impl<'t> Parser<'t> {
/// Constructs a new Parser
pub fn new(lexer: Lexer<'t>) -> Self {
Self { lexer, next_tok: None, last_loc: Span::default() }
Self { lexer, next_tok: None, last_loc: Span::default(), elide_do: false }
}
/// The identity function. This exists to make production chaining easier.
@@ -86,7 +87,9 @@ impl<'t> Parser<'t> {
/// Consumes and returns the currently-peeked [Token].
///
/// As a side effect this refreshes `elide_do`: the flag becomes `true` when
/// the token taken is a [TKind::RCurly], and `false` otherwise (including
/// when no token was peeked).
pub fn take(&mut self) -> Option<Token> {
self.next_tok.take()
let tok = self.next_tok.take();
// Remember whether a closing curly brace was just consumed: the
// `Ps::ImplicitDo` arm of the expression parser only chains an implicit
// `do` when this flag is set.
self.elide_do = matches!(tok, Some(Token { kind: TKind::RCurly, .. }));
tok
}
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
@@ -123,7 +126,7 @@ impl<'t> Parser<'t> {
) -> PResult<Vec<P>> {
// TODO: This loses lexer errors
while self.peek_if(end).is_none() {
elems.push(self.parse(level)?);
elems.push(self.parse(level.clone())?);
if self.next_if(sep).is_err() {
break;
}
@@ -143,7 +146,7 @@ impl<'t> Parser<'t> {
sep: TKind,
) -> PResult<Vec<P>> {
loop {
elems.push(self.parse(level)?);
elems.push(self.parse(level.clone())?);
if self.next_if(sep).is_err() {
break Ok(elems);
}
@@ -181,11 +184,36 @@ impl<'t> Parser<'t> {
}
pub trait Parse<'t> {
type Prec: Copy;
type Prec: Clone;
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
where Self: Sized;
}
/// Parses a path made of identifiers separated by `::`.
///
/// A leading `::` is recorded as an empty first part (the "root"). At least
/// one identifier is required; parsing stops at the first identifier that is
/// not followed by `::`.
impl<'t> Parse<'t> for FqPath {
    // ugly hack: provide a partial path to parse()
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        let mut parts = Vec::new();

        // A path that starts with `::` is anchored at the root, which is
        // represented by an empty leading segment.
        if p.next_if(TKind::ColonColon).is_ok() {
            parts.push("".into());
        }

        loop {
            // Every segment must be an identifier; anything else is an error.
            let segment = p.next_if(TKind::Identifier)?;
            parts.push(
                segment
                    .lexeme
                    .string()
                    .expect("Identifier should have String"),
            );

            // No further `::` means the path is complete.
            if p.next_if(TKind::ColonColon).is_err() {
                break Ok(FqPath { parts });
            }
        }
    }
}
impl<'t> Parse<'t> for Literal {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
@@ -220,24 +248,41 @@ impl<'t> Parse<'t> for Literal {
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PPrec {
Min,
Typed,
Tuple,
Alt,
Tuple,
Typed,
Range,
Max,
}
impl PPrec {
fn next(self) -> Self {
match self {
Self::Min => Self::Min,
Self::Typed => Self::Min,
Self::Tuple => Self::Typed,
Self::Min => Self::Alt,
Self::Alt => Self::Tuple,
Self::Max => Self::Alt,
Self::Tuple => Self::Typed,
Self::Typed => Self::Range,
Self::Range => Self::Max,
Self::Max => Self::Max,
}
}
}
/// The infix pattern productions that `pat_from_infix` can select.
enum PatPs {
/// A `:` following a pattern; the pattern parser turns this into `Pat::Typed`.
Typed,
/// Any other infix pattern operator, carried as the [PatOp] to build.
Op(PatOp),
}
/// Classifies `token` as an infix pattern operator.
///
/// Returns the production to apply together with its precedence level, or
/// `None` when the token's kind is not one of `..`, `:`, `,` or `|`.
fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {
    match token.kind {
        TKind::DotDot => Some((PatPs::Op(PatOp::RangeEx), PPrec::Range)),
        TKind::Colon => Some((PatPs::Typed, PPrec::Typed)),
        TKind::Comma => Some((PatPs::Op(PatOp::Tuple), PPrec::Tuple)),
        TKind::Bar => Some((PatPs::Op(PatOp::Alt), PPrec::Alt)),
        _ => None,
    }
}
impl<'t> Parse<'t> for Pat {
type Prec = PPrec;
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
@@ -249,63 +294,75 @@ impl<'t> Parse<'t> for Pat {
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
Pat::Lit(p.parse(())?)
}
TKind::Bar => p.consume().parse(level)?,
TKind::Identifier => match tok.lexeme.str() {
Some("_") => p.consume().then(Pat::Ignore),
_ => {
let name = p
.take_lexeme()
.expect("should have Token")
.string()
.expect("Identifier token should have String");
match p.peek().map(|t| t.kind)? {
TKind::LParen => Pat::TupStruct(name, p.parse(PPrec::Tuple)?),
TKind::LCurly => Pat::Struct(
name,
let mut path: FqPath = p.parse(())?;
// TODO: make these postfix.
match p.peek().map(|t| t.kind) {
Ok(TKind::LParen) => Pat::TupStruct(path, p.parse(PPrec::Typed)?),
Ok(TKind::LCurly) => Pat::Struct(
path,
p.consume()
.opt(PPrec::Tuple, TKind::RCurly)?
.unwrap_or_else(|| Box::new(Pat::Tuple(vec![]))),
.opt(PPrec::Alt, TKind::RCurly)?
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
),
_ => Pat::Name(name),
Ok(_) | Err(ParseError::FromLexer(LexError { pos: _, res: "EOF" })) => {
match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path),
}
}
Err(e) => Err(e)?,
}
}
},
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()),
TKind::DotDot => Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
Some(_) => Some(p.parse(level)?),
None => None,
}),
TKind::LParen => {
Pat::Tuple(
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?,
)
}
TKind::LBrack => {
Pat::Slice(
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RBrack)?,
)
}
TKind::DotDot => Pat::Op(
PatOp::Rest,
// Identifier in Rest position always becomes binder
match p.consume().peek()?.kind {
TKind::Identifier => vec![Pat::Name(
p.take_lexeme()
.expect("should have lexeme")
.string()
.expect("should be string"),
)],
TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?],
_ => vec![],
},
),
TKind::LParen => Pat::Op(
PatOp::Tuple,
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?,
),
TKind::LBrack => Pat::Op(
PatOp::Slice,
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RBrack)?,
),
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
};
// Infix
while let Ok(tok) = p.peek() {
while let Ok(tok) = p.peek()
&& let Some((op, prec)) = pat_from_infix(tok)
&& level <= prec
{
let kind = tok.kind;
head = match kind {
TKind::Colon if level >= PPrec::Typed => {
Pat::Typed(head.into(), p.consume().parse(())?)
}
TKind::Comma if level >= PPrec::Tuple => Pat::Tuple(p.consume().list_bare(
vec![head],
PPrec::Tuple.next(),
kind,
)?),
TKind::Bar if level >= PPrec::Alt => {
Pat::Alt(p.consume().list_bare(vec![head], PPrec::Alt.next(), kind)?)
}
_ => break,
head = match op {
PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?),
PatPs::Op(op @ PatOp::RangeEx) => Pat::Op(
op,
match p.consume().peek().map(|t| t.kind) {
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
vec![head, p.parse(prec.next())?]
}
_ => vec![head],
},
),
PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
}
}
@@ -323,12 +380,7 @@ impl<'t> Parse<'t> for Ty {
let head = match tok.kind {
TKind::Identifier => match tok.lexeme.str() {
Some("_") => p.consume().then(Ty::Infer),
_ => Ty::Named(
p.take_lexeme()
.expect("should have Token")
.string()
.expect("Identifier token should have String"),
),
_ => Ty::Named(p.parse(())?),
},
TKind::LBrack => {
let ty = p.consume().parse(level)?;
@@ -346,13 +398,11 @@ impl<'t> Parse<'t> for Ty {
p.consume().consume_if(TKind::LParen)?;
let mut tys = p.list(vec![], (), TKind::Comma, TKind::RParen)?;
match p.next_if(TKind::Arrow) {
Ok(_) => {
tys.push(p.parse(())?);
Ty::Fn(tys)
}
_ => Ty::Tuple(tys),
}
tys.push(match p.next_if(TKind::Arrow) {
Ok(_) => p.parse(())?,
_ => Ty::Tuple(vec![]),
});
Ty::Fn(tys)
}
TKind::LParen => {
let mut tys = p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?;
@@ -452,6 +502,7 @@ pub enum Ps {
Match, // match Expr { MatchArm,* }
Mod, // mod Ty Expr
ImplicitDo, // An implicit semicolon
ExplicitDo, // An explicit leading semicolon
End, // Produces an empty value.
Op(Op), // A normal [ast::Op]
}
@@ -459,10 +510,10 @@ pub enum Ps {
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
TKind::Semi => (Ps::ExplicitDo, Prec::Do),
TKind::Identifier => (Ps::Id, Prec::Max),
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max),
TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
(Ps::Lit, Prec::Max)
}
@@ -510,7 +561,6 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
TKind::As => (Ps::Op(Op::As), Prec::Body),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
@@ -539,22 +589,7 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
TKind::True
| TKind::False
| TKind::Character
| TKind::Integer
| TKind::String
| TKind::Identifier
| TKind::Public
| TKind::Module
| TKind::Fn
| TKind::Do
| TKind::While
| TKind::If
| TKind::For
| TKind::Break
| TKind::Return => (Ps::ImplicitDo, Prec::Do),
kind => Err(ParseError::NotInfix(kind, token.span))?,
_ => (Ps::ImplicitDo, Prec::Do),
})
}
@@ -563,7 +598,7 @@ impl<'t> Parse<'t> for Const {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
Ok(Self(
p.consume().parse(PPrec::Alt)?,
p.consume().parse(PPrec::Tuple)?,
p.consume_if(TKind::Eq)?.parse(Prec::Tuple.value())?,
))
}
@@ -573,7 +608,7 @@ impl<'t> Parse<'t> for Struct {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let value = p.consume().parse(PPrec::Tuple)?;
let value = p.consume().parse(PPrec::Min)?;
Ok(Self(value))
}
}
@@ -585,19 +620,22 @@ impl<'t> Parse<'t> for Fn {
match p.consume().next_if(TKind::Identifier) {
Ok(Token { lexeme, .. }) => Ok(Self(
lexeme.string(),
p.parse(PPrec::Typed)?,
p.opt_if((), TKind::Arrow)?.unwrap_or_default(),
p.parse(PPrec::Tuple)?,
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Tuple(vec![])),
p.parse(Prec::Body.next())?,
)),
_ => Ok(Self(
None,
Pat::Tuple(p.consume_if(TKind::LParen)?.list(
vec![],
PPrec::Tuple,
TKind::Comma,
TKind::RParen,
)?),
p.opt_if((), TKind::Arrow)?.unwrap_or_default(),
Pat::Op(
PatOp::Tuple,
p.consume_if(TKind::LParen)?.list(
vec![],
PPrec::Tuple,
TKind::Comma,
TKind::RParen,
)?,
),
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Tuple(vec![])),
p.parse(Prec::Body.next())?,
)),
}
@@ -637,7 +675,7 @@ impl<'t> Parse<'t> for MatchArm {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
p.next_if(TKind::Bar).ok();
Ok(MatchArm(
p.parse(PPrec::Max)?,
p.parse(PPrec::Min)?,
p.consume_if(TKind::FatArrow)?.parse(level)?,
))
}
@@ -751,7 +789,10 @@ fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult<Expr> {
Expr::Op(Op::Break, vec![fail]).anno(fspan),
),
MatchArm(
Pat::TupStruct("Some".into(), Box::new(Pat::Tuple(vec![pat]))),
Pat::TupStruct(
"Some".into(),
Box::new(Pat::Op(PatOp::Tuple, vec![pat])),
),
pass,
),
],
@@ -784,7 +825,12 @@ impl<'t> Parse<'t> for Expr {
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,
Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident").to_string()),
Ps::ExplicitDo => {
p.consume();
Expr::Op(Op::Tuple, vec![])
}
Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Let => Expr::Let(p.parse(())?),
@@ -820,14 +866,14 @@ impl<'t> Parse<'t> for Expr {
None,
p.consume()
.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Tuple(vec![])),
p.opt_if((), TKind::Arrow)?.unwrap_or_default(),
.unwrap_or(Pat::Op(PatOp::Tuple, vec![])),
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
p.parse(Prec::Body.next())?,
))),
Ps::Lambda0 => Expr::Fn(Box::new(Fn(
None,
Pat::Tuple(vec![]),
p.consume().opt_if((), TKind::Arrow)?.unwrap_or_default(),
Pat::Op(PatOp::Tuple, vec![]),
p.consume().opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
p.parse(Prec::Body.next())?,
))),
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
@@ -853,16 +899,16 @@ impl<'t> Parse<'t> for Expr {
head = match op {
// Make (structor expressions) are context-sensitive
Ps::Make => match &head {
Expr::Op(Op::Path, _) | Expr::Id(_) | Expr::MetId(_) => {
Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
)))
}
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
},
// As is ImplicitDo (semicolon elision)
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?),
Ps::ImplicitDo => head.and_do(span, p.parse(prec.next())?),
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
@@ -873,7 +919,7 @@ impl<'t> Parse<'t> for Expr {
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?,
),
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::Path | Op::LogAnd | Op::LogOr)) => Expr::Op(
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
@@ -883,7 +929,7 @@ impl<'t> Parse<'t> for Expr {
Expr::Op(op, vec![head.anno(span)])
}
Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]),
_ => unimplemented!("infix {op:?}"),
_ => Err(ParseError::NotInfix(kind, span))?,
}
}