doughlang: "fix" semi elision, add "fully qualified" paths, add proper pattern prec parsing.
This actually gets some old code parsing!
This commit is contained in:
274
src/parser.rs
274
src/parser.rs
@@ -43,12 +43,13 @@ pub struct Parser<'t> {
|
||||
pub lexer: Lexer<'t>,
|
||||
pub next_tok: Option<Token>,
|
||||
pub last_loc: Span,
|
||||
pub elide_do: bool,
|
||||
}
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
/// Constructs a new Parser
|
||||
pub fn new(lexer: Lexer<'t>) -> Self {
|
||||
Self { lexer, next_tok: None, last_loc: Span::default() }
|
||||
Self { lexer, next_tok: None, last_loc: Span::default(), elide_do: false }
|
||||
}
|
||||
|
||||
/// The identity function. This exists to make production chaining easier.
|
||||
@@ -86,7 +87,9 @@ impl<'t> Parser<'t> {
|
||||
|
||||
/// Consumes and returns the currently-peeked [Token].
|
||||
pub fn take(&mut self) -> Option<Token> {
|
||||
self.next_tok.take()
|
||||
let tok = self.next_tok.take();
|
||||
self.elide_do = matches!(tok, Some(Token { kind: TKind::RCurly, .. }));
|
||||
tok
|
||||
}
|
||||
|
||||
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
|
||||
@@ -123,7 +126,7 @@ impl<'t> Parser<'t> {
|
||||
) -> PResult<Vec<P>> {
|
||||
// TODO: This loses lexer errors
|
||||
while self.peek_if(end).is_none() {
|
||||
elems.push(self.parse(level)?);
|
||||
elems.push(self.parse(level.clone())?);
|
||||
if self.next_if(sep).is_err() {
|
||||
break;
|
||||
}
|
||||
@@ -143,7 +146,7 @@ impl<'t> Parser<'t> {
|
||||
sep: TKind,
|
||||
) -> PResult<Vec<P>> {
|
||||
loop {
|
||||
elems.push(self.parse(level)?);
|
||||
elems.push(self.parse(level.clone())?);
|
||||
if self.next_if(sep).is_err() {
|
||||
break Ok(elems);
|
||||
}
|
||||
@@ -181,11 +184,36 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
|
||||
pub trait Parse<'t> {
|
||||
type Prec: Copy;
|
||||
type Prec: Clone;
|
||||
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
|
||||
where Self: Sized;
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for FqPath {
|
||||
// ugly hack: provide a partial path to parse()
|
||||
type Prec = ();
|
||||
|
||||
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
|
||||
let mut parts = vec![];
|
||||
if p.next_if(TKind::ColonColon).is_ok() {
|
||||
parts.push("".into()); // the "root"
|
||||
}
|
||||
loop {
|
||||
parts.push(
|
||||
p.next_if(TKind::Identifier)?
|
||||
.lexeme
|
||||
.string()
|
||||
.expect("Identifier should have String"),
|
||||
);
|
||||
if p.next_if(TKind::ColonColon).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FqPath { parts })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for Literal {
|
||||
type Prec = ();
|
||||
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
|
||||
@@ -220,24 +248,41 @@ impl<'t> Parse<'t> for Literal {
|
||||
/// Precedence levels for pattern parsing, declared from loosest to tightest.
///
/// The derived [Ord] follows declaration order, so
/// `Min < Alt < Tuple < Typed < Range < Max`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PPrec {
    /// Loosest level: every infix pattern operator is accepted.
    Min,
    /// Alternation (`|`).
    Alt,
    /// Tuple construction (`,`).
    Tuple,
    /// Type annotation (`:`).
    Typed,
    /// Range (`..`).
    Range,
    /// Tightest level: no infix pattern operator is accepted.
    Max,
}

impl PPrec {
    /// Returns the next-tighter precedence level, saturating at [PPrec::Max].
    fn next(self) -> Self {
        match self {
            Self::Min => Self::Alt,
            Self::Alt => Self::Tuple,
            Self::Tuple => Self::Typed,
            Self::Typed => Self::Range,
            Self::Range => Self::Max,
            Self::Max => Self::Max,
        }
    }
}
|
||||
|
||||
enum PatPs {
|
||||
Typed,
|
||||
Op(PatOp),
|
||||
}
|
||||
|
||||
fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {
|
||||
Some(match token.kind {
|
||||
TKind::DotDot => (PatPs::Op(PatOp::RangeEx), PPrec::Range),
|
||||
TKind::Colon => (PatPs::Typed, PPrec::Typed),
|
||||
TKind::Comma => (PatPs::Op(PatOp::Tuple), PPrec::Tuple),
|
||||
TKind::Bar => (PatPs::Op(PatOp::Alt), PPrec::Alt),
|
||||
_ => None?,
|
||||
})
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for Pat {
|
||||
type Prec = PPrec;
|
||||
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
|
||||
@@ -249,63 +294,75 @@ impl<'t> Parse<'t> for Pat {
|
||||
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
|
||||
Pat::Lit(p.parse(())?)
|
||||
}
|
||||
TKind::Bar => p.consume().parse(level)?,
|
||||
TKind::Identifier => match tok.lexeme.str() {
|
||||
Some("_") => p.consume().then(Pat::Ignore),
|
||||
_ => {
|
||||
let name = p
|
||||
.take_lexeme()
|
||||
.expect("should have Token")
|
||||
.string()
|
||||
.expect("Identifier token should have String");
|
||||
match p.peek().map(|t| t.kind)? {
|
||||
TKind::LParen => Pat::TupStruct(name, p.parse(PPrec::Tuple)?),
|
||||
TKind::LCurly => Pat::Struct(
|
||||
name,
|
||||
let mut path: FqPath = p.parse(())?;
|
||||
// TODO: make these postfix.
|
||||
match p.peek().map(|t| t.kind) {
|
||||
Ok(TKind::LParen) => Pat::TupStruct(path, p.parse(PPrec::Typed)?),
|
||||
Ok(TKind::LCurly) => Pat::Struct(
|
||||
path,
|
||||
p.consume()
|
||||
.opt(PPrec::Tuple, TKind::RCurly)?
|
||||
.unwrap_or_else(|| Box::new(Pat::Tuple(vec![]))),
|
||||
.opt(PPrec::Alt, TKind::RCurly)?
|
||||
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
|
||||
),
|
||||
_ => Pat::Name(name),
|
||||
Ok(_) | Err(ParseError::FromLexer(LexError { pos: _, res: "EOF" })) => {
|
||||
match path.parts.len() {
|
||||
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
|
||||
_ => Self::Path(path),
|
||||
}
|
||||
}
|
||||
Err(e) => Err(e)?,
|
||||
}
|
||||
}
|
||||
},
|
||||
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()),
|
||||
TKind::DotDot => Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
|
||||
Some(_) => Some(p.parse(level)?),
|
||||
None => None,
|
||||
}),
|
||||
TKind::LParen => {
|
||||
Pat::Tuple(
|
||||
p.consume()
|
||||
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?,
|
||||
)
|
||||
}
|
||||
TKind::LBrack => {
|
||||
Pat::Slice(
|
||||
p.consume()
|
||||
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RBrack)?,
|
||||
)
|
||||
}
|
||||
TKind::DotDot => Pat::Op(
|
||||
PatOp::Rest,
|
||||
// Identifier in Rest position always becomes binder
|
||||
match p.consume().peek()?.kind {
|
||||
TKind::Identifier => vec![Pat::Name(
|
||||
p.take_lexeme()
|
||||
.expect("should have lexeme")
|
||||
.string()
|
||||
.expect("should be string"),
|
||||
)],
|
||||
TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?],
|
||||
_ => vec![],
|
||||
},
|
||||
),
|
||||
TKind::LParen => Pat::Op(
|
||||
PatOp::Tuple,
|
||||
p.consume()
|
||||
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?,
|
||||
),
|
||||
TKind::LBrack => Pat::Op(
|
||||
PatOp::Slice,
|
||||
p.consume()
|
||||
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RBrack)?,
|
||||
),
|
||||
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
|
||||
};
|
||||
|
||||
// Infix
|
||||
while let Ok(tok) = p.peek() {
|
||||
while let Ok(tok) = p.peek()
|
||||
&& let Some((op, prec)) = pat_from_infix(tok)
|
||||
&& level <= prec
|
||||
{
|
||||
let kind = tok.kind;
|
||||
|
||||
head = match kind {
|
||||
TKind::Colon if level >= PPrec::Typed => {
|
||||
Pat::Typed(head.into(), p.consume().parse(())?)
|
||||
}
|
||||
TKind::Comma if level >= PPrec::Tuple => Pat::Tuple(p.consume().list_bare(
|
||||
vec![head],
|
||||
PPrec::Tuple.next(),
|
||||
kind,
|
||||
)?),
|
||||
TKind::Bar if level >= PPrec::Alt => {
|
||||
Pat::Alt(p.consume().list_bare(vec![head], PPrec::Alt.next(), kind)?)
|
||||
}
|
||||
_ => break,
|
||||
head = match op {
|
||||
PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?),
|
||||
PatPs::Op(op @ PatOp::RangeEx) => Pat::Op(
|
||||
op,
|
||||
match p.consume().peek().map(|t| t.kind) {
|
||||
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
|
||||
vec![head, p.parse(prec.next())?]
|
||||
}
|
||||
_ => vec![head],
|
||||
},
|
||||
),
|
||||
PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -323,12 +380,7 @@ impl<'t> Parse<'t> for Ty {
|
||||
let head = match tok.kind {
|
||||
TKind::Identifier => match tok.lexeme.str() {
|
||||
Some("_") => p.consume().then(Ty::Infer),
|
||||
_ => Ty::Named(
|
||||
p.take_lexeme()
|
||||
.expect("should have Token")
|
||||
.string()
|
||||
.expect("Identifier token should have String"),
|
||||
),
|
||||
_ => Ty::Named(p.parse(())?),
|
||||
},
|
||||
TKind::LBrack => {
|
||||
let ty = p.consume().parse(level)?;
|
||||
@@ -346,13 +398,11 @@ impl<'t> Parse<'t> for Ty {
|
||||
p.consume().consume_if(TKind::LParen)?;
|
||||
|
||||
let mut tys = p.list(vec![], (), TKind::Comma, TKind::RParen)?;
|
||||
match p.next_if(TKind::Arrow) {
|
||||
Ok(_) => {
|
||||
tys.push(p.parse(())?);
|
||||
Ty::Fn(tys)
|
||||
}
|
||||
_ => Ty::Tuple(tys),
|
||||
}
|
||||
tys.push(match p.next_if(TKind::Arrow) {
|
||||
Ok(_) => p.parse(())?,
|
||||
_ => Ty::Tuple(vec![]),
|
||||
});
|
||||
Ty::Fn(tys)
|
||||
}
|
||||
TKind::LParen => {
|
||||
let mut tys = p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?;
|
||||
@@ -452,6 +502,7 @@ pub enum Ps {
|
||||
Match, // match Expr { MatchArm,* }
|
||||
Mod, // mod Ty Expr
|
||||
ImplicitDo, // An implicit semicolon
|
||||
ExplicitDo, // An explicit leading semicolon
|
||||
End, // Produces an empty value.
|
||||
Op(Op), // A normal [ast::Op]
|
||||
}
|
||||
@@ -459,10 +510,10 @@ pub enum Ps {
|
||||
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
|
||||
Ok(match token.kind {
|
||||
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
|
||||
TKind::Semi => (Ps::ExplicitDo, Prec::Do),
|
||||
|
||||
TKind::Identifier => (Ps::Id, Prec::Max),
|
||||
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
|
||||
TKind::Grave => (Ps::Mid, Prec::Max),
|
||||
TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max),
|
||||
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
|
||||
(Ps::Lit, Prec::Max)
|
||||
}
|
||||
@@ -510,7 +561,6 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
|
||||
TKind::As => (Ps::Op(Op::As), Prec::Body),
|
||||
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
|
||||
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
|
||||
TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max),
|
||||
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
|
||||
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
|
||||
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
|
||||
@@ -539,22 +589,7 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
|
||||
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
|
||||
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
|
||||
|
||||
TKind::True
|
||||
| TKind::False
|
||||
| TKind::Character
|
||||
| TKind::Integer
|
||||
| TKind::String
|
||||
| TKind::Identifier
|
||||
| TKind::Public
|
||||
| TKind::Module
|
||||
| TKind::Fn
|
||||
| TKind::Do
|
||||
| TKind::While
|
||||
| TKind::If
|
||||
| TKind::For
|
||||
| TKind::Break
|
||||
| TKind::Return => (Ps::ImplicitDo, Prec::Do),
|
||||
kind => Err(ParseError::NotInfix(kind, token.span))?,
|
||||
_ => (Ps::ImplicitDo, Prec::Do),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -563,7 +598,7 @@ impl<'t> Parse<'t> for Const {
|
||||
|
||||
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
|
||||
Ok(Self(
|
||||
p.consume().parse(PPrec::Alt)?,
|
||||
p.consume().parse(PPrec::Tuple)?,
|
||||
p.consume_if(TKind::Eq)?.parse(Prec::Tuple.value())?,
|
||||
))
|
||||
}
|
||||
@@ -573,7 +608,7 @@ impl<'t> Parse<'t> for Struct {
|
||||
type Prec = ();
|
||||
|
||||
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
|
||||
let value = p.consume().parse(PPrec::Tuple)?;
|
||||
let value = p.consume().parse(PPrec::Min)?;
|
||||
Ok(Self(value))
|
||||
}
|
||||
}
|
||||
@@ -585,19 +620,22 @@ impl<'t> Parse<'t> for Fn {
|
||||
match p.consume().next_if(TKind::Identifier) {
|
||||
Ok(Token { lexeme, .. }) => Ok(Self(
|
||||
lexeme.string(),
|
||||
p.parse(PPrec::Typed)?,
|
||||
p.opt_if((), TKind::Arrow)?.unwrap_or_default(),
|
||||
p.parse(PPrec::Tuple)?,
|
||||
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Tuple(vec![])),
|
||||
p.parse(Prec::Body.next())?,
|
||||
)),
|
||||
_ => Ok(Self(
|
||||
None,
|
||||
Pat::Tuple(p.consume_if(TKind::LParen)?.list(
|
||||
vec![],
|
||||
PPrec::Tuple,
|
||||
TKind::Comma,
|
||||
TKind::RParen,
|
||||
)?),
|
||||
p.opt_if((), TKind::Arrow)?.unwrap_or_default(),
|
||||
Pat::Op(
|
||||
PatOp::Tuple,
|
||||
p.consume_if(TKind::LParen)?.list(
|
||||
vec![],
|
||||
PPrec::Tuple,
|
||||
TKind::Comma,
|
||||
TKind::RParen,
|
||||
)?,
|
||||
),
|
||||
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Tuple(vec![])),
|
||||
p.parse(Prec::Body.next())?,
|
||||
)),
|
||||
}
|
||||
@@ -637,7 +675,7 @@ impl<'t> Parse<'t> for MatchArm {
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
|
||||
p.next_if(TKind::Bar).ok();
|
||||
Ok(MatchArm(
|
||||
p.parse(PPrec::Max)?,
|
||||
p.parse(PPrec::Min)?,
|
||||
p.consume_if(TKind::FatArrow)?.parse(level)?,
|
||||
))
|
||||
}
|
||||
@@ -751,7 +789,10 @@ fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult<Expr> {
|
||||
Expr::Op(Op::Break, vec![fail]).anno(fspan),
|
||||
),
|
||||
MatchArm(
|
||||
Pat::TupStruct("Some".into(), Box::new(Pat::Tuple(vec![pat]))),
|
||||
Pat::TupStruct(
|
||||
"Some".into(),
|
||||
Box::new(Pat::Op(PatOp::Tuple, vec![pat])),
|
||||
),
|
||||
pass,
|
||||
),
|
||||
],
|
||||
@@ -784,7 +825,12 @@ impl<'t> Parse<'t> for Expr {
|
||||
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
|
||||
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,
|
||||
|
||||
Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident").to_string()),
|
||||
Ps::ExplicitDo => {
|
||||
p.consume();
|
||||
Expr::Op(Op::Tuple, vec![])
|
||||
}
|
||||
|
||||
Ps::Id => Expr::Id(p.parse(())?),
|
||||
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
|
||||
Ps::Lit => Expr::Lit(p.parse(())?),
|
||||
Ps::Let => Expr::Let(p.parse(())?),
|
||||
@@ -820,14 +866,14 @@ impl<'t> Parse<'t> for Expr {
|
||||
None,
|
||||
p.consume()
|
||||
.opt(PPrec::Tuple, TKind::Bar)?
|
||||
.unwrap_or(Pat::Tuple(vec![])),
|
||||
p.opt_if((), TKind::Arrow)?.unwrap_or_default(),
|
||||
.unwrap_or(Pat::Op(PatOp::Tuple, vec![])),
|
||||
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
|
||||
p.parse(Prec::Body.next())?,
|
||||
))),
|
||||
Ps::Lambda0 => Expr::Fn(Box::new(Fn(
|
||||
None,
|
||||
Pat::Tuple(vec![]),
|
||||
p.consume().opt_if((), TKind::Arrow)?.unwrap_or_default(),
|
||||
Pat::Op(PatOp::Tuple, vec![]),
|
||||
p.consume().opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
|
||||
p.parse(Prec::Body.next())?,
|
||||
))),
|
||||
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
|
||||
@@ -853,16 +899,16 @@ impl<'t> Parse<'t> for Expr {
|
||||
head = match op {
|
||||
// Make (structor expressions) are context-sensitive
|
||||
Ps::Make => match &head {
|
||||
Expr::Op(Op::Path, _) | Expr::Id(_) | Expr::MetId(_) => {
|
||||
Expr::Make(Box::new(Make(
|
||||
head.anno(span),
|
||||
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
|
||||
)))
|
||||
}
|
||||
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
|
||||
head.anno(span),
|
||||
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
|
||||
))),
|
||||
_ => break,
|
||||
},
|
||||
// As is ImplicitDo (semicolon elision)
|
||||
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
|
||||
Ps::ImplicitDo => break,
|
||||
Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?),
|
||||
Ps::ImplicitDo => head.and_do(span, p.parse(prec.next())?),
|
||||
Ps::Op(Op::Index) => Expr::Op(
|
||||
Op::Index,
|
||||
p.consume()
|
||||
@@ -873,7 +919,7 @@ impl<'t> Parse<'t> for Expr {
|
||||
p.consume()
|
||||
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?,
|
||||
),
|
||||
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::Path | Op::LogAnd | Op::LogOr)) => Expr::Op(
|
||||
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
|
||||
op,
|
||||
p.consume()
|
||||
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
|
||||
@@ -883,7 +929,7 @@ impl<'t> Parse<'t> for Expr {
|
||||
Expr::Op(op, vec![head.anno(span)])
|
||||
}
|
||||
Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]),
|
||||
_ => unimplemented!("infix {op:?}"),
|
||||
_ => Err(ParseError::NotInfix(kind, span))?,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user