integer: Match based on base, if present

This commit is contained in:
John 2023-09-25 16:43:50 -05:00
parent 03660fd641
commit 247bb1f8f8
2 changed files with 35 additions and 6 deletions

View File

@ -59,4 +59,15 @@ pub trait Combinator: Sized {
where Self: Clone { where Self: Clone {
self.clone().and(f).or(|_| g(self)) self.clone().and(f).or(|_| g(self))
} }
/// Tries each function in `f`, in order, and stops at the first one that succeeds.
///
/// Before every attempt the current value is passed through `into_alright`, then the
/// candidate is applied with `and`. As soon as `is_alright` reports success the value
/// is returned. If `f` is empty, `self` is returned untouched; if every candidate
/// fails, the value produced by the last attempt is returned.
fn and_one_of(mut self, f: &[&dyn Fn(Self) -> Self]) -> Self {
    for alternative in f {
        // `*alternative` is a `&dyn Fn(Self) -> Self`, which is `Copy`.
        self = self.into_alright().and(*alternative);
        if self.is_alright() {
            return self;
        }
    }
    self
}
} }

View File

@ -58,6 +58,11 @@ pub mod lexer {
pub fn new(text: &'t str) -> Self { pub fn new(text: &'t str) -> Self {
Self { text, cursor: 0 } Self { text, cursor: 0 }
} }
/// Advances the cursor past whatever `Rule::whitespace` matches (zero or more times,
/// via `and_any`) at the start of the remaining text. Leaves the cursor unchanged
/// when the rule yields no length.
fn skip_whitespace(&mut self) {
    let skipped = Rule::new(self.text())
        .and_any(Rule::whitespace)
        .end();
    // `None` contributes nothing, so the cursor stays where it was.
    self.cursor += skipped.unwrap_or(0);
}
fn produce_token(&mut self, ty: Type, len: usize) -> Option<Token> { fn produce_token(&mut self, ty: Type, len: usize) -> Option<Token> {
let start = self.cursor; let start = self.cursor;
self.cursor += len; self.cursor += len;
@ -66,9 +71,6 @@ pub mod lexer {
fn text(&self) -> &str { fn text(&self) -> &str {
&self.text[self.cursor..] &self.text[self.cursor..]
} }
/// Advances the cursor by the length that `Rule::whitespace` matches at the current
/// position, or by nothing if the rule does not match.
fn skip_whitespace(&mut self) {
    // Match against the remaining text (`self.text()` slices from the cursor), not the
    // whole input in `self.text`; otherwise the length is measured from the start of
    // the input and the cursor is advanced by the wrong amount.
    let skipped = Rule::new(self.text())
        .whitespace()
        .end()
        .unwrap_or_default();
    self.cursor += skipped;
}
// functions for lexing individual tokens // functions for lexing individual tokens
pub fn line_comment(&mut self) -> Option<Token> { pub fn line_comment(&mut self) -> Option<Token> {
// line_comment := "//" ~ (^newline)* // line_comment := "//" ~ (^newline)*
@ -120,8 +122,13 @@ pub mod lexer {
self.produce_token( self.produce_token(
Type::Integer, Type::Integer,
Rule::new(self.text()) Rule::new(self.text())
.and_maybe(|rule| rule.char('0').char_fn(|c| "xdob".contains(c))) .and_one_of(&[
.and_many(|this| this.char_fn(|c| c.is_ascii_hexdigit())) &|rule| rule.str("0x").and_any(Rule::hex_digit),
&|rule| rule.str("0d").and_any(Rule::dec_digit),
&|rule| rule.str("0o").and_any(Rule::oct_digit),
&|rule| rule.str("0b").and_any(Rule::bin_digit),
&|rule| rule.and_many(Rule::dec_digit),
])
.end()?, .end()?,
) )
} }
@ -192,6 +199,18 @@ pub mod lexer {
use unicode_xid::UnicodeXID; use unicode_xid::UnicodeXID;
self.char_fn(UnicodeXID::is_xid_continue) self.char_fn(UnicodeXID::is_xid_continue)
} }
/// Applies `char_fn` with a predicate that accepts ASCII hexadecimal digits
/// (`0-9`, `a-f`, `A-F`).
pub fn hex_digit(self) -> Self {
    let is_hex = |ch: char| ch.is_ascii_hexdigit();
    self.char_fn(is_hex)
}
/// Applies `char_fn` with a predicate that accepts ASCII decimal digits (`0-9`).
pub fn dec_digit(self) -> Self {
    let is_dec = |ch: char| ch.is_ascii_digit();
    self.char_fn(is_dec)
}
/// Octal-digit rule: delegates to `char_between('0', '7')`.
// NOTE(review): presumably `char_between` treats both bounds as inclusive, so that
// '7' is accepted — confirm against its definition.
pub fn oct_digit(self) -> Self {
self.char_between('0', '7')
}
/// Binary-digit rule: delegates to `char_between('0', '1')`.
// NOTE(review): presumably `char_between` treats both bounds as inclusive, so that
// '1' is accepted — confirm against its definition.
pub fn bin_digit(self) -> Self {
self.char_between('0', '1')
}
fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self { fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self {
let len = next_utf8(self.text, len); let len = next_utf8(self.text, len);
self.and(|rule| match condition(&rule) && !rule.text.is_empty() { self.and(|rule| match condition(&rule) && !rule.text.is_empty() {
@ -335,7 +354,6 @@ mod tests {
assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer); assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer); assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer); assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
assert_whole_input_is_token("123456789ABCDEF0", Lexer::integer, Type::Integer);
} }
#[test] #[test]
fn base16() { fn base16() {