token & lexer: add line & col to tokens
parent 545483dae6
commit c15490847a
@@ -83,22 +83,31 @@ pub mod token {
         ty: Type,
         head: usize,
         tail: usize,
+        line: usize,
+        col: usize,
     }
     impl Token {
-        pub fn new(ty: Type, head: usize, tail: usize) -> Self {
-            Self { ty, head, tail }
+        pub fn new(ty: Type, head: usize, tail: usize, line: usize, col: usize) -> Self {
+            Self { ty, head, tail, line, col }
+        }
+        pub fn line(&self) -> usize {
+            self.line
+        }
+        pub fn col(&self) -> usize {
+            self.col
         }
         pub fn is_empty(&self) -> bool {
            self.tail == self.head
         }
+        /// Gets the length of the token, in bytes
         pub fn len(&self) -> usize {
             self.tail - self.head
         }
-        // Gets the [Type] of the token
+        /// Gets the [Type] of the token
         pub fn ty(&self) -> Type {
             self.ty
         }
-        // Gets the exclusive range of the token
+        /// Gets the exclusive range of the token
         pub fn range(&self) -> Range<usize> {
             self.head..self.tail
         }
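Not part of the diff: a minimal sketch of how a caller might use the new position accessors for diagnostics, using only the Token API visible in the hunk above (the source_name parameter is hypothetical).

    // Builds a "file:line:col" style message from a token's new position info.
    fn describe(source_name: &str, tok: &Token) -> String {
        format!(
            "{}:{}:{}: token spanning bytes {:?}",
            source_name,
            tok.line(),
            tok.col(),
            tok.range()
        )
    }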
@@ -118,25 +127,48 @@ pub mod lexer {
     pub struct Lexer<'t> {
         text: &'t str,
         cursor: usize,
+        line: usize,
+        col: usize,
     }
     /// Implements the non-terminals of a language
     impl<'t> Lexer<'t> {
         pub fn new(text: &'t str) -> Self {
-            Self { text, cursor: 0 }
+            Self { text, cursor: 0, line: 1, col: 1 }
+        }
+        /// Counts some length
+        #[inline]
+        fn count_len(&mut self, len: usize) -> &mut Self {
+            self.cursor += len;
+            self.col += len;
+            self
+        }
+        /// Counts a line
+        #[inline]
+        fn count_line(&mut self, lines: usize) -> &mut Self {
+            self.line += lines;
+            self.col = 1;
+            self
         }
         /// Skips whitespace in the text
         fn skip_whitespace(&mut self) {
-            if let Some(len) = Rule::new(self.text()).and_any(Rule::whitespace).end() {
-                self.cursor += len
+            self.count_len(
+                Rule::new(self.text())
+                    .and_any(Rule::whitespace_not_newline)
+                    .end()
+                    .unwrap_or_default(),
+            );
+            if Rule::new(self.text()).char('\n').end().is_some() {
+                // recurse until all newlines are skipped
+                self.count_len(1).count_line(1).skip_whitespace();
             }
         }
         /// Advances the cursor and produces a token from a provided [Rule] function
         fn map_rule<F>(&mut self, rule: F, ty: Type) -> Option<Token>
         where F: Fn(Rule) -> Rule {
             self.skip_whitespace();
-            let start = self.cursor;
-            self.cursor += Rule::new(self.text()).and(rule).end()?;
-            Some(Token::new(ty, start, self.cursor))
+            let (line, col, start) = (self.line, self.col, self.cursor);
+            self.count_len(Rule::new(self.text()).and(rule).end()?);
+            Some(Token::new(ty, start, self.cursor, line, col))
         }
         /// Gets a slice of text beginning at the cursor
         fn text(&self) -> &str {
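Not part of the diff: a self-contained sketch of the bookkeeping count_len and count_line perform, under the conventions visible above (line and col start at 1, count_len adds a byte length to col, count_line resets col to 1).

    // Mirrors the lexer's arithmetic without touching the Lexer type itself.
    #[test]
    fn position_bookkeeping_sketch() {
        let (mut line, mut col) = (1usize, 1usize);
        for ch in "let x\n  = 5;".chars() {
            if ch == '\n' {
                line += 1; // count_line(1)
                col = 1;
            } else {
                col += ch.len_utf8(); // count_len adds byte lengths, not char counts
            }
        }
        assert_eq!((line, col), (2, 7));
    }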
@@ -553,6 +585,10 @@ pub mod lexer {
         pub fn whitespace(self) -> Self {
             self.char_fn(|c| c.is_whitespace())
         }
+        /// Matches one whitespace, except `'\n'`
+        pub fn whitespace_not_newline(self) -> Self {
+            self.char_fn(|c| '\n' != c && c.is_whitespace())
+        }
         /// Matches anything but whitespace
         pub fn not_whitespace(self) -> Self {
             self.char_fn(|c| !c.is_whitespace())
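Not part of the diff: the new rule's predicate in isolation — it accepts any whitespace except the newline, which skip_whitespace now counts through count_line instead.

    // Same closure as in whitespace_not_newline above.
    let wsnn = |c: char| '\n' != c && c.is_whitespace();
    assert!(wsnn(' ') && wsnn('\t') && !wsnn('\n'));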
@@ -587,6 +623,7 @@ pub mod lexer {
         pub fn string_escape(self) -> Self {
             self.char('\\').and(Rule::any)
         }
+        /// Performs a consuming condition assertion on the input
         fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self {
             let len = next_utf8(self.text, len);
             self.and(|rule| match condition(&rule) && !rule.text.is_empty() {
@@ -629,12 +666,15 @@ mod tests {
         use crate::token::*;
         #[test]
         fn token_has_type() {
-            assert_eq!(Token::new(Type::Comment, 0, 10).ty(), Type::Comment);
-            assert_eq!(Token::new(Type::Identifier, 0, 10).ty(), Type::Identifier);
+            assert_eq!(Token::new(Type::Comment, 0, 10, 1, 1).ty(), Type::Comment);
+            assert_eq!(
+                Token::new(Type::Identifier, 0, 10, 1, 1).ty(),
+                Type::Identifier
+            );
         }
         #[test]
         fn token_has_range() {
-            let t = Token::new(Type::Comment, 0, 10);
+            let t = Token::new(Type::Comment, 0, 10, 1, 1);
             assert_eq!(t.range(), 0..10);
         }
     }
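Not part of the diff: an additional assertion one could add alongside these tests, using only the constructor and accessors introduced by this commit.

    #[test]
    fn token_has_position() {
        let t = Token::new(Type::Comment, 5, 15, 3, 9);
        assert_eq!((t.line(), t.col()), (3, 9));
    }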
@@ -850,7 +890,7 @@ mod tests {
         #[test]
         fn escape_string() {
             assert_whole_input_is_token(
-                r#"" \"This is a quote\" ""#,
+                "\" \\\"This is a quote\\\" \"",
                 Lexer::lit_string,
                 Type::LitString,
             );