constr: Misuse iterators to parse tokens
This commit is contained in:
10
constr/Cargo.toml
Normal file
10
constr/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "constr"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
236
constr/src/lib.rs
Normal file
236
constr/src/lib.rs
Normal file
@@ -0,0 +1,236 @@
|
||||
//! [String] tools for Conlang
|
||||
//#![warn(clippy::all)]
|
||||
#![feature(decl_macro, const_trait_impl)]
|
||||
|
||||
impl<T: Iterator> ConstrTools for T {}
|
||||
pub trait ConstrTools {
|
||||
/// Unescapes string escape sequences
|
||||
fn unescape(self) -> UnescapeString<Self>
|
||||
where Self: Iterator<Item = char> + Sized {
|
||||
UnescapeString::new(self)
|
||||
}
|
||||
fn parse_int<O>(self) -> ParseInt<Self, O>
|
||||
where Self: Iterator<Item = char> + Sized {
|
||||
ParseInt::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
pub use unescape_string::UnescapeString;
|
||||
pub mod unescape_string {
|
||||
//! TODO: Write the module-level documentation
|
||||
pub struct UnescapeString<I: Iterator<Item = char>> {
|
||||
inner: I,
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = char>> Iterator for UnescapeString<I> {
|
||||
type Item = I::Item;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.unescape()
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = char>> UnescapeString<I> {
|
||||
pub fn new(inner: I) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
/// Consumes an escape sequence. See the [module level documentation](self).
|
||||
pub fn unescape(&mut self) -> Option<char> {
|
||||
match self.inner.next()? {
|
||||
'\\' => (),
|
||||
other => return Some(other),
|
||||
}
|
||||
Some(match self.inner.next()? {
|
||||
'a' => '\x07',
|
||||
'b' => '\x08',
|
||||
'f' => '\x0c',
|
||||
'n' => '\n',
|
||||
't' => '\t',
|
||||
'x' => self.hex_digits::<2>()?,
|
||||
'u' => self.hex_digits::<4>()?,
|
||||
'U' => self.hex_digits::<8>()?,
|
||||
'0' => '\0',
|
||||
byte => byte,
|
||||
})
|
||||
}
|
||||
fn hex_digits<const DIGITS: u32>(&mut self) -> Option<char> {
|
||||
let mut out = 0;
|
||||
for _ in 0..DIGITS {
|
||||
out = (out << 4) + self.hex_digit()? as u32;
|
||||
}
|
||||
char::from_u32(out)
|
||||
}
|
||||
fn hex_digit(&mut self) -> Option<u8> {
|
||||
super::base::<16>(self.inner.next()?)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub use parse_int::ParseInt;
|
||||
pub mod parse_int {
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct ParseInt<I: Iterator<Item = char>, O> {
|
||||
inner: I,
|
||||
_data: PhantomData<O>,
|
||||
}
|
||||
impl<I: Iterator<Item = char>, O> ParseInt<I, O> {
|
||||
pub fn new(inner: I) -> Self {
|
||||
Self { inner, _data: Default::default() }
|
||||
}
|
||||
fn digit<const B: u8>(&mut self) -> Option<u8> {
|
||||
let next = loop {
|
||||
match self.inner.next()? {
|
||||
'_' => continue,
|
||||
c => break c,
|
||||
}
|
||||
};
|
||||
super::base::<B>(next)
|
||||
}
|
||||
}
|
||||
parse_int_impl!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128);
|
||||
macro parse_int_impl($($T:ty),*$(,)?) {$(
|
||||
impl<I: Iterator<Item = char>> ParseInt<I, $T> {
|
||||
fn digits<const B: u8>(&mut self, init: Option<u8>) -> Option<$T> {
|
||||
let mut out = match init {
|
||||
Some(digit) => digit,
|
||||
None => self.digit::<B>()?,
|
||||
} as $T;
|
||||
while let Some(digit) = self.digit::<B>() {
|
||||
out = out.checked_mul(B as $T)?.checked_add(digit as $T)?
|
||||
}
|
||||
Some(out)
|
||||
}
|
||||
fn base(&mut self) -> Option<$T> {
|
||||
match self.inner.next()? {
|
||||
'b' => self.digits::<2>(None),
|
||||
'd' => self.digits::<10>(None),
|
||||
'o' => self.digits::<8>(None),
|
||||
'x' => self.digits::<16>(None),
|
||||
c => self.digits::<10>(Some(super::base::<10>(c)?)),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<I: Iterator<Item = char>> Iterator for ParseInt<I, $T> {
|
||||
type Item = $T;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.digit::<10>()? {
|
||||
0 => self.base(),
|
||||
c if (0..=9).contains(&c) => self.digits::<10>(Some(c)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
)*}
|
||||
}
|
||||
|
||||
/// Converts a single char `[0-9A-Za-z]` to its value as a digit in base `B`.
///
/// Letters are case-insensitive: `'a'`/`'A'` is 10 and `'z'`/`'Z'` is 35.
/// Returns [`None`] for every character that is not a digit of base `B`,
/// including all non-ASCII characters.
///
/// # May Panic
/// Panics in debug mode when B > 36
pub const fn base<const B: u8>(c: char) -> Option<u8> {
    // TODO: Wait for a way to limit const generics at compile time
    debug_assert!(B <= 36);

    // Reject non-ASCII before narrowing: `c as u8` keeps only the low byte, so
    // e.g. U+0130 would otherwise be mistaken for '0' and U+0141 for 'A'.
    if !c.is_ascii() {
        return None;
    }
    let c = c as u8;

    // Can't use Ord::min in const context yet :(
    let decimal_digits = if B < 10 { B } else { 10 };
    // Wrapping subtraction maps every byte below the range start to a large
    // value, so a single `<` checks both ends of the range.
    let decimal = c.wrapping_sub(b'0');
    if decimal < decimal_digits {
        return Some(decimal);
    }
    // Bases up to 10 have no letter digits, so they can stop here.
    if B <= 10 {
        return None;
    }

    // Number of letter digits in this base (B > 10 here, capped at 26).
    // The range check must happen *before* adding 10: adding first wraps bytes
    // just below 'A'/'a' (such as ':' or 'W') back into the digit range.
    let letter_digits = (if B < 36 { B } else { 36 }) - 10;
    let upper = c.wrapping_sub(b'A');
    if upper < letter_digits {
        return Some(upper + 10);
    }
    let lower = c.wrapping_sub(b'a');
    if lower < letter_digits {
        return Some(lower + 10);
    }
    None
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven tests for the `unescape` adapter.
    mod unescape_string {
        use super::*;

        test_unescape! {
            empty = ["" => ""];
            n_newline = ["\\n" => "\n", "This is a\\ntest" => "This is a\ntest"];
            a_bell = ["\\a" => "\x07", "Ring the \\abell" => "Ring the \x07bell"];
            b_backspace = ["\\b" => "\x08"];
            f_feed = ["\\f" => "\x0c"];
            t_tab = ["\\t" => "\t"];
            _0_nul = ["\\0" => "\0"];
            x_hex = [
                "\\x41\\x41\\x41\\x41" => "AAAA",
                // The input must be the escape sequence itself (backslash, 'x',
                // '0', '0'), not an already-decoded NUL character.
                "\\x00" => "\0",
                "\\x7f" => "\x7f",
                "\\x80" => "\u{80}",
                "\\xD0" => "\u{D0}",
            ];
        }

        /// Generates one `#[test]` per row; each `input => expected` pair is
        /// unescaped and compared against the expected string.
        macro test_unescape ($($f:ident = [$($test:expr => $expect:expr),*$(,)?];)*) {$(
            #[test] fn $f () {
                $(assert_eq!($test.chars().unescape().collect::<String>(), $expect);)*
            }
        )*}
    }

    /// Table-driven tests for the `parse_int` adapter and `base`.
    mod parse_int {
        use super::*;

        #[test]
        #[should_panic]
        fn base_37_panics() {
            base::<37>('a');
        }

        test_parse! {
            parse_u8: u8 = [
                "0xc5" => Some(0xc5),
                "0xc_____________________5" => Some(0xc5),
                "0x7d" => Some(0x7d),
                "0b10" => Some(0b10),
                "0o10" => Some(0o10),
                "0x10" => Some(0x10),
                "0d10" => Some(10),
                "10" => Some(10),
            ];
            parse_u16: u16 = [
                "0xc5c5" => Some(0xc5c5),
                "0x1234" => Some(0x1234),
                "0x5678" => Some(0x5678),
                "0x9abc" => Some(0x9abc),
                "0xdef0" => Some(0xdef0),
                "0xg" => None,
                "0b10" => Some(0b10),
                "0o10" => Some(0o10),
                "0x10" => Some(0x10),
                "0d10" => Some(10),
                "10" => Some(10),
            ];
            parse_u32: u32 = [
                "0xc5c5c5c5" => Some(0xc5c5c5c5),
                "0xc5_c5_c5_c5" => Some(0xc5c5c5c5),
                "1_234_567____" => Some(1234567),
                "4294967295" => Some(4294967295),
                "4294967296" => None,
                "🦈" => None,
            ];
            parse_u64: u64 = [
                "0xffffffffffffffff" => Some(0xffffffffffffffff),
                "0x10000000000000000" => None,
                "0xc5c5c5c5c5c5c5c5" => Some(0xc5c5c5c5c5c5c5c5),
                "0x123456789abcdef0" => Some(1311768467463790320),
                "0x123456789abcdefg" => Some(81985529216486895),
                "0d1234567890" => Some(1234567890),
                "0o12345670" => Some(2739128),
                "0b10" => Some(2),
            ];
            parse_u128: u128 = [
                "0x10000000000000000" => Some(0x10000000000000000),
                "0xc5c5c5c5c5c5c5c5c5c5c5c5c5c5c5c5" => Some(0xc5c5c5c5c5c5c5c5c5c5c5c5c5c5c5c5),
                "0o77777777777777777777777777777777" => Some(0o77777777777777777777777777777777),
            ];
        }

        /// Generates one `#[test]` per row; each input is parsed as `$T` and
        /// the first item yielded is compared against the expected value.
        macro test_parse ($($f:ident : $T:ty = [$($test:expr => $expect:expr),*$(,)?];)*) {$(
            #[test] fn $f () {
                type Test = $T;
                $(assert_eq!(($test.chars().parse_int() as ParseInt<_, Test>).next(), $expect);)*
            }
        )*}
    }
}
|
||||
Reference in New Issue
Block a user