//! [String] tools for Conlang //#![warn(clippy::all)] #![feature(decl_macro, const_trait_impl)] impl ConstrTools for T {} pub trait ConstrTools { /// Unescapes string escape sequences fn unescape(self) -> UnescapeString where Self: Iterator + Sized { UnescapeString::new(self) } /// Parse an integer fn parse_int(self) -> ParseInt where Self: Iterator + Sized { ParseInt::new(self) } } pub use unescape_string::UnescapeString; pub mod unescape_string { //! TODO: Write the module-level documentation pub struct UnescapeString> { inner: I, } impl> Iterator for UnescapeString { type Item = I::Item; fn next(&mut self) -> Option { self.unescape() } } impl> UnescapeString { pub fn new(inner: I) -> Self { Self { inner } } /// Consumes an escape sequence. See the [module level documentation](self). pub fn unescape(&mut self) -> Option { match self.inner.next()? { '\\' => (), other => return Some(other), } Some(match self.inner.next()? { 'a' => '\x07', 'b' => '\x08', 'f' => '\x0c', 'n' => '\n', 'r' => '\r', 't' => '\t', 'x' => self.hex_digits::<2>()?, 'u' => self.unicode()?, '0' => '\0', byte => byte, }) } fn unicode(&mut self) -> Option { let mut out = 0; let Some('{') = self.inner.next() else { return None; }; for c in self.inner.by_ref() { match c { '}' => return char::from_u32(out), _ => out = (out << 4) + super::base::<16>(c)? as u32, } } None } fn hex_digits(&mut self) -> Option { let mut out = 0; for _ in 0..DIGITS { out = (out << 4) + self.hex_digit()? as u32; } char::from_u32(out) } fn hex_digit(&mut self) -> Option { super::base::<16>(self.inner.next()?) } } } pub use parse_int::ParseInt; pub mod parse_int { use std::marker::PhantomData; pub struct ParseInt, O> { inner: I, _data: PhantomData, } impl, O> ParseInt { pub fn new(inner: I) -> Self { Self { inner, _data: Default::default() } } fn digit(&mut self) -> Option { let next = loop { match self.inner.next()? { '_' => continue, c => break c, } }; super::base::(next) } } parse_int_impl!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); macro parse_int_impl($($T:ty),*$(,)?) {$( impl> ParseInt { fn digits(&mut self, init: Option) -> Option<$T> { let mut out = match init { Some(digit) => digit, None => self.digit::()?, } as $T; while let Some(digit) = self.digit::() { out = out.checked_mul(B as $T)?.checked_add(digit as $T)? } Some(out) } fn base(&mut self) -> Option<$T> { match self.inner.next()? { 'b' => self.digits::<2>(None), 'd' => self.digits::<10>(None), 'o' => self.digits::<8>(None), 'x' => self.digits::<16>(None), c => self.digits::<10>(Some(super::base::<10>(c)?)), } } } impl> Iterator for ParseInt { type Item = $T; fn next(&mut self) -> Option { match self.digit::<10>()? { 0 => self.base(), c if (0..=9).contains(&c) => self.digits::<10>(Some(c)), _ => None, } } } )*} } /// Converts a single char [0-9A-Za-z] to their [base B](base::) equivalent. /// /// # May Panic /// Panics in debug mode when B > 36 pub const fn base(c: char) -> Option { // TODO: Wait for a way to limit const generics at compile time debug_assert!(B <= 36); // Can't use Ord::min in const context yet :( // This function also relies on wrapping arithmetic macro wrap ($c:ident - $b:literal $(+ $ten:literal)? $(< $B:ident.min($min:literal))?) { $c.wrapping_sub($b)$(.wrapping_add($ten))? $(< if $B < $min {$B} else {$min})? } let c = c as u8; match c { c if wrap!(c - b'0' < B.min(10)) => Some(wrap!(c - b'0')), _ if B <= 10 => None, // cuts base<1..=10> to 4 instructions on x86 :^) c if wrap!(c - b'A' + 10 < B.min(36)) => Some(wrap!(c - b'A' + 10)), c if wrap!(c - b'a' + 10 < B.min(36)) => Some(wrap!(c - b'a' + 10)), _ => None, } } #[cfg(test)] mod tests { use super::*; mod unescape_string { use super::*; test_unescape! { empty = ["" => ""]; n_newline = ["\\n" => "\n", "This is a\\ntest" => "This is a\ntest"]; a_bell = ["\\a" => "\x07", "Ring the \\abell" => "Ring the \x07bell"]; b_backspace = ["\\b" => "\x08"]; f_feed = ["\\f" => "\x0c"]; r_return = ["\\r" => "\r"]; t_tab = ["\\t" => "\t"]; _0_nul = ["\\0" => "\0"]; x_hex = [ "\\x41\\x41\\x41\\x41" => "AAAA", "\x00" => "\0", "\\x7f" => "\x7f", "\\x80" => "\u{80}", "\\xD0" => "\u{D0}", ]; u_unicode = [ "\\u{41}" => "A", "\\u{1f988}" => "🦈", ]; } macro test_unescape ($($f:ident = [$($test:expr => $expect:expr),*$(,)?];)*) {$( #[test] fn $f () { $(assert_eq!($test.chars().unescape().collect::(), dbg!($expect));)* } )*} } mod parse_int { use super::*; #[test] #[should_panic] fn base_37_panics() { base::<37>('a'); } test_parse! { parse_u8: u8 = [ "0xc5" => Some(0xc5), "0xc_____________________5" => Some(0xc5), "0x7d" => Some(0x7d), "0b10" => Some(0b10), "0o10" => Some(0o10), "0x10" => Some(0x10), "0d10" => Some(10), "10" => Some(10), ]; parse_u16: u16 = [ "0xc5c5" => Some(0xc5c5), "0x1234" => Some(0x1234), "0x5678" => Some(0x5678), "0x9abc" => Some(0x9abc), "0xdef0" => Some(0xdef0), "0xg" => None, "0b10" => Some(0b10), "0o10" => Some(0o10), "0x10" => Some(0x10), "0d10" => Some(10), "10" => Some(10), ]; parse_u32: u32 = [ "0xc5c5c5c5" => Some(0xc5c5c5c5), "0xc5_c5_c5_c5" => Some(0xc5c5c5c5), "1_234_567____" => Some(1234567), "4294967295" => Some(4294967295), "4294967296" => None, "🦈" => None, ]; parse_u64: u64 = [ "0xffffffffffffffff" => Some(0xffffffffffffffff), "0x10000000000000000" => None, "0xc5c5c5c5c5c5c5c5" => Some(0xc5c5c5c5c5c5c5c5), "0x123456789abcdef0" => Some(1311768467463790320), "0x123456789abcdefg" => Some(81985529216486895), "0d1234567890" => Some(1234567890), "0o12345670" => Some(2739128), "0b10" => Some(2), ]; parse_u128: u128 = [ "0x10000000000000000" => Some(0x10000000000000000), "0xc5c5c5c5c5c5c5c5c5c5c5c5c5c5c5c5" => Some(0xc5c5c5c5c5c5c5c5c5c5c5c5c5c5c5c5), "0o77777777777777777777777777777777" => Some(0o77777777777777777777777777777777), ]; } macro test_parse ($($f:ident : $T:ty = [$($test:expr => $expect:expr),*$(,)?];)*) {$( #[test] fn $f () { type Test = $T; $(assert_eq!(($test.chars().parse_int() as ParseInt<_, Test>).next(), dbg!($expect));)* } )*} } }