// © 2023 John Breaux
//! A [Token] is a [semantically tagged](Type) sequence of characters

use crate::Error;
use regex::Regex;
use std::{
    fmt::{Debug, Display},
    sync::OnceLock,
};

/// Implements regex matching functions on [`Token`] for each [`Type`],
/// and implements [`From<&str>`] for [`Token`]
///
/// Each entry of the form
/// `pub fn name(text: &str) -> Option<Self> { regex!(Variant = r"pattern") }`
/// expands to an associated function which compiles `pattern` once, on first use
/// (cached in a function-local [`OnceLock`]), and tags the match it finds in `text`
/// with `Variant`.
///
/// The order of the entries is significant: the generated [`From<&str>`] impl tries
/// them from top to bottom and returns the first one that matches.
macro_rules! regex_impl {
(<$t:lifetime> $type:ty {$(
    $(#[$meta:meta])*
    pub fn $func:ident (text: &str) -> Option<Self> {
        regex!($out:path = $re:literal)
    }
)*}) => {
impl<$t> $type {
    /// Lexes a token only for the expected `variant`
    ///
    /// Warning: This bypasses precedence rules. Only use for specific patterns.
    ///
    /// # Errors
    /// Returns [`Error::UnexpectedToken`] when `text` does not match the pattern
    /// registered for `expected`. Its `got` field is built from whatever the
    /// [`From<&str>`] impl lexes from `text` instead.
    pub fn expect(text: &$t str, expected: Type) -> Result<Self, Error> {
        match expected {$(
            $out => Self::$func(text),
        )*}
        // Build the error lazily: re-lexing `text` to fill in `got` is only
        // needed (and only paid for) when the expected pattern did not match.
        .ok_or_else(|| Error::UnexpectedToken {
            expected,
            got: Self::from(text).into(),
        })
    }
    $(
    $(#[$meta])*
    /// Tries to read [`
    #[doc = stringify!($out)]
    /// `] from `text`
    pub fn $func(text: &$t str) -> Option<Self> {
        // One compiled regex per generated function, initialized on first call.
        // The pattern is a literal from the macro invocation, so a failure to
        // compile is a programming error and panics here.
        static RE: OnceLock<Regex> = OnceLock::new();
        let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
            .find(text)?.into();
        Some(Self { variant: $out, lexeme })
    })*
}
impl<$t> From<&$t str> for $type {
    // Tries every generated matcher in declaration order and returns the first hit.
    // Panics (via `todo!`) if none of them match `value`.
    fn from (value: &$t str) -> Self {
        $(
            if let Some(token) = Self::$func(value) {
                token
            } else
        )*
        {todo!("Unexpected input: {value:#?} (Tokenization failure)")}
    }
}
};
}

/// A [Token] is a [semantically tagged](Type) sequence of characters
///
/// A token only borrows its text (for the `'text` lifetime), so it is [Copy];
/// see [OwnedToken] for a variant which owns its lexeme.
///
/// The [Default] token is a [Type::EndOfFile] with an empty lexeme.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'text> {
    /// The type of this token
    variant: Type,
    /// The sub[str]ing corresponding to this token
    lexeme: &'text str,
}

impl<'text> Token<'text> {
    /// Gets the [Type] this [Token] was tagged with
    pub fn variant(&self) -> Type {
        let Self { variant, .. } = self;
        *variant
    }

    /// Gets the lexeme: the string slice this token was read from
    pub fn lexeme(&self) -> &'text str {
        let Self { lexeme, .. } = self;
        lexeme
    }

    /// Parses the lexeme of this [Token] into any type implementing
    /// [FromStr](std::str::FromStr), forwarding that type's parse error
    pub fn parse<F: std::str::FromStr>(&self) -> Result<F, F::Err> {
        F::from_str(self.lexeme)
    }

    /// Checks whether this token is tagged with the `expected` [Type]
    pub fn is_variant(&self, expected: Type) -> bool {
        expected == self.variant
    }

    /// Gets the length of [Self::lexeme], measured in bytes.
    pub fn len(&self) -> usize {
        self.lexeme().len()
    }

    /// Checks whether [Self::lexeme] is zero bytes long.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl<'text> Debug for Token<'text> {
    /// Formats the token as a two-entry list: `[variant, "lexeme"]`
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { variant, lexeme } = self;
        let mut list = f.debug_list();
        list.entry(variant);
        list.entry(lexeme);
        list.finish()
    }
}

impl<'text> Display for Token<'text> {
    /// Newline and end-of-file tokens are shown by their [Type] alone;
    /// every other token is shown as its [Type] followed by the quoted lexeme.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { variant, lexeme } = self;
        if matches!(self.variant, Type::Endl | Type::EndOfFile) {
            // Delegate directly, so the caller's formatter is handed to the variant
            return Display::fmt(variant, f);
        }
        write!(f, "{variant} \"{lexeme}\"")
    }
}

/// A [token Type](Type) is a semantic tag for a sequence of characters
///
/// The [Default] type is [Type::EndOfFile]. The derived ordering follows the
/// declaration order of the variants.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
    /// Contiguous whitespace, excluding newline
    Space,
    /// Newline and the contiguous whitespace following it
    Endl,
    /// A line-comment
    Comment,
    /// Jump label *definition*
    Label,
    /// Instructions
    Insn,
    /// Operand width is byte
    ByteWidth,
    /// Operand width is word
    WordWidth,
    /// Register mnemonic (e.g. `pc`, `r14`)
    Register,
    /// Marker for base-10
    RadixMarkerDec,
    /// Marker for base-16
    RadixMarkerHex,
    /// Marker for base-8
    RadixMarkerOct,
    /// Marker for base-2
    RadixMarkerBin,
    /// A number, written as a run of hexadecimal digits
    ///
    /// NOTE(review): this was documented as "1-4 hexadigit numbers only", but the
    /// lexer pattern for this type accepts any number of digits. Confirm where
    /// (or whether) the digit count is limited.
    Number,
    /// Negative number marker
    Minus,
    /// Post-increment mode marker
    Plus,
    /// Open-Indexed-Mode marker
    LParen,
    /// Close-Indexed-Mode marker
    RParen,
    /// Indirect mode marker
    Indirect,
    /// Absolute address marker
    Absolute,
    /// Immediate value marker
    Immediate,
    /// Valid identifier. Identifiers must start with a Latin alphabetic character or underscore
    Identifier,
    /// Assembler directive
    Directive,
    /// Separator (comma)
    Separator,
    /// End of File marker
    #[default]
    EndOfFile,
    /// Invalid token
    Invalid,
}

// Lexer rules: one matcher per token `Type`.
//
// Every pattern below is anchored with `^`, so a rule only matches at the very
// start of `text`. The order of the rules is significant: `Token::from(&str)`
// tries them from top to bottom and returns the first one that matches, so
// earlier rules take precedence over later ones.
regex_impl! {<'text> Token<'text> {
    // One or more whitespace characters other than `\n`
    pub fn expect_space(text: &str) -> Option<Self> {
        regex!(Type::Space = r"^[\s--\n]+")
    }
    // A `\n`, plus any non-newline whitespace directly after it
    pub fn expect_endl(text: &str) -> Option<Self> {
        regex!(Type::Endl = r"^\n[\s--\n]*")
    }
    // Starts with `;`, `//`, `<...>` or `{...}`, and runs to the end of the line
    // (`.` does not match `\n`)
    pub fn expect_comment(text: &str) -> Option<Self> {
        regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
    }
    // A single `:`
    pub fn expect_label(text: &str) -> Option<Self> {
        regex!(Type::Label = r"^:")
    }
    // A case-insensitive instruction mnemonic, which must end at an ASCII word
    // boundary (so a longer word which merely starts with a mnemonic does not match)
    pub fn expect_insn(text: &str) -> Option<Self> {
        regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
    }
    // `.b`, case-insensitive. Listed before the directive rule, which would
    // otherwise claim anything starting with `.`
    pub fn expect_byte_width(text: &str) -> Option<Self> {
        regex!(Type::ByteWidth = r"(?i)^\.b")
    }
    // `.w`, case-insensitive. Listed before the directive rule for the same reason
    pub fn expect_word_width(text: &str) -> Option<Self> {
        regex!(Type::WordWidth = r"(?i)^\.w")
    }
    // `r` followed by one or more decimal digits, or one of `pc`, `sp`, `sr`, `cg`
    // (case-insensitive), ending at an ASCII word boundary
    pub fn expect_register(text: &str) -> Option<Self> {
        // Previous, stricter pattern (r0 through r15 only, no word boundary):
        //   r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)"
        // The current pattern accepts any register number.
        regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
    }
    // `0d`, case-insensitive
    pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
        regex!(Type::RadixMarkerDec = r"(?i)^0d")
    }
    // `0x` (case-insensitive) or `$`
    pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
        regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
    }
    // `0o`, case-insensitive
    pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
        regex!(Type::RadixMarkerOct = r"(?i)^0o")
    }
    // `0b`, case-insensitive
    pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
        regex!(Type::RadixMarkerBin = r"(?i)^0b")
    }
    // One or more hexadecimal digits, ending at an ASCII word boundary.
    // NOTE(review): the leading `^+?` is a lazy repetition of the start anchor,
    // not an optional `+` sign. Confirm whether `^\+?` was intended.
    pub fn expect_number(text: &str) -> Option<Self> {
        regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)")
    }
    // A single `-`
    pub fn expect_minus(text: &str) -> Option<Self> {
        regex!(Type::Minus = r"^-")
    }
    // A single `+`
    pub fn expect_plus(text: &str) -> Option<Self> {
        regex!(Type::Plus = r"^\+")
    }
    // A single `(`
    pub fn expect_l_paren(text: &str) -> Option<Self> {
        regex!(Type::LParen = r"^\(")
    }
    // A single `)`
    pub fn expect_r_paren(text: &str) -> Option<Self> {
        regex!(Type::RParen = r"^\)")
    }
    // A single `@`
    // NOTE(review): the function name is misspelled (`indrect`); it is left as-is
    // here because it is part of the public interface.
    pub fn expect_indrect(text: &str) -> Option<Self> {
        regex!(Type::Indirect = r"^@")
    }
    // A single `&`
    pub fn expect_absolute(text: &str) -> Option<Self> {
        regex!(Type::Absolute = r"^&")
    }
    // A single `#`
    pub fn expect_immediate(text: &str) -> Option<Self> {
        regex!(Type::Immediate = r"^#")
    }
    // A `.` followed by one or more non-whitespace characters
    pub fn expect_directive(text: &str) -> Option<Self> {
        regex!(Type::Directive = r"^\.\S+")
    }
    // An ASCII letter or `_`, followed by any number of word characters
    pub fn expect_identifier(text: &str) -> Option<Self> {
        regex!(Type::Identifier = r"^[A-Za-z_]\w*")
    }
    // A single `,`
    pub fn expect_separator(text: &str) -> Option<Self> {
        regex!(Type::Separator = r"^,")
    }
    // Matches only when `text` is empty
    pub fn expect_end_of_file(text: &str) -> Option<Self> {
        regex!(Type::EndOfFile = r"^$")
    }
    // Catch-all: everything up to the end of the line is tagged as invalid.
    // This must stay last, since it matches whatever the rules above rejected.
    pub fn expect_anything(text: &str) -> Option<Self> {
        regex!(Type::Invalid = r"^.*")
    }
}}

impl Display for Type {
    /// Writes the human-readable name of this token [Type]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the name first, then format it once through the caller's formatter
        let name = match self {
            Self::Space => "space",
            Self::Endl => "newline",
            Self::Comment => "comment",
            Self::Label => "label definition",
            Self::Insn => "opcode",
            Self::ByteWidth => "byte-width",
            Self::WordWidth => "word-width",
            Self::Register => "register",
            Self::RadixMarkerDec => "decimal marker",
            Self::RadixMarkerHex => "hexadecimal marker",
            Self::RadixMarkerOct => "octal marker",
            Self::RadixMarkerBin => "binary marker",
            Self::Number => "number",
            Self::Minus => "minus sign",
            Self::Plus => "plus sign",
            Self::LParen => "left parenthesis",
            Self::RParen => "right parenthesis",
            Self::Indirect => "indirect",
            Self::Absolute => "absolute",
            Self::Immediate => "immediate",
            Self::Identifier => "identifier",
            Self::Directive => "directive",
            Self::Separator => "comma",
            Self::EndOfFile => "EOF",
            Self::Invalid => "invalid token",
        };
        Display::fmt(name, f)
    }
}

/// A [Token] which can outlive its parent buffer
///
/// Holds the same data as a [Token], but owns its lexeme as a [String].
/// Create one with `OwnedToken::from(token)`; borrow it back as a [Token]
/// with `Token::from(&owned_token)`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OwnedToken {
    /// The type of this token
    variant: Type,
    /// The sub[String] corresponding to this token
    lexeme: String,
}

impl Display for OwnedToken {
    /// Displays exactly like the borrowed [Token] made from this [OwnedToken]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let borrowed = Token::from(self);
        write!(f, "{borrowed}")
    }
}

impl<'t> From<&'t OwnedToken> for Token<'t> {
    fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
}

impl From<Token<'_>> for OwnedToken {
    fn from(value: Token<'_>) -> Self {
        let Token { variant, lexeme } = value;
        OwnedToken { variant, lexeme: lexeme.to_owned() }
    }
}

/// [Types] are an owned array of [types](Type), with a custom [Display] implementation
///
/// Any value which can be viewed as a slice of [Type] (an array, a slice, a [Vec])
/// converts into [Types] through [From].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Types(Vec<Type>);

impl<T: AsRef<[Type]>> From<T> for Types {
    // Copies the viewed slice into a new Vec; `value` itself is only borrowed.
    // TODO: This blanket impl is possibly bad. Check it out in the Rust playground.
    fn from(value: T) -> Self {
        let types: &[Type] = value.as_ref();
        Self(types.to_vec())
    }
}

impl Display for Types {
    /// Writes the contained [types](Type) as an English-style list:
    /// items are separated by `", "`, except for the last two, which are
    /// separated by `" or "` (for example `a, b or c`).
    ///
    /// An empty list writes nothing, and a single item is written on its own.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let len = self.0.len();
        for (idx, t) in self.0.iter().enumerate() {
            Display::fmt(t, f)?;
            // Compare with `idx + N < len` rather than `idx < len - N`: the
            // subtraction underflows `usize` when the list has fewer than N items,
            // which panics with overflow checks on, and otherwise wraps around
            // and emits a stray separator after a lone item.
            if idx + 2 < len {
                Display::fmt(", ", f)?;
            } else if idx + 1 < len {
                Display::fmt(" or ", f)?;
            }
        }
        Ok(())
    }
}