Unit tests: Add more lexer tests, add parser tests

2024-01-31 17:59:55 -06:00
parent 860c9d4a97
commit 11bae9b348
2 changed files with 388 additions and 23 deletions
--- a/src/lexer/tests.rs
+++ b/src/lexer/tests.rs
@@ -1,7 +1,11 @@
 use super::*;
 macro_rules! lex {
-    (type ($t:tt), $expected:expr) => {
-        let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token));
+    (type ($($t:tt)*), $expected:expr) => {
+        let token = Lexer::new(stringify!($($t)*)).scan().expect(stringify!($($t:tt)* should yield a valid token));
+        assert_eq!(token.kind, $expected);
+    };
+    (str $t:literal, $expected:expr) => {
+        let token = Lexer::new($t).scan().expect(stringify!($t:tt should yield a valid token));
        assert_eq!(token.kind, $expected);
    };
    ({ $($t:tt)* }) => {
@@ -40,27 +44,132 @@ fn no_operand_emulated() {
    lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
 }
 #[test]
+fn br() { // the `br` mnemonic must lex to the Special::Br token kind
+    lex!(type (br), TokenKind::Special(Special::Br));
+}
+#[test]
+fn one_operand_emulated() { // each mnemonic below must lex to the matching OneEm variant
+    lex!(type (pop), TokenKind::OneEm(OneEm::Pop));
+    lex!(type (rla), TokenKind::OneEm(OneEm::Rla));
+    lex!(type (rlc), TokenKind::OneEm(OneEm::Rlc));
+    lex!(type (inv), TokenKind::OneEm(OneEm::Inv));
+    lex!(type (clr), TokenKind::OneEm(OneEm::Clr));
+    lex!(type (tst), TokenKind::OneEm(OneEm::Tst));
+    lex!(type (dec), TokenKind::OneEm(OneEm::Dec));
+    lex!(type (decd), TokenKind::OneEm(OneEm::Decd));
+    lex!(type (inc), TokenKind::OneEm(OneEm::Inc));
+    lex!(type (incd), TokenKind::OneEm(OneEm::Incd));
+    lex!(type (adc), TokenKind::OneEm(OneEm::Adc));
+    lex!(type (dadc), TokenKind::OneEm(OneEm::Dadc));
+    lex!(type (sbc), TokenKind::OneEm(OneEm::Sbc));
+}
+#[test]
+fn one_operand() { // each mnemonic below must lex to the matching OneArg variant
+    lex!(type (rrc), TokenKind::OneArg(OneArg::Rrc));
+    lex!(type (swpb), TokenKind::OneArg(OneArg::Swpb));
+    lex!(type (rra), TokenKind::OneArg(OneArg::Rra));
+    lex!(type (sxt), TokenKind::OneArg(OneArg::Sxt));
+    lex!(type (push), TokenKind::OneArg(OneArg::Push));
+    lex!(type (call), TokenKind::OneArg(OneArg::Call));
+    lex!(type (reti), TokenKind::OneArg(OneArg::Reti)); // the lexer files `reti` under OneArg
+}
+#[test]
+fn two_operand() { // each mnemonic below must lex to the matching TwoArg variant
+    lex!(type (mov), TokenKind::TwoArg(TwoArg::Mov));
+    lex!(type (add), TokenKind::TwoArg(TwoArg::Add));
+    lex!(type (addc), TokenKind::TwoArg(TwoArg::Addc));
+    lex!(type (subc), TokenKind::TwoArg(TwoArg::Subc));
+    lex!(type (sub), TokenKind::TwoArg(TwoArg::Sub));
+    lex!(type (cmp), TokenKind::TwoArg(TwoArg::Cmp));
+    lex!(type (dadd), TokenKind::TwoArg(TwoArg::Dadd));
+    lex!(type (bit), TokenKind::TwoArg(TwoArg::Bit));
+    lex!(type (bic), TokenKind::TwoArg(TwoArg::Bic));
+    lex!(type (bis), TokenKind::TwoArg(TwoArg::Bis));
+    lex!(type (xor), TokenKind::TwoArg(TwoArg::Xor));
+    lex!(type (and), TokenKind::TwoArg(TwoArg::And));
+}
+#[test]
+fn jump() { // each jump mnemonic must lex to its own Jump variant
+    lex!(type (jne), TokenKind::Jump(Jump::Jne));
+    lex!(type (jnz), TokenKind::Jump(Jump::Jnz)); // jnz and jne are expected as distinct variants
+    lex!(type (jeq), TokenKind::Jump(Jump::Jeq));
+    lex!(type (jz), TokenKind::Jump(Jump::Jz));
+    lex!(type (jnc), TokenKind::Jump(Jump::Jnc));
+    lex!(type (jlo), TokenKind::Jump(Jump::Jlo));
+    lex!(type (jc), TokenKind::Jump(Jump::Jc));
+    lex!(type (jhs), TokenKind::Jump(Jump::Jhs));
+    lex!(type (jn), TokenKind::Jump(Jump::Jn));
+    lex!(type (jge), TokenKind::Jump(Jump::Jge));
+    lex!(type (jl), TokenKind::Jump(Jump::Jl));
+    lex!(type (jmp), TokenKind::Jump(Jump::Jmp));
+}
+#[test] // register names, including the r0-r3 aliases, must lex to Reg tokens
 fn registers() {
-    lex!(type(pc), TokenKind::Reg(Reg::PC));
-    lex!(type(sp), TokenKind::Reg(Reg::SP));
-    lex!(type(sr), TokenKind::Reg(Reg::SR));
-    lex!(type(cg), TokenKind::Reg(Reg::CG));
-    lex!(type(r0), TokenKind::Reg(Reg::PC));
-    lex!(type(r1), TokenKind::Reg(Reg::SP));
-    lex!(type(r2), TokenKind::Reg(Reg::SR));
-    lex!(type(r3), TokenKind::Reg(Reg::CG));
-    lex!(type(r4), TokenKind::Reg(Reg::R4));
-    lex!(type(r5), TokenKind::Reg(Reg::R5));
-    lex!(type(r6), TokenKind::Reg(Reg::R6));
-    lex!(type(r7), TokenKind::Reg(Reg::R7));
-    lex!(type(r8), TokenKind::Reg(Reg::R8));
-    lex!(type(r9), TokenKind::Reg(Reg::R9));
-    lex!(type(r10), TokenKind::Reg(Reg::R10));
-    lex!(type(r11), TokenKind::Reg(Reg::R11));
-    lex!(type(r12), TokenKind::Reg(Reg::R12));
-    lex!(type(r13), TokenKind::Reg(Reg::R13));
-    lex!(type(r14), TokenKind::Reg(Reg::R14));
-    lex!(type(r15), TokenKind::Reg(Reg::R15));
+    lex!(type (pc), TokenKind::Reg(Reg::PC));
+    lex!(type (sp), TokenKind::Reg(Reg::SP));
+    lex!(type (sr), TokenKind::Reg(Reg::SR));
+    lex!(type (cg), TokenKind::Reg(Reg::CG));
+    lex!(type (r0), TokenKind::Reg(Reg::PC)); // r0-r3 yield the same tokens as pc, sp, sr, cg
+    lex!(type (r1), TokenKind::Reg(Reg::SP));
+    lex!(type (r2), TokenKind::Reg(Reg::SR));
+    lex!(type (r3), TokenKind::Reg(Reg::CG));
+    lex!(type (r4), TokenKind::Reg(Reg::R4));
+    lex!(type (r5), TokenKind::Reg(Reg::R5));
+    lex!(type (r6), TokenKind::Reg(Reg::R6));
+    lex!(type (r7), TokenKind::Reg(Reg::R7));
+    lex!(type (r8), TokenKind::Reg(Reg::R8));
+    lex!(type (r9), TokenKind::Reg(Reg::R9));
+    lex!(type (r10), TokenKind::Reg(Reg::R10));
+    lex!(type (r11), TokenKind::Reg(Reg::R11));
+    lex!(type (r12), TokenKind::Reg(Reg::R12));
+    lex!(type (r13), TokenKind::Reg(Reg::R13));
+    lex!(type (r14), TokenKind::Reg(Reg::R14));
+    lex!(type (r15), TokenKind::Reg(Reg::R15));
 }

-// TODO: opcode tests, misc. special character tests, etc.
+#[test]
+fn delimiters() { // `str` form: a lone bracket or a newline cannot be passed as a macro token tree
+    lex!(str "", TokenKind::Eof); // empty input yields Eof rather than an error
+    lex!(str "\n", TokenKind::Newline);
+    lex!(str "(", TokenKind::OpenParen);
+    lex!(str ")", TokenKind::CloseParen);
+    lex!(str "{", TokenKind::OpenCurly);
+    lex!(str "}", TokenKind::CloseCurly);
+    lex!(str "[", TokenKind::OpenBrace); // square brackets are the "Brace" kinds in this lexer
+    lex!(str "]", TokenKind::CloseBrace);
+}
+
+#[test]
+fn comment() { // input starting with `;` must produce a Comment token as the first token
+    lex!(str "; this is a comment!\n\n", TokenKind::Comment);
+}
+
+#[test]
+fn other() { // punctuation, operators, directives, identifiers and width suffixes
+    // lex!(type (), TokenKind::)
+    lex!(type (,), TokenKind::Comma);
+    lex!(type (:), TokenKind::Colon);
+    lex!(type (!), TokenKind::Bang);
+    lex!(type (@), TokenKind::At);
+    lex!(type (&), TokenKind::Amp);
+    lex!(type (|), TokenKind::Bar);
+    lex!(type (^), TokenKind::Caret);
+    lex!(type (*), TokenKind::Star);
+    lex!(type (#), TokenKind::Hash);
+    lex!(type ($), TokenKind::Dollar);
+    lex!(type (%), TokenKind::Percent);
+    lex!(type (+), TokenKind::Plus);
+    lex!(type (-), TokenKind::Minus);
+    lex!(type (/), TokenKind::Slash);
+    lex!(type (<<), TokenKind::Lsh);
+    lex!(type (>>), TokenKind::Rsh);
+    lex!(type (.directive), TokenKind::Directive); // NOTE(review): two tokens (`.` + ident); relies on stringify! spacing, which std does not guarantee
+    lex!(type (identifier), TokenKind::Identifier);
+    lex!(type (.b), TokenKind::Byte); // same stringify! spacing caveat as `.directive`
+    lex!(type (.w), TokenKind::Word); // same stringify! spacing caveat as `.directive`
+}
+
+#[test]
+fn ignores_leading_whitespace() { // space, U+00A0 (no-break space) and tabs before `-` are skipped
+    lex!(str " \u{a0}\t\t\t\t\t\t\t-", TokenKind::Minus);
+}
--- a/src/parser/tests.rs
+++ b/src/parser/tests.rs
@@ -0,0 +1,256 @@
+#![allow(non_upper_case_globals)]
+use super::*;
+use crate::lexer::token;
+
+/// Panics unless `$e` matches a given pattern; [assert_matches](core::assert_matches::assert_matches) is unstable
+macro_rules! assert_matches {
+    ($e: expr, $($p: pat $(if $condition:expr)?)* ) => { // patterns are listed back-to-back, each with an optional `if` guard
+        match $e {
+            $($p $(if $condition)? => (),)* // one unit-valued arm per accepted pattern
+            _ => panic!("{}", stringify!($e did not match $($p),*)), // message names the expression and the patterns
+        }
+    };
+}
+
+/// Simplified grammar for building the expected `Expr` values used by the tests below
+macro_rules! expr {
+    ($ident:ident) => { // bare identifier: Expr::Ident holding the identifier's name as a string
+        Expr::Ident(stringify!($ident)).into()
+    };
+    ($lit:literal) => { // literal: Expr::Number
+        Expr::Number($lit).into()
+    };
+    (& $lit:literal) => { // `&` + literal (not an identifier): Expr::AddrOf
+        Expr::AddrOf($lit).into()
+    };
+    (($($t:tt)*)) => { // exactly one parenthesized group: Expr::Group around the inner expression
+        Expr::Group(expr!($($t)*))
+    };
+    ([$($op:tt)*] $($t:tt)*) => { // `[Op Op ...] operand`: Expr::Unary with the UnOp variants in listed order
+        Expr::Unary(vec![$(UnOp::$op),*], expr!($($t)*))
+    };
+    (($($a:tt)*) $($op:tt ($($b:tt)*))+) => { // `(first) Op(rhs) Op(rhs) ...`: Expr::Binary with (BinOp, rhs) pairs
+        Expr::Binary(expr!($($a)*), vec![$((BinOp::$op, expr!($($b)*))),+])
+    }
+}
+
+macro_rules! passert { // asserts that parsing `$text` yields a value equal to `$expected`
+    ($expected:expr, $text:literal) => { // the type to parse is not named; presumably inferred from `$expected`
+        assert_eq!($expected, Parsable::parse($text).unwrap()) // unwrap: a parse error fails the test
+    };
+}
+
+// #[test]
+// fn statements() {
+// passert!(, "");
+// }
+// #[test]
+// fn statement() {
+// passert!(, "");
+// }
+#[test]
+fn directive() { // each directive text must parse to the matching Directive variant
+    passert!(Directive::Org(expr!(0x8000)), ".org 0x8000");
+    passert!(Directive::String("Hello, world!"), ".string \"Hello, world!\"");
+    assert_eq!(Directive::parse(".word 0x40").unwrap(), Directive::Word(expr!(0x40)));
+    passert!( // `.words` takes a bracketed, whitespace-separated list of expressions
+        Directive::Words(vec![expr!(0x40), expr!(0x41), expr!(0x42), expr!(0x43)]),
+        ".words [ 0x40 0x41 0x42 0x43 ]"
+    );
+}
+// #[test]
+// fn instruction() {
+// passert!(, "");
+// }
+#[test]
+fn instruction_kind() { // only the InstructionKind variant is checked (`{ .. }`), not the fields
+    assert_matches!(Parsable::parse("nop").unwrap(), InstructionKind::NoEm(NoEm { .. }));
+
+    assert_matches!(Parsable::parse("pop sp").unwrap(), InstructionKind::OneEm(OneEm { .. }));
+}
+#[test]
+fn no_em() { // each operand-less emulated mnemonic parses to a NoEm node carrying its opcode token
+    passert!(NoEm { opcode: token::NoEm::Nop }, "nop");
+    passert!(NoEm { opcode: token::NoEm::Ret }, "ret");
+    passert!(NoEm { opcode: token::NoEm::Clrc }, "clrc");
+    passert!(NoEm { opcode: token::NoEm::Clrz }, "clrz");
+    passert!(NoEm { opcode: token::NoEm::Clrn }, "clrn");
+    passert!(NoEm { opcode: token::NoEm::Setc }, "setc");
+    passert!(NoEm { opcode: token::NoEm::Setz }, "setz");
+    passert!(NoEm { opcode: token::NoEm::Setn }, "setn");
+    passert!(NoEm { opcode: token::NoEm::Dint }, "dint");
+    passert!(NoEm { opcode: token::NoEm::Eint }, "eint");
+}
+#[test]
+fn one_em() { // every OneEm mnemonic, with and without the `.b` suffix, against the same operand
+    const dst: Dst = Dst::Direct(Reg::R15); // shared expected operand: `r15` in every case below
+    let width = Width::Word; // no suffix is expected to parse as Word
+    passert!(OneEm { opcode: token::OneEm::Pop, width, dst }, "pop r15");
+    passert!(OneEm { opcode: token::OneEm::Rla, width, dst }, "rla r15");
+    passert!(OneEm { opcode: token::OneEm::Rlc, width, dst }, "rlc r15");
+    passert!(OneEm { opcode: token::OneEm::Inv, width, dst }, "inv r15");
+    passert!(OneEm { opcode: token::OneEm::Clr, width, dst }, "clr r15");
+    passert!(OneEm { opcode: token::OneEm::Tst, width, dst }, "tst r15");
+    passert!(OneEm { opcode: token::OneEm::Dec, width, dst }, "dec r15");
+    passert!(OneEm { opcode: token::OneEm::Decd, width, dst }, "decd r15");
+    passert!(OneEm { opcode: token::OneEm::Inc, width, dst }, "inc r15");
+    passert!(OneEm { opcode: token::OneEm::Incd, width, dst }, "incd r15");
+    passert!(OneEm { opcode: token::OneEm::Adc, width, dst }, "adc r15");
+    passert!(OneEm { opcode: token::OneEm::Dadc, width, dst }, "dadc r15");
+    passert!(OneEm { opcode: token::OneEm::Sbc, width, dst }, "sbc r15");
+
+    let width = Width::Byte; // shadows the Word binding: the `.b` suffix is expected to parse as Byte
+    passert!(OneEm { opcode: token::OneEm::Pop, width, dst }, "pop.b r15");
+    passert!(OneEm { opcode: token::OneEm::Rla, width, dst }, "rla.b r15");
+    passert!(OneEm { opcode: token::OneEm::Rlc, width, dst }, "rlc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Inv, width, dst }, "inv.b r15");
+    passert!(OneEm { opcode: token::OneEm::Clr, width, dst }, "clr.b r15");
+    passert!(OneEm { opcode: token::OneEm::Tst, width, dst }, "tst.b r15");
+    passert!(OneEm { opcode: token::OneEm::Dec, width, dst }, "dec.b r15");
+    passert!(OneEm { opcode: token::OneEm::Decd, width, dst }, "decd.b r15");
+    passert!(OneEm { opcode: token::OneEm::Inc, width, dst }, "inc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Incd, width, dst }, "incd.b r15");
+    passert!(OneEm { opcode: token::OneEm::Adc, width, dst }, "adc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Dadc, width, dst }, "dadc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Sbc, width, dst }, "sbc.b r15");
+}
+#[test]
+fn one_arg() { // OneArg mnemonics (all but `reti`), with and without `.b`, against the same operand
+    const src: Src = Src::Direct(Reg::PC); // shared expected operand: `pc` in every case below
+    let width = Width::Word; // no suffix is expected to parse as Word
+    passert!(OneArg { opcode: token::OneArg::Rrc, width, src }, "rrc pc");
+    passert!(OneArg { opcode: token::OneArg::Swpb, width, src }, "swpb pc");
+    passert!(OneArg { opcode: token::OneArg::Rra, width, src }, "rra pc");
+    passert!(OneArg { opcode: token::OneArg::Sxt, width, src }, "sxt pc");
+    passert!(OneArg { opcode: token::OneArg::Push, width, src }, "push pc");
+    passert!(OneArg { opcode: token::OneArg::Call, width, src }, "call pc");
+
+    let width = Width::Byte; // NOTE(review): presumably MSP430, where swpb/sxt/call are word-only; these cases pin parser acceptance only - confirm
+    passert!(OneArg { opcode: token::OneArg::Rrc, width, src }, "rrc.b pc");
+    passert!(OneArg { opcode: token::OneArg::Swpb, width, src }, "swpb.b pc");
+    passert!(OneArg { opcode: token::OneArg::Rra, width, src }, "rra.b pc");
+    passert!(OneArg { opcode: token::OneArg::Sxt, width, src }, "sxt.b pc");
+    passert!(OneArg { opcode: token::OneArg::Push, width, src }, "push.b pc");
+    passert!(OneArg { opcode: token::OneArg::Call, width, src }, "call.b pc");
+}
+#[test]
+fn two_arg() { // every TwoArg mnemonic, with and without `.b`, against the same operand pair
+    const src: Src = Src::Direct(Reg::R14); // first operand in the text (`r14`)
+    const dst: Dst = Dst::Direct(Reg::R15); // second operand in the text (`r15`)
+    let width = Width::Word; // no suffix is expected to parse as Word
+    passert!(TwoArg { opcode: token::TwoArg::Mov, width, src, dst }, "mov r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Add, width, src, dst }, "add r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Addc, width, src, dst }, "addc r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Subc, width, src, dst }, "subc r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Sub, width, src, dst }, "sub r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Cmp, width, src, dst }, "cmp r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Dadd, width, src, dst }, "dadd r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bit, width, src, dst }, "bit r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bic, width, src, dst }, "bic r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bis, width, src, dst }, "bis r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Xor, width, src, dst }, "xor r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::And, width, src, dst }, "and r14, r15");
+
+    let width = Width::Byte; // shadows the Word binding: the `.b` suffix is expected to parse as Byte
+    passert!(TwoArg { opcode: token::TwoArg::Mov, width, src, dst }, "mov.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Add, width, src, dst }, "add.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Addc, width, src, dst }, "addc.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Subc, width, src, dst }, "subc.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Sub, width, src, dst }, "sub.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Cmp, width, src, dst }, "cmp.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Dadd, width, src, dst }, "dadd.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bit, width, src, dst }, "bit.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bic, width, src, dst }, "bic.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bis, width, src, dst }, "bis.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Xor, width, src, dst }, "xor.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::And, width, src, dst }, "and.b r14, r15");
+}
+#[test]
+fn jump() { // every jump mnemonic with a numeric target parses to a Jump node with a relative destination
+    const dst100: JumpDst = JumpDst::Rel(100); // the bare number `100` is expected as JumpDst::Rel(100)
+    passert!(Jump { opcode: token::Jump::Jne, dst: dst100 }, "jne 100");
+    passert!(Jump { opcode: token::Jump::Jnz, dst: dst100 }, "jnz 100");
+    passert!(Jump { opcode: token::Jump::Jeq, dst: dst100 }, "jeq 100");
+    passert!(Jump { opcode: token::Jump::Jz, dst: dst100 }, "jz 100");
+    passert!(Jump { opcode: token::Jump::Jnc, dst: dst100 }, "jnc 100");
+    passert!(Jump { opcode: token::Jump::Jlo, dst: dst100 }, "jlo 100");
+    passert!(Jump { opcode: token::Jump::Jc, dst: dst100 }, "jc 100");
+    passert!(Jump { opcode: token::Jump::Jhs, dst: dst100 }, "jhs 100");
+    passert!(Jump { opcode: token::Jump::Jn, dst: dst100 }, "jn 100");
+    passert!(Jump { opcode: token::Jump::Jge, dst: dst100 }, "jge 100");
+    passert!(Jump { opcode: token::Jump::Jl, dst: dst100 }, "jl 100");
+    passert!(Jump { opcode: token::Jump::Jmp, dst: dst100 }, "jmp 100");
+}
+#[test]
+fn reti() { // `reti` alone parses to the unit-like Reti node
+    passert!(Reti, "reti");
+}
+#[test]
+fn br() { // `br` takes a single source operand, here a direct register
+    passert!(Br { src: Src::Direct(Reg::R15) }, "br r15");
+}
+#[test]
+fn width() { // width suffix parsing, including the no-suffix default
+    passert!(Width::Byte, ".b");
+    passert!(Width::Word, ".w");
+    passert!(Width::Word, ""); // an absent suffix is expected to parse as Word
+}
+#[test]
+fn src() { // one case per source-operand form
+    passert!(Src::Direct(Reg::R15), "r15");
+    passert!(Src::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)"); // expression, then register in parens
+    passert!(Src::Indirect(Reg::R15), "@r15");
+    passert!(Src::PostInc(Reg::R15), "@r15+");
+    passert!(Src::Absolute(expr!(0x1000)), "&0x1000");
+    passert!(Src::Immediate(expr!(0x1000)), "#0x1000");
+    passert!(Src::BareExpr(expr!(foo)), "foo"); // an expression with no sigil is expected as BareExpr
+}
+#[test]
+fn dst() { // one case per destination-operand form exercised here
+    passert!(Dst::Direct(Reg::R15), "r15");
+    passert!(Dst::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)");
+    passert!(Dst::Absolute(expr!(0x1000)), "&0x1000");
+    passert!(Dst::Special(DstSpecial::Zero), "#0"); // `#0` and `#1` are expected as Special, not a general immediate
+    passert!(Dst::Special(DstSpecial::One), "#1");
+}
+#[test]
+fn jump_dst() { // numeric targets parse as Rel, identifiers as Label
+    passert!(JumpDst::Rel(100), "100");
+    passert!(JumpDst::Rel(-100), "-100"); // a leading minus yields a negative Rel value
+    passert!(JumpDst::Label("foo"), "foo");
+}
+
+#[test]
+fn expr() { // one case per expression form, then one combined case pinning the nesting
+    // Terms (operators * % /)
+    passert!(expr!((1) Mul(2) Rem(3) Div(4)), "1 * 2 % 3 / 4");
+    // Factors (operators + -)
+    passert!(expr!((1) Add(2) Sub(3)), "1 + 2 - 3");
+    // Shift (operators << >>)
+    passert!(expr!((1) Lsh(2) Rsh(3)), "1 << 2 >> 3");
+    // Bitwise logic (operators & | ^)
+    passert!(expr!((1) And(2) Or(3) Xor(4)), "1 & 2 | 3 ^ 4");
+    // Unary: prefix operators are listed in source order
+    passert!(expr!([Deref Neg Not] 1), "*-!1");
+    // Number
+    passert!(Expr::Number(42), "42");
+    // Identifier
+    passert!(Expr::Ident("foo"), "foo");
+    // Addrof
+    passert!(Expr::AddrOf("bar"), "&bar");
+    // Group
+    passert!(expr!((42)), "(42)");
+    // All of the above. NOTE(review): the expected tree binds `&` tightest and `*` loosest, the reverse of C-style precedence - confirm intended
+    passert!(
+        expr!(
+            (4) Mul(
+                (3) Add(
+                    (2) Lsh(
+                        (1) And([Neg] 1)
+                    ) Rsh([Deref] 2)
+                ) Add([Not] 3)
+            ) Mul(4)
+        ),
+        "4 * 3 + 2 << 1 & -1 >> *2 + !3 * 4"
+    );
+}