From 11bae9b348d18e1b59cfd89de482c08d00db1513 Mon Sep 17 00:00:00 2001
From: John Breaux
Date: Wed, 31 Jan 2024 17:59:55 -0600
Subject: [PATCH] Unit tests: Add more lexer tests, add parser tests

---
 src/lexer/tests.rs  | 155 +++++++++++++++++++++++----
 src/parser/tests.rs | 256 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 388 insertions(+), 23 deletions(-)
 create mode 100644 src/parser/tests.rs

diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs
index df3c0dc..296e40c 100644
--- a/src/lexer/tests.rs
+++ b/src/lexer/tests.rs
@@ -1,7 +1,11 @@
 use super::*;
 macro_rules! lex {
-    (type ($t:tt), $expected:expr) => {
-        let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token));
+    (type ($($t:tt)*), $expected:expr) => { // `type` arm: stringify the tokens, scan once, compare the token kind
+        let token = Lexer::new(stringify!($($t)*)).scan().expect(stringify!($($t)* should yield a valid token));
+        assert_eq!(token.kind, $expected);
+    };
+    (str $t:literal, $expected:expr) => { // `str` arm: scan the string literal as given, compare the token kind
+        let token = Lexer::new($t).scan().expect(stringify!($t should yield a valid token));
         assert_eq!(token.kind, $expected);
     };
     ({ $($t:tt)* }) => {
@@ -40,27 +44,132 @@ fn no_operand_emulated() {
     lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
 }
 #[test]
+fn br() {
+    lex!(type (br), TokenKind::Special(Special::Br));
+}
+#[test]
+fn one_operand_emulated() {
+    lex!(type (pop), TokenKind::OneEm(OneEm::Pop));
+    lex!(type (rla), TokenKind::OneEm(OneEm::Rla));
+    lex!(type (rlc), TokenKind::OneEm(OneEm::Rlc));
+    lex!(type (inv), TokenKind::OneEm(OneEm::Inv));
+    lex!(type (clr), TokenKind::OneEm(OneEm::Clr));
+    lex!(type (tst), TokenKind::OneEm(OneEm::Tst));
+    lex!(type (dec), TokenKind::OneEm(OneEm::Dec));
+    lex!(type (decd), TokenKind::OneEm(OneEm::Decd));
+    lex!(type (inc), TokenKind::OneEm(OneEm::Inc));
+    lex!(type (incd), TokenKind::OneEm(OneEm::Incd));
+    lex!(type (adc), TokenKind::OneEm(OneEm::Adc));
+    lex!(type (dadc), TokenKind::OneEm(OneEm::Dadc));
+    lex!(type (sbc), TokenKind::OneEm(OneEm::Sbc));
+}
+#[test]
+fn one_operand() {
+    lex!(type (rrc), TokenKind::OneArg(OneArg::Rrc));
+    lex!(type (swpb), TokenKind::OneArg(OneArg::Swpb));
+    lex!(type (rra), TokenKind::OneArg(OneArg::Rra));
+    lex!(type (sxt), TokenKind::OneArg(OneArg::Sxt));
+    lex!(type (push), TokenKind::OneArg(OneArg::Push));
+    lex!(type (call), TokenKind::OneArg(OneArg::Call));
+    lex!(type (reti), TokenKind::OneArg(OneArg::Reti));
+}
+#[test]
+fn two_operand() {
+    lex!(type (mov), TokenKind::TwoArg(TwoArg::Mov));
+    lex!(type (add), TokenKind::TwoArg(TwoArg::Add));
+    lex!(type (addc), TokenKind::TwoArg(TwoArg::Addc));
+    lex!(type (subc), TokenKind::TwoArg(TwoArg::Subc));
+    lex!(type (sub), TokenKind::TwoArg(TwoArg::Sub));
+    lex!(type (cmp), TokenKind::TwoArg(TwoArg::Cmp));
+    lex!(type (dadd), TokenKind::TwoArg(TwoArg::Dadd));
+    lex!(type (bit), TokenKind::TwoArg(TwoArg::Bit));
+    lex!(type (bic), TokenKind::TwoArg(TwoArg::Bic));
+    lex!(type (bis), TokenKind::TwoArg(TwoArg::Bis));
+    lex!(type (xor), TokenKind::TwoArg(TwoArg::Xor));
+    lex!(type (and), TokenKind::TwoArg(TwoArg::And));
+}
+#[test]
+fn jump() {
+    lex!(type (jne), TokenKind::Jump(Jump::Jne));
+    lex!(type (jnz), TokenKind::Jump(Jump::Jnz));
+    lex!(type (jeq), TokenKind::Jump(Jump::Jeq));
+    lex!(type (jz), TokenKind::Jump(Jump::Jz));
+    lex!(type (jnc), TokenKind::Jump(Jump::Jnc));
+    lex!(type (jlo), TokenKind::Jump(Jump::Jlo));
+    lex!(type (jc), TokenKind::Jump(Jump::Jc));
+    lex!(type (jhs), TokenKind::Jump(Jump::Jhs));
+    lex!(type (jn), TokenKind::Jump(Jump::Jn));
+    lex!(type (jge), TokenKind::Jump(Jump::Jge));
+    lex!(type (jl), TokenKind::Jump(Jump::Jl));
+    lex!(type (jmp), TokenKind::Jump(Jump::Jmp));
+}
+#[test]
 fn registers() {
-    lex!(type(pc), TokenKind::Reg(Reg::PC));
-    lex!(type(sp), TokenKind::Reg(Reg::SP));
-    lex!(type(sr), TokenKind::Reg(Reg::SR));
-    lex!(type(cg), TokenKind::Reg(Reg::CG));
-    lex!(type(r0), TokenKind::Reg(Reg::PC));
-    lex!(type(r1), TokenKind::Reg(Reg::SP));
-    lex!(type(r2), TokenKind::Reg(Reg::SR));
-    lex!(type(r3), TokenKind::Reg(Reg::CG));
-    lex!(type(r4), TokenKind::Reg(Reg::R4));
-    lex!(type(r5), TokenKind::Reg(Reg::R5));
-    lex!(type(r6), TokenKind::Reg(Reg::R6));
-    lex!(type(r7), TokenKind::Reg(Reg::R7));
-    lex!(type(r8), TokenKind::Reg(Reg::R8));
-    lex!(type(r9), TokenKind::Reg(Reg::R9));
-    lex!(type(r10), TokenKind::Reg(Reg::R10));
-    lex!(type(r11), TokenKind::Reg(Reg::R11));
-    lex!(type(r12), TokenKind::Reg(Reg::R12));
-    lex!(type(r13), TokenKind::Reg(Reg::R13));
-    lex!(type(r14), TokenKind::Reg(Reg::R14));
-    lex!(type(r15), TokenKind::Reg(Reg::R15));
+    lex!(type (pc), TokenKind::Reg(Reg::PC));
+    lex!(type (sp), TokenKind::Reg(Reg::SP));
+    lex!(type (sr), TokenKind::Reg(Reg::SR));
+    lex!(type (cg), TokenKind::Reg(Reg::CG));
+    lex!(type (r0), TokenKind::Reg(Reg::PC)); // r0-r3 are expected to lex as pc, sp, sr, cg
+    lex!(type (r1), TokenKind::Reg(Reg::SP));
+    lex!(type (r2), TokenKind::Reg(Reg::SR));
+    lex!(type (r3), TokenKind::Reg(Reg::CG));
+    lex!(type (r4), TokenKind::Reg(Reg::R4));
+    lex!(type (r5), TokenKind::Reg(Reg::R5));
+    lex!(type (r6), TokenKind::Reg(Reg::R6));
+    lex!(type (r7), TokenKind::Reg(Reg::R7));
+    lex!(type (r8), TokenKind::Reg(Reg::R8));
+    lex!(type (r9), TokenKind::Reg(Reg::R9));
+    lex!(type (r10), TokenKind::Reg(Reg::R10));
+    lex!(type (r11), TokenKind::Reg(Reg::R11));
+    lex!(type (r12), TokenKind::Reg(Reg::R12));
+    lex!(type (r13), TokenKind::Reg(Reg::R13));
+    lex!(type (r14), TokenKind::Reg(Reg::R14));
+    lex!(type (r15), TokenKind::Reg(Reg::R15));
 }
 
-// TODO: opcode tests, misc. special character tests, etc.
+#[test]
+fn delimiters() {
+    lex!(str "", TokenKind::Eof);
+    lex!(str "\n", TokenKind::Newline);
+    lex!(str "(", TokenKind::OpenParen);
+    lex!(str ")", TokenKind::CloseParen);
+    lex!(str "{", TokenKind::OpenCurly);
+    lex!(str "}", TokenKind::CloseCurly);
+    lex!(str "[", TokenKind::OpenBrace);
+    lex!(str "]", TokenKind::CloseBrace);
+}
+
+#[test]
+fn comment() {
+    lex!(str "; this is a comment!\n\n", TokenKind::Comment);
+}
+
+#[test]
+fn other() {
+    // lex!(type (), TokenKind::)
+    lex!(type (,), TokenKind::Comma);
+    lex!(type (:), TokenKind::Colon);
+    lex!(type (!), TokenKind::Bang);
+    lex!(type (@), TokenKind::At);
+    lex!(type (&), TokenKind::Amp);
+    lex!(type (|), TokenKind::Bar);
+    lex!(type (^), TokenKind::Caret);
+    lex!(type (*), TokenKind::Star);
+    lex!(type (#), TokenKind::Hash);
+    lex!(type ($), TokenKind::Dollar);
+    lex!(type (%), TokenKind::Percent);
+    lex!(type (+), TokenKind::Plus);
+    lex!(type (-), TokenKind::Minus);
+    lex!(type (/), TokenKind::Slash);
+    lex!(type (<<), TokenKind::Lsh);
+    lex!(type (>>), TokenKind::Rsh);
+    lex!(type (.directive), TokenKind::Directive);
+    lex!(type (identifier), TokenKind::Identifier);
+    lex!(type (.b), TokenKind::Byte);
+    lex!(type (.w), TokenKind::Word);
+}
+
+#[test]
+fn ignores_leading_whitespace() {
+    lex!(str " \u{a0}\t\t\t\t\t\t\t-", TokenKind::Minus);
+}
diff --git a/src/parser/tests.rs b/src/parser/tests.rs
new file mode 100644
index 0000000..12f77f3
--- /dev/null
+++ b/src/parser/tests.rs
@@ -0,0 +1,256 @@
+#![allow(non_upper_case_globals)]
+use super::*;
+use crate::lexer::token;
+
+/// Local stand-in for [assert_matches](core::assert_matches::assert_matches), which is unstable: panics unless `$e` matches one of the given patterns
+macro_rules! assert_matches {
+    ($e: expr, $($p: pat $(if $condition:expr)?)* ) => {
+        match $e {
+            $($p $(if $condition)? => (),)*
+            _ => panic!("{}", stringify!($e did not match $($p),*)),
+        }
+    };
+}
+
+/// Simplified grammar for constructing an expected expression: `ident`, `literal`, `& literal`, `(group)`, `[UnOps] operand`, or `(lhs) BinOp(rhs) ...`
+macro_rules! expr {
+    ($ident:ident) => {
+        Expr::Ident(stringify!($ident)).into()
+    };
+    ($lit:literal) => {
+        Expr::Number($lit).into()
+    };
+    (& $lit:literal) => {
+        Expr::AddrOf($lit).into()
+    };
+    (($($t:tt)*)) => {
+        Expr::Group(expr!($($t)*))
+    };
+    ([$($op:tt)*] $($t:tt)*) => {
+        Expr::Unary(vec![$(UnOp::$op),*], expr!($($t)*))
+    };
+    (($($a:tt)*) $($op:tt ($($b:tt)*))+) => {
+        Expr::Binary(expr!($($a)*), vec![$((BinOp::$op, expr!($($b)*))),+])
+    }
+}
+
+macro_rules! passert { // parse `$text`, unwrap the result, and require it to equal `$expected`
+    ($expected:expr, $text:literal) => {
+        assert_eq!($expected, Parsable::parse($text).unwrap())
+    };
+}
+
+// #[test]
+// fn statements() {
+//     passert!(, "");
+// }
+// #[test]
+// fn statement() {
+//     passert!(, "");
+// }
+#[test]
+fn directive() {
+    passert!(Directive::Org(expr!(0x8000)), ".org 0x8000");
+    passert!(Directive::String("Hello, world!"), ".string \"Hello, world!\"");
+    assert_eq!(Directive::parse(".word 0x40").unwrap(), Directive::Word(expr!(0x40)));
+    passert!(
+        Directive::Words(vec![expr!(0x40), expr!(0x41), expr!(0x42), expr!(0x43)]),
+        ".words [ 0x40 0x41 0x42 0x43 ]"
+    );
+}
+// #[test]
+// fn instruction() {
+//     passert!(, "");
+// }
+#[test]
+fn instruction_kind() {
+    assert_matches!(Parsable::parse("nop").unwrap(), InstructionKind::NoEm(NoEm { .. }));
+
+    assert_matches!(Parsable::parse("pop sp").unwrap(), InstructionKind::OneEm(OneEm { .. }));
+}
+#[test]
+fn no_em() {
+    passert!(NoEm { opcode: token::NoEm::Nop }, "nop");
+    passert!(NoEm { opcode: token::NoEm::Ret }, "ret");
+    passert!(NoEm { opcode: token::NoEm::Clrc }, "clrc");
+    passert!(NoEm { opcode: token::NoEm::Clrz }, "clrz");
+    passert!(NoEm { opcode: token::NoEm::Clrn }, "clrn");
+    passert!(NoEm { opcode: token::NoEm::Setc }, "setc");
+    passert!(NoEm { opcode: token::NoEm::Setz }, "setz");
+    passert!(NoEm { opcode: token::NoEm::Setn }, "setn");
+    passert!(NoEm { opcode: token::NoEm::Dint }, "dint");
+    passert!(NoEm { opcode: token::NoEm::Eint }, "eint");
+}
+#[test]
+fn one_em() {
+    const dst: Dst = Dst::Direct(Reg::R15);
+    let width = Width::Word;
+    passert!(OneEm { opcode: token::OneEm::Pop, width, dst }, "pop r15");
+    passert!(OneEm { opcode: token::OneEm::Rla, width, dst }, "rla r15");
+    passert!(OneEm { opcode: token::OneEm::Rlc, width, dst }, "rlc r15");
+    passert!(OneEm { opcode: token::OneEm::Inv, width, dst }, "inv r15");
+    passert!(OneEm { opcode: token::OneEm::Clr, width, dst }, "clr r15");
+    passert!(OneEm { opcode: token::OneEm::Tst, width, dst }, "tst r15");
+    passert!(OneEm { opcode: token::OneEm::Dec, width, dst }, "dec r15");
+    passert!(OneEm { opcode: token::OneEm::Decd, width, dst }, "decd r15");
+    passert!(OneEm { opcode: token::OneEm::Inc, width, dst }, "inc r15");
+    passert!(OneEm { opcode: token::OneEm::Incd, width, dst }, "incd r15");
+    passert!(OneEm { opcode: token::OneEm::Adc, width, dst }, "adc r15");
+    passert!(OneEm { opcode: token::OneEm::Dadc, width, dst }, "dadc r15");
+    passert!(OneEm { opcode: token::OneEm::Sbc, width, dst }, "sbc r15");
+
+    let width = Width::Byte; // same opcodes again, now written with the `.b` suffix
+    passert!(OneEm { opcode: token::OneEm::Pop, width, dst }, "pop.b r15");
+    passert!(OneEm { opcode: token::OneEm::Rla, width, dst }, "rla.b r15");
+    passert!(OneEm { opcode: token::OneEm::Rlc, width, dst }, "rlc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Inv, width, dst }, "inv.b r15");
+    passert!(OneEm { opcode: token::OneEm::Clr, width, dst }, "clr.b r15");
+    passert!(OneEm { opcode: token::OneEm::Tst, width, dst }, "tst.b r15");
+    passert!(OneEm { opcode: token::OneEm::Dec, width, dst }, "dec.b r15");
+    passert!(OneEm { opcode: token::OneEm::Decd, width, dst }, "decd.b r15");
+    passert!(OneEm { opcode: token::OneEm::Inc, width, dst }, "inc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Incd, width, dst }, "incd.b r15");
+    passert!(OneEm { opcode: token::OneEm::Adc, width, dst }, "adc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Dadc, width, dst }, "dadc.b r15");
+    passert!(OneEm { opcode: token::OneEm::Sbc, width, dst }, "sbc.b r15");
+}
+#[test]
+fn one_arg() {
+    const src: Src = Src::Direct(Reg::PC);
+    let width = Width::Word;
+    passert!(OneArg { opcode: token::OneArg::Rrc, width, src }, "rrc pc");
+    passert!(OneArg { opcode: token::OneArg::Swpb, width, src }, "swpb pc");
+    passert!(OneArg { opcode: token::OneArg::Rra, width, src }, "rra pc");
+    passert!(OneArg { opcode: token::OneArg::Sxt, width, src }, "sxt pc");
+    passert!(OneArg { opcode: token::OneArg::Push, width, src }, "push pc");
+    passert!(OneArg { opcode: token::OneArg::Call, width, src }, "call pc");
+
+    let width = Width::Byte; // same opcodes again, now written with the `.b` suffix
+    passert!(OneArg { opcode: token::OneArg::Rrc, width, src }, "rrc.b pc");
+    passert!(OneArg { opcode: token::OneArg::Swpb, width, src }, "swpb.b pc");
+    passert!(OneArg { opcode: token::OneArg::Rra, width, src }, "rra.b pc");
+    passert!(OneArg { opcode: token::OneArg::Sxt, width, src }, "sxt.b pc");
+    passert!(OneArg { opcode: token::OneArg::Push, width, src }, "push.b pc");
+    passert!(OneArg { opcode: token::OneArg::Call, width, src }, "call.b pc");
+}
+#[test]
+fn two_arg() {
+    const src: Src = Src::Direct(Reg::R14);
+    const dst: Dst = Dst::Direct(Reg::R15);
+    let width = Width::Word;
+    passert!(TwoArg { opcode: token::TwoArg::Mov, width, src, dst }, "mov r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Add, width, src, dst }, "add r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Addc, width, src, dst }, "addc r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Subc, width, src, dst }, "subc r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Sub, width, src, dst }, "sub r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Cmp, width, src, dst }, "cmp r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Dadd, width, src, dst }, "dadd r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bit, width, src, dst }, "bit r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bic, width, src, dst }, "bic r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bis, width, src, dst }, "bis r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Xor, width, src, dst }, "xor r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::And, width, src, dst }, "and r14, r15");
+
+    let width = Width::Byte; // same opcodes again, now written with the `.b` suffix
+    passert!(TwoArg { opcode: token::TwoArg::Mov, width, src, dst }, "mov.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Add, width, src, dst }, "add.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Addc, width, src, dst }, "addc.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Subc, width, src, dst }, "subc.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Sub, width, src, dst }, "sub.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Cmp, width, src, dst }, "cmp.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Dadd, width, src, dst }, "dadd.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bit, width, src, dst }, "bit.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bic, width, src, dst }, "bic.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Bis, width, src, dst }, "bis.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::Xor, width, src, dst }, "xor.b r14, r15");
+    passert!(TwoArg { opcode: token::TwoArg::And, width, src, dst }, "and.b r14, r15");
+}
+#[test]
+fn jump() {
+    const dst100: JumpDst = JumpDst::Rel(100);
+    passert!(Jump { opcode: token::Jump::Jne, dst: dst100 }, "jne 100");
+    passert!(Jump { opcode: token::Jump::Jnz, dst: dst100 }, "jnz 100");
+    passert!(Jump { opcode: token::Jump::Jeq, dst: dst100 }, "jeq 100");
+    passert!(Jump { opcode: token::Jump::Jz, dst: dst100 }, "jz 100");
+    passert!(Jump { opcode: token::Jump::Jnc, dst: dst100 }, "jnc 100");
+    passert!(Jump { opcode: token::Jump::Jlo, dst: dst100 }, "jlo 100");
+    passert!(Jump { opcode: token::Jump::Jc, dst: dst100 }, "jc 100");
+    passert!(Jump { opcode: token::Jump::Jhs, dst: dst100 }, "jhs 100");
+    passert!(Jump { opcode: token::Jump::Jn, dst: dst100 }, "jn 100");
+    passert!(Jump { opcode: token::Jump::Jge, dst: dst100 }, "jge 100");
+    passert!(Jump { opcode: token::Jump::Jl, dst: dst100 }, "jl 100");
+    passert!(Jump { opcode: token::Jump::Jmp, dst: dst100 }, "jmp 100");
+}
+#[test]
+fn reti() {
+    passert!(Reti, "reti");
+}
+#[test]
+fn br() {
+    passert!(Br { src: Src::Direct(Reg::R15) }, "br r15");
+}
+#[test]
+fn width() {
+    passert!(Width::Byte, ".b");
+    passert!(Width::Word, ".w");
+    passert!(Width::Word, ""); // an empty suffix is expected to parse as word width
+}
+#[test]
+fn src() {
+    passert!(Src::Direct(Reg::R15), "r15");
+    passert!(Src::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)");
+    passert!(Src::Indirect(Reg::R15), "@r15");
+    passert!(Src::PostInc(Reg::R15), "@r15+");
+    passert!(Src::Absolute(expr!(0x1000)), "&0x1000");
+    passert!(Src::Immediate(expr!(0x1000)), "#0x1000");
+    passert!(Src::BareExpr(expr!(foo)), "foo");
+}
+#[test]
+fn dst() {
+    passert!(Dst::Direct(Reg::R15), "r15");
+    passert!(Dst::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)");
+    passert!(Dst::Absolute(expr!(0x1000)), "&0x1000");
+    passert!(Dst::Special(DstSpecial::Zero), "#0");
+    passert!(Dst::Special(DstSpecial::One), "#1");
+}
+#[test]
+fn jump_dst() {
+    passert!(JumpDst::Rel(100), "100");
+    passert!(JumpDst::Rel(-100), "-100");
+    passert!(JumpDst::Label("foo"), "foo");
+}
+
+#[test]
+fn expr() {
+    // Terms
+    passert!(expr!((1) Mul(2) Rem(3) Div(4)), "1 * 2 % 3 / 4");
+    // Factors
+    passert!(expr!((1) Add(2) Sub(3)), "1 + 2 - 3");
+    // Shift
+    passert!(expr!((1) Lsh(2) Rsh(3)), "1 << 2 >> 3");
+    // Bitwise logic
+    passert!(expr!((1) And(2) Or(3) Xor(4)), "1 & 2 | 3 ^ 4");
+    // Unary
+    passert!(expr!([Deref Neg Not] 1), "*-!1");
+    // Number
+    passert!(Expr::Number(42), "42");
+    // Identifier
+    passert!(Expr::Ident("foo"), "foo");
+    // Addrof
+    passert!(Expr::AddrOf("bar"), "&bar");
+    // Group
+    passert!(expr!((42)), "(42)");
+    // All of the above
+    passert!(
+        expr!(
+            (4) Mul(
+                (3) Add(
+                    (2) Lsh(
+                        (1) And([Neg] 1)
+                    ) Rsh([Deref] 2)
+                ) Add([Not] 3)
+            ) Mul(4)
+        ),
+        "4 * 3 + 2 << 1 & -1 >> *2 + !3 * 4"
+    );
+}