diff --git a/Cargo.lock b/Cargo.lock index 5307978..6f93806 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,17 +2,34 @@ # It is not intended for manual editing. [[package]] name = "corten" -version = "0.1.0" +version = "0.1.1" dependencies = [ "metacrate", + "nom", "rbtag", ] +[[package]] +name = "memchr" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" + [[package]] name = "metacrate" version = "0.1.0" source = "git+https://github.com/tonytins/metacrate#efab304454bd48946d52efa0912b65898f37b1c9" +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check", +] + [[package]] name = "proc-macro2" version = "0.4.30" @@ -67,3 +84,9 @@ name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" diff --git a/Cargo.toml b/Cargo.toml index f65c093..2392be8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,10 @@ [package] name = "corten" -version = "0.1.0" -authors = ["Anthony Foxclaw <35226681+tonytins@users.noreply.github.com>"] +version = "0.1.1" +authors = ["Anthony Foxclaw"] edition = "2018" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] rbtag = "0.3" +nom = "4.2" metacrate = { git = "https://github.com/tonytins/metacrate" } \ No newline at end of file diff --git a/README.md b/README.md index e6b7fab..eb2f85f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Corten -Corten is a 
stack-based virtual machine written in Rust as a hobby. It's based on the [Iridium 1](docs/spec.md#Iridium) architecture from Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/). +Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial. ## Build Status diff --git a/docs/README.md b/docs/README.md index e33b8b7..dc708f6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ # Welcome -Corten is a stack-based virtual machine written in Rust as a hobby. It's based on the [Iridium 1](spec.md#Iridium) architecture from Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/). +Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial. ## Specifications diff --git a/docs/spec.md b/docs/spec.md index 3f62c7d..fc695a8 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -1,9 +1,5 @@ # Specifications -## Iridium - -Corten is based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial. His virtual machine used for the tutorial is known as [Iridium](https://github.com/fhaynes/iridium) and is based on the [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) architecture. Corten aims to be Iridium 1-compatible. 
- ## Instruction Set | Opcode | Function | Comment | diff --git a/src/assembler.rs b/src/assembler.rs new file mode 100644 index 0000000..834bbe9 --- /dev/null +++ b/src/assembler.rs @@ -0,0 +1,95 @@ +#![allow(dead_code)] +pub mod instruction_parser; +pub mod opcode_parser; +pub mod operand_parser; +pub mod program_parser; +pub mod register_parser; + +#[derive(Debug, PartialEq, Clone)] +pub enum Token { + Opcode { code: Opcode }, + Register { reg_num: u8 }, + Number { value: i32 }, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Opcode { + HLT, + IGL, + LOAD, + ADD, + SUB, + MUL, + DIV, + /// Equal + EQ, + /// Not equal + NEQ, + /// Greater than + GT, + /// Less than + LT, + /// Greater than or equal to + GTE, + /// Less than or equal to + LTE, + /// jump if equal + JMPE, + /// Jump + JMP, + /// Jump forward + JMPF, + /// Jump backward + JMPB, + NOP, +} + +impl From<u8> for Opcode { + fn from(vm: u8) -> Self { + match vm { + 0 => Opcode::LOAD, + 1 => Opcode::ADD, + 2 => Opcode::SUB, + 3 => Opcode::MUL, + 4 => Opcode::DIV, + 5 => Opcode::HLT, + 6 => Opcode::JMP, + 7 => Opcode::JMPF, + 8 => Opcode::JMPB, + 9 => Opcode::EQ, + 10 => Opcode::NEQ, + 11 => Opcode::GTE, + 12 => Opcode::GT, + 13 => Opcode::LTE, + 14 => Opcode::LT, + 15 => Opcode::JMPE, + 16 => Opcode::NOP, + _ => Opcode::IGL, + } + } +} + +impl From<Opcode> for u8 { + fn from(op: Opcode) -> Self { + match op { + Opcode::LOAD => 0, + Opcode::ADD => 1, + Opcode::SUB => 2, + Opcode::MUL => 3, + Opcode::DIV => 4, + Opcode::HLT => 5, + Opcode::JMP => 6, + Opcode::JMPF => 7, + Opcode::JMPB => 8, + Opcode::EQ => 9, + Opcode::NEQ => 10, + Opcode::GTE => 11, + Opcode::GT => 12, + Opcode::LTE => 13, + Opcode::LT => 14, + Opcode::JMPE => 15, + Opcode::NOP => 16, + Opcode::IGL => 100, + } + } +} \ No newline at end of file diff --git a/src/assembler/instruction_parser.rs b/src/assembler/instruction_parser.rs new file mode 100644 index 0000000..ddcdf11 --- /dev/null +++ b/src/assembler/instruction_parser.rs @@ -0,0 +1,102 @@ +use 
nom::*; +use crate::assembler::Token; +use crate::assembler::opcode_parser::opcode_load; +use crate::assembler::operand_parser::integer_operand; +use crate::assembler::register_parser::register; + +use nom::types::CompleteStr; + +#[derive(Debug, PartialEq)] +pub struct AssemblerInstruction { + opcode: Token, + operand1: Option<Token>, + operand2: Option<Token>, + operand3: Option<Token>, +} + +impl AssemblerInstruction { + pub fn to_bytes(&self) -> Vec<u8> { + let mut results = vec![]; + + match self.opcode.to_owned() { + Token::Opcode { code } => match code { + _ => { + results.push(code.into()); + } + }, + _ => { + println!("Incorrect opcode!"); + std::process::exit(1); + } + }; + + + for operand in &[&self.operand1, &self.operand2, &self.operand3] { + if let Some(token) = operand { + AssemblerInstruction::extract_operand(token, &mut results) + } + } + + results + } + + fn extract_operand(t: &Token, results: &mut Vec<u8>) { + match t { + Token::Register { reg_num } => { + results.push(*reg_num); + }, + Token::Number { value } => { + let conv = *value as u16; + let byte1 = conv; + let byte2 = conv >> 8; + + results.push(byte2 as u8); + results.push(byte1 as u8); + }, + _ => { + println!("Opcode found in operand field"); + std::process::exit(1); + } + }; + } + +} + +named!(pub instruction_one<CompleteStr, AssemblerInstruction>, + do_parse!( + o: opcode_load >> + r: register >> + i: integer_operand >> + ( + AssemblerInstruction{ + opcode: o, + operand1: Some(r), + operand2: Some(i), + operand3: None + } + ) + ) +); + +#[cfg(test)] +mod instruction_parser_test { + use super::*; + use crate::assembler::Opcode; + + #[test] + fn test_parse_instruction_form_one() { + let result = instruction_one(CompleteStr("load $0 #100\n")); + assert_eq!( + result, + Ok(( + CompleteStr(""), + AssemblerInstruction { + opcode: Token::Opcode { code: Opcode::LOAD }, + operand1: Some(Token::Register { reg_num: 0 }), + operand2: Some(Token::Number { value: 100 }), + operand3: None + } + )) + ); + } +} diff --git a/src/assembler/opcode_parser.rs 
b/src/assembler/opcode_parser.rs new file mode 100644 index 0000000..8d4406f --- /dev/null +++ b/src/assembler/opcode_parser.rs @@ -0,0 +1,24 @@ +#![allow(unused_imports)] +use nom::*; +use nom::{digit, types::CompleteStr}; + +use crate::assembler::{Token, Opcode}; + +named!(pub opcode_load<CompleteStr, Token>, + do_parse!(tag!("load") >> (Token::Opcode{code: Opcode::LOAD})) +); + +#[cfg(test)] +mod opcode_parser_test { + use super::*; + + #[test] + fn test_parser_op_load() { + // Test that opcode is detected and parsed correctly + let result = opcode_load(CompleteStr("load")); + assert_eq!(result.is_ok(), true); + let (rest, token) = result.unwrap(); + assert_eq!(token, Token::Opcode { code: Opcode::LOAD }); + assert_eq!(rest, CompleteStr("")); + } +} diff --git a/src/assembler/operand_parser.rs b/src/assembler/operand_parser.rs new file mode 100644 index 0000000..110d9fe --- /dev/null +++ b/src/assembler/operand_parser.rs @@ -0,0 +1,30 @@ +#![allow(unused_imports)] +use nom::*; +use nom::{digit, types::CompleteStr}; + +use crate::assembler::Token; + +named!(pub integer_operand<CompleteStr, Token>, + ws!( + do_parse!( + tag!("#") >> + reg_num: digit >> + ( + Token::Number{value: reg_num.parse::<i32>().unwrap()} + ) + ) + ) +); + +#[cfg(test)] +mod reg_parser_test { + use super::*; + + #[test] + fn test_opcode_load() { + let result = integer_operand(CompleteStr("#10")); + let (rest, value) = result.unwrap(); + assert_eq!(rest, CompleteStr("")); + assert_eq!(value, Token::Number { value: 10 }); + } +} diff --git a/src/assembler/program_parser.rs b/src/assembler/program_parser.rs new file mode 100644 index 0000000..a0ac7bf --- /dev/null +++ b/src/assembler/program_parser.rs @@ -0,0 +1,53 @@ +use nom::types::CompleteStr; +use nom::*; + +use crate::assembler::instruction_parser::{instruction_one, AssemblerInstruction}; + +#[derive(Debug, PartialEq)] +pub struct Program { + instructions: Vec<AssemblerInstruction>, +} + +impl Program { + pub fn to_bytes(&self) -> Vec<u8> { + let mut prog = vec![]; + for instruction in &self.instructions { + 
prog.append(&mut instruction.to_bytes()); + } + prog + } +} + +named!(pub program<CompleteStr, Program>, + do_parse!( + instructions: many1!(instruction_one) >> + ( + Program { + instructions + } + ) + ) +); + +#[cfg(test)] +mod instruction_parser_test { + use super::*; + + #[test] + fn test_parse_instruction() { + let result = program(CompleteStr("load $0 #100\n")); + let (leftover, p) = result.unwrap(); + assert_eq!(leftover, CompleteStr("")); + assert_eq!(1, p.instructions.len()); + } + + #[test] + fn test_program_to_bytes() { + let result = program(CompleteStr("load $0 #100\n")); + assert_eq!(result.is_ok(), true); + let (_, prog) = result.unwrap(); + let bytecode = prog.to_bytes(); + assert_eq!(bytecode.len(), 4); + println!("{:?}", bytecode); + } +} diff --git a/src/assembler/register_parser.rs b/src/assembler/register_parser.rs new file mode 100644 index 0000000..1482e1a --- /dev/null +++ b/src/assembler/register_parser.rs @@ -0,0 +1,33 @@ +#![allow(unused_imports)] +use nom::*; +use nom::{digit, types::CompleteStr}; + +use crate::assembler::Token; + +named!(pub register <CompleteStr, Token>, + ws!( + do_parse!( + tag!("$") >> + reg_num: digit >> + ( + Token::Register{ + reg_num: reg_num.parse::<u8>().unwrap() + } + ) + ) + ) +); + +mod register_parser_tests { + use super::*; + + #[test] + fn test_parse_register() { + let result = register(CompleteStr("$0")); + assert_eq!(result.is_ok(), true); + let result = register(CompleteStr("0")); + assert_eq!(result.is_ok(), false); + let result = register(CompleteStr("$a")); + assert_eq!(result.is_ok(), false); + } +} diff --git a/src/instruction.rs b/src/instruction.rs index 0bd5f23..e84f5af 100644 --- a/src/instruction.rs +++ b/src/instruction.rs @@ -1,61 +1,6 @@ #![allow(dead_code)] -#[derive(Debug, PartialEq)] -pub enum Opcode { - HLT, - IGL, - LOAD, - ADD, - SUB, - MUL, - DIV, - /// Equal - EQ, - /// Not equal - NEQ, - /// Greater then - GT, - /// Less then - LT, - /// Greater then or equal to - GTE, - /// less then or equal - LTE, - /// jump if equal - 
JMPE, - /// Jump - JMP, - /// Jump forward - JMPF, - /// Jump backward - JMPB, - NOP, -} - -impl From<u8> for Opcode { - fn from(vm: u8) -> Self { - match vm { - 0 => Opcode::LOAD, - 1 => Opcode::ADD, - 2 => Opcode::SUB, - 3 => Opcode::MUL, - 4 => Opcode::DIV, - 5 => Opcode::HLT, - 6 => Opcode::JMP, - 7 => Opcode::JMPF, - 8 => Opcode::JMPB, - 9 => Opcode::EQ, - 10 => Opcode::NEQ, - 11 => Opcode::GTE, - 12 => Opcode::GT, - 13 => Opcode::LTE, - 14 => Opcode::LT, - 15 => Opcode::JMPE, - 16 => Opcode::NOP, - _ => Opcode::IGL, - } - } -} +use crate::assembler::Opcode; #[derive(Debug, PartialEq)] pub struct Instruction { @@ -71,6 +16,7 @@ impl Instruction { #[cfg(test)] mod instruction_tests { use crate::instruction::*; + use crate::assembler::Opcode; #[test] fn test_crate_hlt() { diff --git a/src/main.rs b/src/main.rs index f330e7a..905048f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ +mod assembler; mod instruction; -mod vm; mod repl; +mod vm; use repl::REPL; diff --git a/src/repl.rs b/src/repl.rs index 1de1668..2154bcc 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -1,11 +1,12 @@ use std; use std::io; use std::io::Write; -use std::num::ParseIntError; use crate::vm::VM; +use crate::assembler::program_parser::program; use metacrate::crate_version; use rbtag::{BuildDateTime, BuildInfo}; +use nom::types::CompleteStr; #[derive(BuildDateTime, BuildInfo)] struct BuildTag; @@ -76,37 +77,17 @@ impl REPL { println!("{:#?}", self.vm.registers); } _ => { - let results = self.parse_hex(buffer); - match results { - Ok(bytes) => { - for byte in bytes { - self.vm.add_byte(byte); - } + let prog = match program(buffer.into()) { + Ok((_, prog)) => prog, + Err(_) => { + println!("Unable to parse input."); + continue; } - Err(_er) => { - println!("Unable to decode hex string. Please enter 4 groups of 2 hex characters."); - } - } - self.vm.run_once(); - } - } - } - } + }; - /// Accepts the hexadecimal without a leading '0x' and returns a Vec of a - /// 
Example: 00 01 03 E8 - fn parse_hex(&mut self, i: &str) -> Result, ParseIntError> { - let split = i.split(" ").collect::>(); - let mut results: Vec = vec![]; - for hex_string in split { - let byte = u8::from_str_radix(&hex_string, 16); - match byte { - Ok(result) => { - results.push(result); + self.vm.program.append(&mut prog.to_bytes()); } - Err(err) => return Err(err), } } - Ok(results) } } diff --git a/src/vm.rs b/src/vm.rs index d76209c..6bb1595 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,5 +1,5 @@ #![allow(dead_code)] -use crate::instruction::Opcode; +use crate::assembler::Opcode; pub struct VM { /// Array that simulates the hardware register