Merge pull request #1 from tonytins/features/paser

This is the first part of the MIPS assembly language parser. At the moment, it mostly serves as a proof of concept and can only parse the LOAD instruction.
This commit is contained in:
Anthony Foxclaw 2020-02-08 16:58:44 -05:00 committed by GitHub
commit 17ab034784
15 changed files with 380 additions and 97 deletions

25
Cargo.lock generated
View file

@ -2,17 +2,34 @@
# It is not intended for manual editing.
[[package]]
name = "corten"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"metacrate",
"nom",
"rbtag",
]
[[package]]
name = "memchr"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223"
[[package]]
name = "metacrate"
version = "0.1.0"
source = "git+https://github.com/tonytins/metacrate#efab304454bd48946d52efa0912b65898f37b1c9"
[[package]]
name = "nom"
version = "4.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
dependencies = [
"memchr",
"version_check",
]
[[package]]
name = "proc-macro2"
version = "0.4.30"
@ -67,3 +84,9 @@ name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
[[package]]
name = "version_check"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"

View file

@ -1,11 +1,10 @@
[package]
name = "corten"
version = "0.1.0"
authors = ["Anthony Foxclaw <35226681+tonytins@users.noreply.github.com>"]
version = "0.1.1"
authors = ["Anthony Foxclaw"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rbtag = "0.3"
nom = "4.2"
metacrate = { git = "https://github.com/tonytins/metacrate" }

View file

@ -1,6 +1,6 @@
# Corten
Corten is a stack-based virtual machine written in Rust as a hobby. It's based on the [Iridium 1](docs/spec.md#Iridium) architecture from Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/).
Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.
## Build Status

View file

@ -1,6 +1,6 @@
# Welcome
Corten is a stack-based virtual machine written in Rust as a hobby. It's based on the [Iridium 1](spec.md#Iridium) architecture from Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/).
Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.
## Specifications

View file

@ -1,9 +1,5 @@
# Specifications
## Iridium
Corten is based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial. His virtual machine used for the tutorial is known as [Iridium](https://github.com/fhaynes/iridium) and is based on the [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) architecture. Corten aims to be Iridium 1-compatible.
## Instruction Set
| Opcode | Function | Comment |

95
src/assembler.rs Normal file
View file

@ -0,0 +1,95 @@
#![allow(dead_code)]
pub mod instruction_parser;
pub mod opcode_parser;
pub mod operand_parser;
pub mod program_parser;
pub mod register_parser;
/// A single lexical unit produced by the assembler's parsers.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
/// An instruction mnemonic, carrying the opcode it stands for.
Opcode { code: Opcode },
/// A register reference, carrying the register number.
Register { reg_num: u8 },
/// A numeric literal operand.
Number { value: i32 },
}
/// Operations that can be encoded into, and decoded from, a single byte.
///
/// The byte value of each opcode is defined by the two conversions below
/// (`From<u8> for Opcode` and `From<Opcode> for u8`), which must remain exact
/// mirror images of each other.
#[derive(Debug, PartialEq, Clone)]
pub enum Opcode {
    HLT,
    /// Fallback produced when a byte does not encode any known opcode.
    IGL,
    LOAD,
    ADD,
    SUB,
    MUL,
    DIV,
    /// Equal
    EQ,
    /// Not equal
    NEQ,
    /// Greater than
    GT,
    /// Less than
    LT,
    /// Greater than or equal to
    GTE,
    /// Less than or equal to
    LTE,
    /// Jump if equal
    JMPE,
    /// Jump
    JMP,
    /// Jump forward
    JMPF,
    /// Jump backward
    JMPB,
    NOP,
}

impl From<u8> for Opcode {
    /// Decodes a byte into its opcode; any byte outside `0..=16` becomes
    /// [`Opcode::IGL`].
    fn from(vm: u8) -> Self {
        match vm {
            0 => Opcode::LOAD,
            1 => Opcode::ADD,
            2 => Opcode::SUB,
            3 => Opcode::MUL,
            4 => Opcode::DIV,
            5 => Opcode::HLT,
            6 => Opcode::JMP,
            7 => Opcode::JMPF,
            8 => Opcode::JMPB,
            9 => Opcode::EQ,
            10 => Opcode::NEQ,
            11 => Opcode::GTE,
            12 => Opcode::GT,
            13 => Opcode::LTE,
            14 => Opcode::LT,
            15 => Opcode::JMPE,
            16 => Opcode::NOP,
            _ => Opcode::IGL,
        }
    }
}

impl From<Opcode> for u8 {
    /// Encodes an opcode as its byte value.
    ///
    /// Every value here matches the decoding table in `From<u8> for Opcode`,
    /// so `Opcode::from(u8::from(op)) == op` holds for every opcode.
    /// `IGL` is encoded as 100, which the decoder maps back to `IGL`.
    fn from(op: Opcode) -> Self {
        match op {
            Opcode::LOAD => 0,
            Opcode::ADD => 1,
            Opcode::SUB => 2,
            Opcode::MUL => 3,
            Opcode::DIV => 4,
            Opcode::HLT => 5,
            Opcode::JMP => 6,
            Opcode::JMPF => 7,
            Opcode::JMPB => 8,
            Opcode::EQ => 9,
            Opcode::NEQ => 10,
            Opcode::GTE => 11,
            Opcode::GT => 12,
            Opcode::LTE => 13,
            Opcode::LT => 14,
            Opcode::JMPE => 15,
            Opcode::NOP => 16,
            Opcode::IGL => 100,
        }
    }
}

View file

@ -0,0 +1,102 @@
use nom::*;
use crate::assembler::Token;
use crate::assembler::opcode_parser::opcode_load;
use crate::assembler::operand_parser::integer_operand;
use crate::assembler::register_parser::register;
use nom::types::CompleteStr;
/// One parsed assembly instruction: an opcode token followed by up to three
/// optional operand tokens.
#[derive(Debug, PartialEq)]
pub struct AssemblerInstruction {
/// Expected to hold a `Token::Opcode`; `to_bytes` exits the process otherwise.
opcode: Token,
/// First operand, if the instruction has one.
operand1: Option<Token>,
/// Second operand, if the instruction has one.
operand2: Option<Token>,
/// Third operand, if the instruction has one.
operand3: Option<Token>,
}
impl AssemblerInstruction {
    /// Encodes this instruction as bytecode: the opcode byte first, then the
    /// bytes of every operand that is present, in operand order.
    ///
    /// Prints a message and terminates the process with exit code 1 if the
    /// `opcode` field does not hold a `Token::Opcode`, or if any operand
    /// holds one.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut results = vec![];
        match &self.opcode {
            // The `u8` conversion consumes its argument, so clone only the
            // opcode rather than the whole token.
            Token::Opcode { code } => results.push(u8::from(code.clone())),
            _ => {
                println!("Incorrect opcode!");
                std::process::exit(1);
            }
        }
        let operands = [&self.operand1, &self.operand2, &self.operand3];
        // `flatten` skips the operand slots that are `None`.
        for token in operands.iter().copied().flatten() {
            AssemblerInstruction::extract_operand(token, &mut results);
        }
        results
    }

    /// Appends the encoding of a single operand token to `results`.
    ///
    /// A register contributes one byte (its number). A number contributes two
    /// bytes, most significant first. An opcode token is not a valid operand:
    /// a message is printed and the process exits with code 1.
    fn extract_operand(t: &Token, results: &mut Vec<u8>) {
        match t {
            Token::Register { reg_num } => results.push(*reg_num),
            Token::Number { value } => {
                // Only the low 16 bits are encoded; the cast deliberately
                // truncates anything wider, as the encoding has two bytes.
                results.extend_from_slice(&(*value as u16).to_be_bytes());
            }
            Token::Opcode { .. } => {
                println!("Opcode found in operand field");
                std::process::exit(1);
            }
        }
    }
}
// Parser for the `<opcode> <register> <integer>` instruction form, for
// example `load $0 #100`. The opcode is matched by `opcode_load`, so `load`
// is the only mnemonic accepted here. The third operand slot is always left
// as `None`.
named!(pub instruction_one<CompleteStr, AssemblerInstruction>,
do_parse!(
o: opcode_load >>
r: register >>
i: integer_operand >>
(
AssemblerInstruction{
opcode: o,
operand1: Some(r),
operand2: Some(i),
operand3: None
}
)
)
);
#[cfg(test)]
mod instruction_parser_test {
    use super::*;
    use crate::assembler::Opcode;

    /// `load $0 #100` must parse completely into a LOAD instruction with a
    /// register operand, a number operand and no third operand.
    #[test]
    fn test_parse_instruction_form_one() {
        let expected = AssemblerInstruction {
            opcode: Token::Opcode { code: Opcode::LOAD },
            operand1: Some(Token::Register { reg_num: 0 }),
            operand2: Some(Token::Number { value: 100 }),
            operand3: None,
        };
        let parsed = instruction_one(CompleteStr("load $0 #100\n"));
        assert_eq!(parsed, Ok((CompleteStr(""), expected)));
    }
}

View file

@ -0,0 +1,24 @@
#![allow(unused_imports)]
use nom::*;
use nom::{digit, types::CompleteStr};
use crate::assembler::{Token, Opcode};
// Matches the exact text `load` and yields the token for the LOAD opcode.
named!(pub opcode_load<CompleteStr, Token>,
do_parse!(tag!("load") >> (Token::Opcode{code: Opcode::LOAD}))
);
#[cfg(test)]
mod opcode_parser_test {
    use super::*;

    /// The `load` mnemonic is detected and parsed into a LOAD opcode token,
    /// leaving no unparsed input behind.
    #[test]
    fn test_parser_op_load() {
        let parsed = opcode_load(CompleteStr("load"));
        assert!(parsed.is_ok());
        let (remaining, opcode) = parsed.unwrap();
        assert_eq!(opcode, Token::Opcode { code: Opcode::LOAD });
        assert_eq!(remaining, CompleteStr(""));
    }
}

View file

@ -0,0 +1,30 @@
#![allow(unused_imports)]
use nom::*;
use nom::{digit, types::CompleteStr};
use crate::assembler::Token;
// Parses an integer operand written as `#` followed by decimal digits
// (for example `#100`) into a `Token::Number`. `ws!` skips the whitespace
// around the operand.
//
// The digits are converted through `map_res!`, so a literal that does not
// fit in an `i32` is reported as a parse error instead of panicking on
// user-supplied input.
named!(pub integer_operand<CompleteStr, Token>,
    ws!(
        do_parse!(
            tag!("#") >>
            value: map_res!(digit, |digits: CompleteStr| digits.parse::<i32>()) >>
            (
                Token::Number{value: value}
            )
        )
    )
);
#[cfg(test)]
mod reg_parser_test {
    use super::*;

    /// Despite its name, this exercises `integer_operand`: the input `#10`
    /// must be consumed entirely and yield the number 10.
    #[test]
    fn test_opcode_load() {
        let (remaining, token) = integer_operand(CompleteStr("#10")).unwrap();
        assert_eq!(remaining, CompleteStr(""));
        assert_eq!(token, Token::Number { value: 10 });
    }
}

View file

@ -0,0 +1,53 @@
use nom::types::CompleteStr;
use nom::*;
use crate::assembler::instruction_parser::{instruction_one, AssemblerInstruction};
/// A parsed assembly program: the instructions in the order they were read.
#[derive(Debug, PartialEq)]
pub struct Program {
/// Instructions in source order; `to_bytes` encodes them in this order.
instructions: Vec<AssemblerInstruction>,
}
impl Program {
    /// Assembles the program by concatenating the bytecode of every
    /// instruction, in order.
    pub fn to_bytes(&self) -> Vec<u8> {
        self.instructions
            .iter()
            .flat_map(|instruction| instruction.to_bytes())
            .collect()
    }
}
// Parses one or more instructions (`many1!` over `instruction_one`) and
// collects them into a `Program`.
named!(pub program<CompleteStr, Program>,
do_parse!(
instructions: many1!(instruction_one) >>
(
Program {
instructions
}
)
)
);
#[cfg(test)]
mod instruction_parser_test {
    use super::*;

    /// A single `load` line parses completely into a one-instruction program.
    #[test]
    fn test_parse_instruction() {
        let result = program(CompleteStr("load $0 #100\n"));
        let (leftover, p) = result.unwrap();
        assert_eq!(leftover, CompleteStr(""));
        assert_eq!(1, p.instructions.len());
    }

    /// The assembled bytecode is checked byte for byte, not just by length.
    #[test]
    fn test_program_to_bytes() {
        let result = program(CompleteStr("load $0 #100\n"));
        assert_eq!(result.is_ok(), true);
        let (_, prog) = result.unwrap();
        // Opcode LOAD (0), register 0, then 100 as two bytes, high byte first.
        assert_eq!(prog.to_bytes(), vec![0, 0, 0, 100]);
    }
}

View file

@ -0,0 +1,33 @@
#![allow(unused_imports)]
use nom::*;
use nom::{digit, types::CompleteStr};
use crate::assembler::Token;
// Parses a register reference written as `$` followed by decimal digits
// (for example `$0`) into a `Token::Register`. `ws!` skips the whitespace
// around the operand.
//
// The digits are converted through `map_res!`, so a register number that
// does not fit in a `u8` (such as `$256`) is reported as a parse error
// instead of panicking on user-supplied input.
named!(pub register <CompleteStr, Token>,
    ws!(
        do_parse!(
            tag!("$") >>
            reg_num: map_res!(digit, |digits: CompleteStr| digits.parse::<u8>()) >>
            (
                Token::Register{
                    reg_num: reg_num
                }
            )
        )
    )
);
// Compiled only for test builds, like the other parser test modules.
#[cfg(test)]
mod register_parser_tests {
    use super::*;

    /// A register needs the `$` prefix followed by at least one digit.
    #[test]
    fn test_parse_register() {
        // Well-formed register reference.
        let result = register(CompleteStr("$0"));
        assert_eq!(result.is_ok(), true);
        // Missing `$` prefix.
        let result = register(CompleteStr("0"));
        assert_eq!(result.is_ok(), false);
        // Non-numeric register name.
        let result = register(CompleteStr("$a"));
        assert_eq!(result.is_ok(), false);
    }
}

View file

@ -1,61 +1,6 @@
#![allow(dead_code)]
/// Operations that can be decoded from a single byte.
#[derive(Debug, PartialEq)]
pub enum Opcode {
    HLT,
    /// Fallback produced when a byte does not encode any known opcode.
    IGL,
    LOAD,
    ADD,
    SUB,
    MUL,
    DIV,
    /// Equal
    EQ,
    /// Not equal
    NEQ,
    /// Greater than
    GT,
    /// Less than
    LT,
    /// Greater than or equal to
    GTE,
    /// Less than or equal to
    LTE,
    /// Jump if equal
    JMPE,
    /// Jump
    JMP,
    /// Jump forward
    JMPF,
    /// Jump backward
    JMPB,
    NOP,
}

impl From<u8> for Opcode {
    /// Decodes a byte into its opcode; any byte outside `0..=16` becomes
    /// `IGL`.
    fn from(byte: u8) -> Self {
        match byte {
            0 => Self::LOAD,
            1 => Self::ADD,
            2 => Self::SUB,
            3 => Self::MUL,
            4 => Self::DIV,
            5 => Self::HLT,
            6 => Self::JMP,
            7 => Self::JMPF,
            8 => Self::JMPB,
            9 => Self::EQ,
            10 => Self::NEQ,
            11 => Self::GTE,
            12 => Self::GT,
            13 => Self::LTE,
            14 => Self::LT,
            15 => Self::JMPE,
            16 => Self::NOP,
            _ => Self::IGL,
        }
    }
}
use crate::assembler::Opcode;
#[derive(Debug, PartialEq)]
pub struct Instruction {
@ -71,6 +16,7 @@ impl Instruction {
#[cfg(test)]
mod instruction_tests {
use crate::instruction::*;
use crate::assembler::Opcode;
#[test]
fn test_crate_hlt() {

View file

@ -1,6 +1,7 @@
mod assembler;
mod instruction;
mod vm;
mod repl;
mod vm;
use repl::REPL;

View file

@ -1,11 +1,12 @@
use std;
use std::io;
use std::io::Write;
use std::num::ParseIntError;
use crate::vm::VM;
use crate::assembler::program_parser::program;
use metacrate::crate_version;
use rbtag::{BuildDateTime, BuildInfo};
use nom::types::CompleteStr;
// Unit type that exists only to carry the `BuildDateTime` and `BuildInfo`
// derives imported from `rbtag`.
#[derive(BuildDateTime, BuildInfo)]
struct BuildTag;
@ -76,37 +77,17 @@ impl REPL {
println!("{:#?}", self.vm.registers);
}
_ => {
let results = self.parse_hex(buffer);
match results {
Ok(bytes) => {
for byte in bytes {
self.vm.add_byte(byte);
}
let prog = match program(buffer.into()) {
Ok((_, prog)) => prog,
Err(_) => {
println!("Unable to parse input.");
continue;
}
Err(_er) => {
println!("Unable to decode hex string. Please enter 4 groups of 2 hex characters.");
}
}
self.vm.run_once();
}
}
}
}
};
/// Accepts the hexadecimal without a leading '0x' and returns a Vec of a
/// u8. Example: 00 01 03 E8
fn parse_hex(&mut self, i: &str) -> Result<Vec<u8>, ParseIntError> {
let split = i.split(" ").collect::<Vec<&str>>();
let mut results: Vec<u8> = vec![];
for hex_string in split {
let byte = u8::from_str_radix(&hex_string, 16);
match byte {
Ok(result) => {
results.push(result);
self.vm.program.append(&mut prog.to_bytes());
}
Err(err) => return Err(err),
}
}
Ok(results)
}
}

View file

@ -1,5 +1,5 @@
#![allow(dead_code)]
use crate::instruction::Opcode;
use crate::assembler::Opcode;
pub struct VM {
/// Array that simulates the hardware register