Merge pull request #1 from tonytins/features/paser
This is the first part of the MIPS assembly language parser. At the moment, it mostly serves as a proof of concept and is only capable of parsing the LOAD instruction.
This commit is contained in:
commit
17ab034784
15 changed files with 380 additions and 97 deletions
25
Cargo.lock
generated
25
Cargo.lock
generated
|
@ -2,17 +2,34 @@
|
|||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "corten"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"metacrate",
|
||||
"nom",
|
||||
"rbtag",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223"
|
||||
|
||||
[[package]]
|
||||
name = "metacrate"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/tonytins/metacrate#efab304454bd48946d52efa0912b65898f37b1c9"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "4.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "0.4.30"
|
||||
|
@ -67,3 +84,9 @@ name = "unicode-xid"
|
|||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
[package]
|
||||
name = "corten"
|
||||
version = "0.1.0"
|
||||
authors = ["Anthony Foxclaw <35226681+tonytins@users.noreply.github.com>"]
|
||||
version = "0.1.1"
|
||||
authors = ["Anthony Foxclaw"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rbtag = "0.3"
|
||||
nom = "4.2"
|
||||
metacrate = { git = "https://github.com/tonytins/metacrate" }
|
|
@ -1,6 +1,6 @@
|
|||
# Corten
|
||||
|
||||
Corten is a stack-based virtual machine written in Rust as a hobby. It's based on the [Iridium 1](docs/spec.md#Iridium) architecture from Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/).
|
||||
Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.
|
||||
|
||||
## Build Status
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Welcome
|
||||
|
||||
Corten is a stack-based virtual machine written in Rust as a hobby. It's based on the [Iridium 1](spec.md#Iridium) architecture from Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/).
|
||||
Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.
|
||||
|
||||
## Specifications
|
||||
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
# Specifications
|
||||
|
||||
## Iridium
|
||||
|
||||
Corten is based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial. His virtual machine used for the tutorial is known as [Iridium](https://github.com/fhaynes/iridium) and is based on the [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6) architecture. Corten aims to be Iridium 1-compatible.
|
||||
|
||||
## Instruction Set
|
||||
|
||||
| Opcode | Function | Comment |
|
||||
|
|
95
src/assembler.rs
Normal file
95
src/assembler.rs
Normal file
|
@ -0,0 +1,95 @@
|
|||
#![allow(dead_code)]
|
||||
pub mod instruction_parser;
|
||||
pub mod opcode_parser;
|
||||
pub mod operand_parser;
|
||||
pub mod program_parser;
|
||||
pub mod register_parser;
|
||||
|
||||
/// A lexical token produced by the assembler's parsers.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    /// An instruction mnemonic.
    Opcode { code: Opcode },
    /// A register reference, carrying the register number.
    Register { reg_num: u8 },
    /// An integer literal operand.
    Number { value: i32 },
}

/// The instructions understood by the virtual machine.
///
/// The byte encoding of each variant is defined by the `From<u8>` and
/// `From<Opcode> for u8` conversions below, which must stay in sync.
#[derive(Debug, PartialEq, Clone)]
pub enum Opcode {
    HLT,
    IGL,
    LOAD,
    ADD,
    SUB,
    MUL,
    DIV,
    /// Equal
    EQ,
    /// Not equal
    NEQ,
    /// Greater than
    GT,
    /// Less than
    LT,
    /// Greater than or equal to
    GTE,
    /// Less than or equal to
    LTE,
    /// Jump if equal
    JMPE,
    /// Jump
    JMP,
    /// Jump forward
    JMPF,
    /// Jump backward
    JMPB,
    NOP,
}

impl From<u8> for Opcode {
    /// Decodes a byte into an opcode. Any byte without an assigned
    /// instruction decodes to `Opcode::IGL`.
    fn from(vm: u8) -> Self {
        match vm {
            0 => Opcode::LOAD,
            1 => Opcode::ADD,
            2 => Opcode::SUB,
            3 => Opcode::MUL,
            4 => Opcode::DIV,
            5 => Opcode::HLT,
            6 => Opcode::JMP,
            7 => Opcode::JMPF,
            8 => Opcode::JMPB,
            9 => Opcode::EQ,
            10 => Opcode::NEQ,
            11 => Opcode::GTE,
            12 => Opcode::GT,
            13 => Opcode::LTE,
            14 => Opcode::LT,
            15 => Opcode::JMPE,
            16 => Opcode::NOP,
            _ => Opcode::IGL,
        }
    }
}

impl From<Opcode> for u8 {
    /// Encodes an opcode as its byte value.
    ///
    /// This is the exact inverse of `Opcode::from(u8)` for every assigned
    /// instruction, so assembled bytes decode back to the same opcode.
    /// `Opcode::IGL` encodes as 100, which decodes back to `IGL`.
    fn from(op: Opcode) -> Self {
        match op {
            Opcode::LOAD => 0,
            Opcode::ADD => 1,
            Opcode::SUB => 2,
            Opcode::MUL => 3,
            Opcode::DIV => 4,
            Opcode::HLT => 5,
            Opcode::JMP => 6,
            Opcode::JMPF => 7,
            Opcode::JMPB => 8,
            Opcode::EQ => 9,
            Opcode::NEQ => 10,
            Opcode::GTE => 11,
            // GT/LTE/LT must mirror the decoder above (12, 13, 14).
            Opcode::GT => 12,
            Opcode::LTE => 13,
            Opcode::LT => 14,
            Opcode::JMPE => 15,
            Opcode::NOP => 16,
            Opcode::IGL => 100,
        }
    }
}
|
102
src/assembler/instruction_parser.rs
Normal file
102
src/assembler/instruction_parser.rs
Normal file
|
@ -0,0 +1,102 @@
|
|||
use nom::*;
|
||||
use crate::assembler::Token;
|
||||
use crate::assembler::opcode_parser::opcode_load;
|
||||
use crate::assembler::operand_parser::integer_operand;
|
||||
use crate::assembler::register_parser::register;
|
||||
|
||||
use nom::types::CompleteStr;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct AssemblerInstruction {
|
||||
opcode: Token,
|
||||
operand1: Option<Token>,
|
||||
operand2: Option<Token>,
|
||||
operand3: Option<Token>,
|
||||
}
|
||||
|
||||
impl AssemblerInstruction {
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
let mut results = vec![];
|
||||
|
||||
match self.opcode.to_owned() {
|
||||
Token::Opcode { code } => match code {
|
||||
_ => {
|
||||
results.push(code.into());
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
println!("Incorrect opcode!");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
for operand in &[&self.operand1, &self.operand2, &self.operand3] {
|
||||
if let Some(token) = operand {
|
||||
AssemblerInstruction::extract_operand(token, &mut results)
|
||||
}
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
fn extract_operand(t: &Token, results: &mut Vec<u8>) {
|
||||
match t {
|
||||
Token::Register { reg_num } => {
|
||||
results.push(*reg_num);
|
||||
},
|
||||
Token::Number { value } => {
|
||||
let conv = *value as u16;
|
||||
let byte1 = conv;
|
||||
let byte2 = conv >> 8;
|
||||
|
||||
results.push(byte2 as u8);
|
||||
results.push(byte1 as u8);
|
||||
},
|
||||
_ => {
|
||||
println!("Opcode found in operand field");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Parses an instruction of the form `<opcode> <register> <integer>`,
// e.g. `load $0 #100`, into an `AssemblerInstruction` with two operands.
named!(pub instruction_one<CompleteStr, AssemblerInstruction>,
    do_parse!(
        opcode: opcode_load >>
        target: register >>
        literal: integer_operand >>
        (AssemblerInstruction {
            opcode,
            operand1: Some(target),
            operand2: Some(literal),
            operand3: None,
        })
    )
);
|
||||
|
||||
#[cfg(test)]
mod instruction_parser_test {
    use super::*;
    use crate::assembler::Opcode;

    /// `load $0 #100` parses into a LOAD with a register and a number
    /// operand, leaving no unparsed input.
    #[test]
    fn test_parse_instruction_form_one() {
        let expected = AssemblerInstruction {
            opcode: Token::Opcode { code: Opcode::LOAD },
            operand1: Some(Token::Register { reg_num: 0 }),
            operand2: Some(Token::Number { value: 100 }),
            operand3: None,
        };

        let parsed = instruction_one(CompleteStr("load $0 #100\n"));

        assert_eq!(parsed, Ok((CompleteStr(""), expected)));
    }
}
|
24
src/assembler/opcode_parser.rs
Normal file
24
src/assembler/opcode_parser.rs
Normal file
|
@ -0,0 +1,24 @@
|
|||
#![allow(unused_imports)]
|
||||
use nom::*;
|
||||
use nom::{digit, types::CompleteStr};
|
||||
|
||||
use crate::assembler::{Token, Opcode};
|
||||
|
||||
// Recognises the literal mnemonic `load` and yields the LOAD opcode token.
named!(pub opcode_load<CompleteStr, Token>,
    value!(Token::Opcode { code: Opcode::LOAD }, tag!("load"))
);
|
||||
|
||||
#[cfg(test)]
mod opcode_parser_test {
    use super::*;

    #[test]
    fn test_parser_op_load() {
        // The mnemonic must be detected, consumed entirely, and mapped to LOAD.
        let outcome = opcode_load(CompleteStr("load"));
        assert!(outcome.is_ok());

        let (remaining, parsed) = outcome.unwrap();
        assert_eq!(parsed, Token::Opcode { code: Opcode::LOAD });
        assert_eq!(remaining, CompleteStr(""));
    }
}
|
30
src/assembler/operand_parser.rs
Normal file
30
src/assembler/operand_parser.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
#![allow(unused_imports)]
|
||||
use nom::*;
|
||||
use nom::{digit, types::CompleteStr};
|
||||
|
||||
use crate::assembler::Token;
|
||||
|
||||
// Parses an integer operand written as `#<digits>` (e.g. `#100`) into a
// `Token::Number`. The parser is wrapped in `ws!`, so whitespace around the
// operand is skipped.
//
// The digits are converted with `map_res!` rather than `unwrap()`: a literal
// that does not fit in an `i32` (e.g. `#99999999999`) becomes an ordinary
// parse error instead of panicking on user input.
named!(pub integer_operand<CompleteStr, Token>,
    ws!(
        do_parse!(
            tag!("#") >>
            value: map_res!(digit, |digits: CompleteStr| digits.parse::<i32>()) >>
            (Token::Number { value })
        )
    )
);
|
||||
|
||||
#[cfg(test)]
mod operand_parser_test {
    use super::*;

    /// `#10` is consumed completely and yields the number 10.
    #[test]
    fn test_parse_integer_operand() {
        let result = integer_operand(CompleteStr("#10"));
        let (rest, value) = result.unwrap();
        assert_eq!(rest, CompleteStr(""));
        assert_eq!(value, Token::Number { value: 10 });
    }
}
|
53
src/assembler/program_parser.rs
Normal file
53
src/assembler/program_parser.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
use nom::types::CompleteStr;
|
||||
use nom::*;
|
||||
|
||||
use crate::assembler::instruction_parser::{instruction_one, AssemblerInstruction};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Program {
|
||||
instructions: Vec<AssemblerInstruction>,
|
||||
}
|
||||
|
||||
impl Program {
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
let mut prog = vec![];
|
||||
for instruction in &self.instructions {
|
||||
prog.append(&mut instruction.to_bytes());
|
||||
}
|
||||
prog
|
||||
}
|
||||
}
|
||||
|
||||
// Parses one or more instructions into a `Program`.
named!(pub program<CompleteStr, Program>,
    map!(
        many1!(instruction_one),
        |instructions| Program { instructions }
    )
);
|
||||
|
||||
#[cfg(test)]
mod program_parser_test {
    use super::*;

    /// A single-line program parses into exactly one instruction with no
    /// leftover input.
    #[test]
    fn test_parse_instruction() {
        let result = program(CompleteStr("load $0 #100\n"));
        let (leftover, p) = result.unwrap();
        assert_eq!(leftover, CompleteStr(""));
        assert_eq!(1, p.instructions.len());
    }

    /// `load $0 #100` assembles to: opcode 0 (LOAD), register 0, then the
    /// number 100 as two bytes, high byte first.
    #[test]
    fn test_program_to_bytes() {
        let result = program(CompleteStr("load $0 #100\n"));
        assert!(result.is_ok());
        let (_, prog) = result.unwrap();
        let bytecode = prog.to_bytes();
        assert_eq!(bytecode, vec![0, 0, 0, 100]);
    }
}
|
33
src/assembler/register_parser.rs
Normal file
33
src/assembler/register_parser.rs
Normal file
|
@ -0,0 +1,33 @@
|
|||
#![allow(unused_imports)]
|
||||
use nom::*;
|
||||
use nom::{digit, types::CompleteStr};
|
||||
|
||||
use crate::assembler::Token;
|
||||
|
||||
// Parses a register operand written as `$<digits>` (e.g. `$0`) into a
// `Token::Register`. The parser is wrapped in `ws!`, so whitespace around the
// operand is skipped.
//
// The digits are converted with `map_res!` rather than `unwrap()`: a register
// number that does not fit in a `u8` (e.g. `$256`) becomes an ordinary parse
// error instead of panicking on user input.
named!(pub register<CompleteStr, Token>,
    ws!(
        do_parse!(
            tag!("$") >>
            reg_num: map_res!(digit, |digits: CompleteStr| digits.parse::<u8>()) >>
            (Token::Register { reg_num })
        )
    )
);
|
||||
|
||||
// Compiled only for test builds, like the other parser test modules.
#[cfg(test)]
mod register_parser_tests {
    use super::*;

    /// A register needs the `$` prefix followed by digits.
    #[test]
    fn test_parse_register() {
        // Well-formed register.
        let result = register(CompleteStr("$0"));
        assert!(result.is_ok());
        // Missing `$` prefix.
        let result = register(CompleteStr("0"));
        assert!(result.is_err());
        // Non-numeric register name.
        let result = register(CompleteStr("$a"));
        assert!(result.is_err());
    }
}
|
|
@ -1,61 +1,6 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Opcode {
|
||||
HLT,
|
||||
IGL,
|
||||
LOAD,
|
||||
ADD,
|
||||
SUB,
|
||||
MUL,
|
||||
DIV,
|
||||
/// Equal
|
||||
EQ,
|
||||
/// Not equal
|
||||
NEQ,
|
||||
/// Greater then
|
||||
GT,
|
||||
/// Less then
|
||||
LT,
|
||||
/// Greater then or equal to
|
||||
GTE,
|
||||
/// less then or equal
|
||||
LTE,
|
||||
/// jump if equal
|
||||
JMPE,
|
||||
/// Jump
|
||||
JMP,
|
||||
/// Jump forward
|
||||
JMPF,
|
||||
/// Jump backward
|
||||
JMPB,
|
||||
NOP,
|
||||
}
|
||||
|
||||
impl From<u8> for Opcode {
|
||||
fn from(vm: u8) -> Self {
|
||||
match vm {
|
||||
0 => Opcode::LOAD,
|
||||
1 => Opcode::ADD,
|
||||
2 => Opcode::SUB,
|
||||
3 => Opcode::MUL,
|
||||
4 => Opcode::DIV,
|
||||
5 => Opcode::HLT,
|
||||
6 => Opcode::JMP,
|
||||
7 => Opcode::JMPF,
|
||||
8 => Opcode::JMPB,
|
||||
9 => Opcode::EQ,
|
||||
10 => Opcode::NEQ,
|
||||
11 => Opcode::GTE,
|
||||
12 => Opcode::GT,
|
||||
13 => Opcode::LTE,
|
||||
14 => Opcode::LT,
|
||||
15 => Opcode::JMPE,
|
||||
16 => Opcode::NOP,
|
||||
_ => Opcode::IGL,
|
||||
}
|
||||
}
|
||||
}
|
||||
use crate::assembler::Opcode;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Instruction {
|
||||
|
@ -71,6 +16,7 @@ impl Instruction {
|
|||
#[cfg(test)]
|
||||
mod instruction_tests {
|
||||
use crate::instruction::*;
|
||||
use crate::assembler::Opcode;
|
||||
|
||||
#[test]
|
||||
fn test_crate_hlt() {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
mod assembler;
|
||||
mod instruction;
|
||||
mod vm;
|
||||
mod repl;
|
||||
mod vm;
|
||||
|
||||
use repl::REPL;
|
||||
|
||||
|
|
37
src/repl.rs
37
src/repl.rs
|
@ -1,11 +1,12 @@
|
|||
use std;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::num::ParseIntError;
|
||||
|
||||
use crate::vm::VM;
|
||||
use crate::assembler::program_parser::program;
|
||||
use metacrate::crate_version;
|
||||
use rbtag::{BuildDateTime, BuildInfo};
|
||||
use nom::types::CompleteStr;
|
||||
|
||||
#[derive(BuildDateTime, BuildInfo)]
|
||||
struct BuildTag;
|
||||
|
@ -76,37 +77,17 @@ impl REPL {
|
|||
println!("{:#?}", self.vm.registers);
|
||||
}
|
||||
_ => {
|
||||
let results = self.parse_hex(buffer);
|
||||
match results {
|
||||
Ok(bytes) => {
|
||||
for byte in bytes {
|
||||
self.vm.add_byte(byte);
|
||||
}
|
||||
let prog = match program(buffer.into()) {
|
||||
Ok((_, prog)) => prog,
|
||||
Err(_) => {
|
||||
println!("Unable to parse input.");
|
||||
continue;
|
||||
}
|
||||
Err(_er) => {
|
||||
println!("Unable to decode hex string. Please enter 4 groups of 2 hex characters.");
|
||||
}
|
||||
}
|
||||
self.vm.run_once();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Accepts the hexadecimal without a leading '0x' and returns a Vec of a
|
||||
/// u8. Example: 00 01 03 E8
|
||||
fn parse_hex(&mut self, i: &str) -> Result<Vec<u8>, ParseIntError> {
|
||||
let split = i.split(" ").collect::<Vec<&str>>();
|
||||
let mut results: Vec<u8> = vec![];
|
||||
for hex_string in split {
|
||||
let byte = u8::from_str_radix(&hex_string, 16);
|
||||
match byte {
|
||||
Ok(result) => {
|
||||
results.push(result);
|
||||
self.vm.program.append(&mut prog.to_bytes());
|
||||
}
|
||||
Err(err) => return Err(err),
|
||||
}
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#![allow(dead_code)]
|
||||
use crate::instruction::Opcode;
|
||||
use crate::assembler::Opcode;
|
||||
|
||||
pub struct VM {
|
||||
/// Array that simulates the hardware register
|
||||
|
|
Reference in a new issue