Merge pull request #2 from tonytins/features/paser

This is the second-to-last part of the assembly parser. It simply adds functionality to the HLT instruction, along with preliminary support for memory allocation. ## Removal of MIPS While I will keep following the tutorial, the MIPS ISA is no longer the target goal after discovering that the MIPS Open Initiative program was shut down. However, MIPS is just one of many derivatives of RISC, which is an open design, and many famous RISC ISAs, such as Power and SPARC, are themselves open. ## Fixes - The VM now executes the whole program until it is stopped. - An instruction that isn't the full 32-bit width is now padded.
2020-02-09 20:12:15 -05:00 · 2020-02-09 20:12:15 -05:00 · 1cc7b2ec6c
commit 1cc7b2ec6c
parent 17ab034784 4dfb99568f
12 changed files with 155 additions and 38 deletions
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@ -1,6 +1,6 @@
 name: Rust

-on: [push]
+on: [push, pull_request]

 jobs:
  build:
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
 # Corten

-Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6)  virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.
+Corten is a RISC virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.

 ## Build Status

--- a/docs/README.md
+++ b/docs/README.md
@ -1,6 +1,6 @@
 # Welcome

-Corten is a [MIPS64 Release 6](https://en.wikipedia.org/wiki/MIPS_architecture#MIPS32/MIPS64_Release_6)  virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.
+Corten is a RISC virtual machine written in Rust as a hobby and based on Fletcher Haynes's [So you want to build a language VM](https://blog.subnetzero.io/post/building-language-vm-part-01/) tutorial.

 ## Specifications

--- a/docs/spec.md
+++ b/docs/spec.md
@ -1,14 +1,14 @@
 # Specifications

-## Instruction Set 
+## Instruction Set

-| Opcode | Function | Comment |
+| Code | Operation  | Comment |
 | --- | --- | --- |
 | 0 | LOAD | Load program |
-| 1 | ADD |
-| 2 | SUB |
-| 3 | MUL |
-| 4 | DIV |
+| 1 | ADD | Add |
+| 2 | SUB | Subtract
+| 3 | MUL | Multiply |
+| 4 | DIV | Division |
 | 5 | HLT | Halt |
 | 6 | JMP | Jump |
 | 7 | JMPF | Jump forward |
@ -20,5 +20,5 @@
 | 13 | LTE | Less then or equal to |
 | 14 | LT | Less then |
 | 15 | JMPE | Jump if equal |
-| 16 | NOP |
+| 16 | NOP | No Operation |
 | _ | IGL | Illegal action |
--- a/src/assembler.rs
+++ b/src/assembler.rs
@ -1,4 +1,7 @@
 #![allow(dead_code)]
+
+use nom::types::CompleteStr;
+
 pub mod instruction_parser;
 pub mod opcode_parser;
 pub mod operand_parser;
@ -41,6 +44,8 @@ pub enum Opcode {
    JMPF,
    /// Jump backward
    JMPB,
+    /// Allocate memory
+    ALOC,
    NOP,
 }

@ -64,6 +69,7 @@ impl From<u8> for Opcode {
            14 => Opcode::LT,
            15 => Opcode::JMPE,
            16 => Opcode::NOP,
+            17 => Opcode::ALOC,
            _ => Opcode::IGL,
        }
    }
@ -89,7 +95,34 @@ impl From<Opcode> for u8 {
            Opcode::GT => 14,
            Opcode::JMPE => 15,
            Opcode::NOP => 16,
+            Opcode::ALOC => 17,
            Opcode::IGL => 100,
        }
    }
-}
+}
+
+impl<'a> From<CompleteStr<'a>> for Opcode {
+    fn from(v: CompleteStr<'a>) -> Self {
+        match v {
+            CompleteStr("load") => Opcode::LOAD,
+            CompleteStr("add") => Opcode::ADD,
+            CompleteStr("sub") => Opcode::SUB,
+            CompleteStr("mul") => Opcode::MUL,
+            CompleteStr("div") => Opcode::DIV,
+            CompleteStr("hlt") => Opcode::HLT,
+            CompleteStr("jmp") => Opcode::JMP,
+            CompleteStr("jmpf") => Opcode::JMPF,
+            CompleteStr("jmpb") => Opcode::JMPB,
+            CompleteStr("eq") => Opcode::EQ,
+            CompleteStr("neq") => Opcode::NEQ,
+            CompleteStr("gte") => Opcode::GTE,
+            CompleteStr("gt") => Opcode::GT,
+            CompleteStr("lte") => Opcode::LTE,
+            CompleteStr("lt") => Opcode::LT,
+            CompleteStr("jmpe") => Opcode::JMPE,
+            CompleteStr("aloc") => Opcode::ALOC,
+            CompleteStr("nop") => Opcode::NOP,
+            _ => Opcode::IGL,
+        }
+    }
+}
--- a/src/assembler/instruction_parser.rs
+++ b/src/assembler/instruction_parser.rs
@ -1,9 +1,8 @@
-use nom::*;
-use crate::assembler::Token;
-use crate::assembler::opcode_parser::opcode_load;
+use crate::assembler::opcode_parser::opcode;
 use crate::assembler::operand_parser::integer_operand;
 use crate::assembler::register_parser::register;
-
+use crate::assembler::Token;
+use nom::*;
 use nom::types::CompleteStr;

 #[derive(Debug, PartialEq)]
@ -30,13 +29,16 @@ impl AssemblerInstruction {
            }
        };

-
        for operand in &[&self.operand1, &self.operand2, &self.operand3] {
            if let Some(token) = operand {
                AssemblerInstruction::extract_operand(token, &mut results)
            }
        }

+        while results.len() < 4 {
+            results.push(0);
+        }
+
        results
    }

@ -44,7 +46,7 @@ impl AssemblerInstruction {
        match t {
            Token::Register { reg_num } => {
                results.push(*reg_num);
-            },
+            }
            Token::Number { value } => {
                let conv = *value as u16;
                let byte1 = conv;
@ -52,19 +54,45 @@ impl AssemblerInstruction {

                results.push(byte2 as u8);
                results.push(byte1 as u8);
-            },
+            }
            _ => {
                println!("Opcode found in operand field");
                std::process::exit(1);
            }
        };
    }
-
 }

-named!(pub instruction_one<CompleteStr, AssemblerInstruction>,
+named!(instruction_two<CompleteStr, AssemblerInstruction>,
    do_parse!(
-        o: opcode_load >>
+        o: opcode >>
+        opt!(multispace) >>
+        (
+            AssemblerInstruction{
+                opcode: o,
+                operand1: None,
+                operand2: None,
+                operand3: None
+            }
+        )
+    )
+);
+
+named!(pub instruction<CompleteStr, AssemblerInstruction>,
+    do_parse!(
+        ins: alt!(
+            instruction_one |
+            instruction_two
+        ) >>
+        (
+        ins
+        )
+    )
+);
+
+named!(instruction_one<CompleteStr, AssemblerInstruction>,
+    do_parse!(
+        o: opcode >>
        r: register >>
        i: integer_operand >>
        (
@ -99,4 +127,21 @@ mod instruction_parser_test {
            ))
        );
    }
-}
+
+    #[test]
+    fn test_parse_instruction_form_two() {
+        let result = instruction_two(CompleteStr("hlt\n"));
+        assert_eq!(
+            result,
+            Ok((
+                CompleteStr(""),
+                AssemblerInstruction {
+                    opcode: Token::Opcode { code: Opcode::HLT },
+                    operand1: None,
+                    operand2: None,
+                    operand3: None
+                }
+            ))
+        );
+    }
+}
--- a/src/assembler/opcode_parser.rs
+++ b/src/assembler/opcode_parser.rs
@ -1,11 +1,18 @@
 #![allow(unused_imports)]
 use nom::*;
-use nom::{digit, types::CompleteStr};
+use nom::{alpha1, digit, types::CompleteStr};

-use crate::assembler::{Token, Opcode};
+use crate::assembler::{Opcode, Token};

-named!(pub opcode_load<CompleteStr, Token>,
-    do_parse!(tag!("load") >> (Token::Opcode{code: Opcode::LOAD}))
+named!(pub opcode<CompleteStr, Token>,
+  do_parse!(
+      opcode: alpha1 >>
+      (
+        {
+            Token::Opcode{code: Opcode::from(opcode)}
+        }
+      )
+  )
 );

 #[cfg(test)]
@ -15,10 +22,13 @@ mod opcode_parser_test {
    #[test]
    fn test_parser_op_load() {
        // Test that opcode is dected and parsed correctly
-        let result = opcode_load(CompleteStr("load"));
+        let result = opcode(CompleteStr("load"));
        assert_eq!(result.is_ok(), true);
        let (rest, token) = result.unwrap();
        assert_eq!(token, Token::Opcode { code: Opcode::LOAD });
        assert_eq!(rest, CompleteStr(""));
+        let result = opcode(CompleteStr("alod"));
+        let (_, token) = result.unwrap();
+        assert_eq!(token, Token::Opcode { code: Opcode::IGL });
    }
 }
--- a/src/assembler/operand_parser.rs
+++ b/src/assembler/operand_parser.rs
@ -21,7 +21,7 @@ mod reg_parser_test {
    use super::*;

    #[test]
-    fn test_opcode_load() {
+    fn test_opcode() {
        let result = integer_operand(CompleteStr("#10"));
        let (rest, value) = result.unwrap();
        assert_eq!(rest, CompleteStr(""));
--- a/src/assembler/program_parser.rs
+++ b/src/assembler/program_parser.rs
@ -1,7 +1,7 @@
 use nom::types::CompleteStr;
 use nom::*;

-use crate::assembler::instruction_parser::{instruction_one, AssemblerInstruction};
+use crate::assembler::instruction_parser::{instruction, AssemblerInstruction};

 #[derive(Debug, PartialEq)]
 pub struct Program {
@ -20,7 +20,7 @@ impl Program {

 named!(pub program<CompleteStr, Program>,
    do_parse!(
-        instructions: many1!(instruction_one) >>
+        instructions: many1!(instruction) >>
        (
            Program {
                instructions
--- a/src/instruction.rs
+++ b/src/instruction.rs
@ -15,8 +15,9 @@ impl Instruction {

 #[cfg(test)]
 mod instruction_tests {
-    use crate::instruction::*;
    use crate::assembler::Opcode;
+    use crate::instruction::*;
+    use nom::types::CompleteStr;

    #[test]
    fn test_crate_hlt() {
@ -29,4 +30,14 @@ mod instruction_tests {
        let inst = Instruction::new(Opcode::HLT);
        assert_eq!(inst.opcode, Opcode::HLT);
    }
+
+    #[test]
+    fn test_str_to_opcode() {
+        let opcode = Opcode::from(CompleteStr("load"));
+        assert_eq!(opcode, Opcode::LOAD);
+        let opcode = Opcode::from(CompleteStr("add"));
+        assert_eq!(opcode, Opcode::ADD);
+        let opcode = Opcode::from(CompleteStr("illegal"));
+        assert_eq!(opcode, Opcode::IGL);
+    }
 }
--- a/src/repl.rs
+++ b/src/repl.rs
@ -2,11 +2,10 @@ use std;
 use std::io;
 use std::io::Write;

-use crate::vm::VM;
 use crate::assembler::program_parser::program;
+use crate::vm::VM;
 use metacrate::crate_version;
 use rbtag::{BuildDateTime, BuildInfo};
-use nom::types::CompleteStr;

 #[derive(BuildDateTime, BuildInfo)]
 struct BuildTag;
--- a/src/vm.rs
+++ b/src/vm.rs
@ -8,7 +8,9 @@ pub struct VM {
    pc: usize,
    /// The byte of the program being ran
    pub program: Vec<u8>,
-    /// The remainer of the module used in the division opcode
+    /// Memory allocation
+    heap: Vec<u8>,
+    /// The remainder of the module used in the division opcode
    remainder: u32,
    /// The result of the last comparison operation
    equal_flag: bool,
@ -21,6 +23,7 @@ impl VM {
            registers: [0; 32],
            pc: 0,
            program: vec![],
+            heap: vec![],
            remainder: 0,
            equal_flag: false,
        }
@ -71,7 +74,7 @@ impl VM {
    /// various public functions.
    fn execute_instruction(&mut self) -> bool {
        if self.pc >= self.program.len() {
-            return false;
+            return true;
        }
        match self.decode_opcode() {
            Opcode::LOAD => {
@ -81,11 +84,11 @@ impl VM {
            }
            Opcode::HLT => {
                println!("HLT encountered");
-                return false;
+                return true;
            }
            Opcode::IGL => {
                println!("Unrecognized opcode found! Terminating!");
-                return false;
+                return true;
            }
            Opcode::ADD => {
                let reg1 = self.registers[self.next_8_bits() as usize];
@ -204,8 +207,14 @@ impl VM {
                self.next_8_bits();
                self.next_8_bits();
            }
+            Opcode::ALOC => {
+                let reg = self.next_8_bits() as usize;
+                let bytes = self.registers[reg];
+                let new_end = self.heap.len() as i32 + bytes;
+                self.heap.resize(new_end as usize, 0);
+            }
        }
-        true
+        false
    }
 }

@ -260,6 +269,16 @@ mod vm_tests {
        assert_eq!(vm.registers[2], 2);
    }

+    #[test]
+    fn test_aloc_opcode()
+    {
+        let mut vm = get_test_vm();
+        vm.registers[0] = 1024;
+        vm.program = vec![17, 0, 0, 0];
+        vm.run_once();
+        assert_eq!(vm.heap.len(), 1024);
+    }
+
    #[test]
    fn test_eq_opcode() {
        let mut vm = get_test_vm();