diff --git a/.gitignore b/.gitignore index a21312c..28ac70d 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,6 @@ Mkfile.old dkms.conf # project specific -out/ \ No newline at end of file +out/ +.ccls-cache/ +.vscode/ \ No newline at end of file diff --git a/arch/linux/gui/main.c b/arch/linux/gui/main.c index 7ecb7ea..15445e3 100644 --- a/arch/linux/gui/main.c +++ b/arch/linux/gui/main.c @@ -33,7 +33,5 @@ i32 main() { while(step_vm()) { // do stuff } - - printf("done\n"); return 0; } diff --git a/arch/linux/tui/main.c b/arch/linux/tui/main.c index d14f507..8ee30bf 100644 --- a/arch/linux/tui/main.c +++ b/arch/linux/tui/main.c @@ -14,23 +14,37 @@ bool init_vm() { mp = 0; cp = 0; pc = 0; - interrupt = 0; + interrupt = 0; return true; } u32 syscall(u32 id, u32 args, u32 mem_ptr) { - USED(id); USED(args); - USED(mem_ptr); - return 0; // success + switch(id) { + case SYSCALL_DBG_PRINT: { + printf("%d\n", mem[mem_ptr]); + return 0; + } + } + + return 1; // generic error } i32 main() { + init_vm(); + + // hardcoded add 2 numbers and print debug + int a = mp; mp+=4; + int b = mp; mp+=4; + int c = mp; mp+=4; + code[cp++] = ENCODE_B(OP_LOAD_IMM, a, 1); + code[cp++] = ENCODE_B(OP_LOAD_IMM, b, 2); + code[cp++] = ENCODE_A(OP_ADD_INT, c, b, a); + code[cp++] = ENCODE_A(OP_SYSCALL, SYSCALL_DBG_PRINT, 1, c); + code[cp++] = ENCODE_A(OP_HALT, 0, 0, 0); while(step_vm()) { // do stuff } - - printf("done\n"); return 0; } diff --git a/tools/assembler/assembler.c b/tools/assembler/assembler.c new file mode 100644 index 0000000..e51785e --- /dev/null +++ b/tools/assembler/assembler.c @@ -0,0 +1,7 @@ +#include "assembler.h" +/** + * Emit bytecode to the VM from the source string. 
+ */
+void assemble(char *source, ScopeTable *st) {
+
+}
diff --git a/tools/assembler/assembler.h b/tools/assembler/assembler.h
new file mode 100644
index 0000000..6ce7cc4
--- /dev/null
+++ b/tools/assembler/assembler.h
@@ -0,0 +1,55 @@
+#ifndef UNDAR_IR_ASSEMBLER_H
+#define UNDAR_IR_ASSEMBLER_H
+
+#include "../../vm/libc.h"
+#include "lexer.h"
+
+typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
+typedef enum {
+  VOID,
+  BOOL,
+  I8,
+  I16,
+  I32,
+  U8,
+  U16,
+  U32,
+  F8,
+  F16,
+  F32,
+  STR,
+  PLEX,
+  ARRAY,
+  FUNCTION
+} SymbolType;
+
+typedef struct symbol_s Symbol;
+typedef struct symbol_tab_s SymbolTable;
+typedef struct scope_tab_s ScopeTable;
+
+#define MAX_SYMBOL_NAME_LENGTH 64
+struct symbol_s {
+  char name[MAX_SYMBOL_NAME_LENGTH];
+  u8 name_length;
+  SymbolType type;
+  ScopeType scope;
+  u32 ref; // vm->mp if global, vm->pc local, register if var
+  u32 size; // size of symbol
+};
+
+struct symbol_tab_s {
+  Symbol symbols[256];
+  u8 count; // NOTE(review): u8 tops out at 255 while symbols[] has 256 slots - confirm intended width
+  i32 parent;
+};
+
+struct scope_tab_s {
+  SymbolTable *scopes;
+  u32 count;
+  u32 capacity;
+  i32 scope_ref;
+};
+
+void assemble(char *source, ScopeTable *st);
+
+#endif
diff --git a/tools/assembler/lexer.c b/tools/assembler/lexer.c
new file mode 100644
index 0000000..cd62088
--- /dev/null
+++ b/tools/assembler/lexer.c
@@ -0,0 +1,401 @@
+#include <string.h>
+
+#include "../../vm/libc.h"
+#include "lexer.h"
+
+typedef struct {
+  const char *start; // first character of the token being scanned
+  const char *current; // next character to be consumed
+  i32 line; // current source line, starts at 1
+} Lexer;
+
+Lexer lexer; // single lexer state shared by every function in this file
+
+void init_lexer(const char *source) { // point the lexer at the beginning of source
+  lexer.start = source;
+  lexer.current = source;
+  lexer.line = 1;
+}
+
+static bool is_alpha(char c) { // ASCII letters and underscore
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
+}
+
+static bool is_digit(char c) { return c >= '0' && c <= '9'; }
+
+static bool is_at_end() { return *lexer.current == '\0'; }
+
+static char advance() { // consume one character and return it
+  lexer.current++;
+  return lexer.current[-1];
+}
+
+static char peek() { return *lexer.current; }
+
+static char peek_next() { // character after the current one, '\0' at end of input
+  if (is_at_end())
+    return
'\0'; + return lexer.current[1]; +} + +static bool match(char expected) { + if (is_at_end()) + return false; + if (*lexer.current != expected) + return false; + lexer.current++; + return true; +} + +static Token make_token(TokenType type) { + Token token; + token.type = type; + token.start = lexer.start; + token.length = (i32)(lexer.current - lexer.start); + token.line = lexer.line; + return token; +} + +static Token error_token(const char *message) { + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (i32)strlen(message); + token.line = lexer.line; + return token; +} + +static void skip_whitespace() { + for (;;) { + char c = peek(); + switch (c) { + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + lexer.line++; + advance(); + break; + case '/': + if (peek_next() == '/') { + // Single-line comment: skip until newline or end of file + advance(); + while (peek() != '\n' && !is_at_end()) + advance(); + } else if (peek_next() == '*') { + // Multi-line comment: skip until '*/' or end of file + advance(); + advance(); + while (!is_at_end()) { + if (peek() == '\n') + lexer.line++; + if (peek() == '*' && peek_next() == '/') { + advance(); + advance(); + break; // Exit loop, comment ended + } + advance(); + } + } else { + return; // Not a comment, let tokenization handle it + } + break; + default: + return; + } + } +} + +static TokenType check_keyword(i32 start, i32 length, const char *rest, + TokenType type) { + if (lexer.current - lexer.start == start + length && + memcmp(lexer.start + start, rest, length) == 0) { + return type; + } + + return TOKEN_IDENTIFIER; +} + +static TokenType identifierType() { + switch (lexer.start[0]) { + case 'a': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'n': + return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND); + case 's': + return check_keyword(2, 0, "", TOKEN_KEYWORD_AS); + } + } + break; + case 'c': + if (lexer.current - lexer.start > 1) { + switch 
(lexer.start[1]) {
+      case 'l':
+        return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE);
+      case 'o':
+        return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST);
+      }
+    }
+    break;
+  case 'e':
+    return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
+  case 'f':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'a':
+        return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE);
+      case 'o':
+        return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR);
+      case '3':
+        return check_keyword(2, 1, "2", TOKEN_TYPE_REAL); /* "f32": "f3" already matched, the remaining "2" sits at offset 2 */
+      }
+      return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN); /* any other second letter: only "function" is a keyword */
+    }
+    break;
+  case 'i':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'f':
+        return check_keyword(2, 0, "", TOKEN_KEYWORD_IF);
+      case 's':
+        return check_keyword(2, 0, "", TOKEN_KEYWORD_IS);
+      case '8':
+        return check_keyword(2, 0, "", TOKEN_TYPE_I8);
+      case '1':
+        return check_keyword(2, 1, "6", TOKEN_TYPE_I16);
+      case '3':
+        return check_keyword(2, 1, "2", TOKEN_TYPE_INT);
+      case 'n':
+        if (lexer.current - lexer.start > 2) {
+          switch (lexer.start[2]) {
+          case 'i':
+            return check_keyword(3, 1, "t", TOKEN_KEYWORD_INIT); /* "init": one remaining character, so length is 1 */
+          case 't':
+            return check_keyword(3, 0, "", TOKEN_TYPE_INT);
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'n':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'a':
+        return check_keyword(2, 1, "t", TOKEN_TYPE_NAT);
+      case 'i':
+        return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL);
+      }
+    }
+    break;
+  case 'o':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'p':
+        return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN);
+      case 'r':
+        return check_keyword(2, 0, "", TOKEN_OPERATOR_OR);
+      }
+    }
+    break;
+  case 'p':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'l':
+        return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX);
+      }
+    }
+    break;
+  case 'r':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'e':
+        if (lexer.current - lexer.start > 2) {
+          switch (lexer.start[2]) {
+          case 'f':
+            return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH);
+          case 't':
+            return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
+          case 'a':
+            if (lexer.current - lexer.start > 3) {
+              switch(lexer.start[3]) {
+              case 'd':
+                return check_keyword(4, 0, "", TOKEN_KEYWORD_READ);
+              case 'l':
+                return check_keyword(4, 0, "", TOKEN_TYPE_REAL);
+              }
+            }
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 's':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 't':
+        return check_keyword(2, 1, "r", TOKEN_TYPE_STR);
+      }
+    }
+    break;
+  case 't':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'h':
+        return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS);
+      case 'r':
+        return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE);
+      }
+    }
+    break;
+  case 'u':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 's':
+        return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE);
+      case '8':
+        return check_keyword(2, 0, "", TOKEN_TYPE_U8);
+      case '1':
+        return check_keyword(2, 1, "6", TOKEN_TYPE_U16);
+      case '3':
+        return check_keyword(2, 1, "2", TOKEN_TYPE_NAT); /* "u32" shares the token of "nat" */
+      }
+    }
+    break;
+  case 'w':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'h':
+        return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE);
+      case 'r':
+        return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE);
+      }
+    }
+    break;
+  case 'b':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'y':
+        return check_keyword(2, 2, "te", TOKEN_TYPE_U8); /* "byte" shares the token of "u8" */
+      case 'o':
+        return check_keyword(2, 2, "ol", TOKEN_TYPE_BOOL); /* "bool" has its own token in TokenType */
+      }
+    }
+    break;
+  case 'g':
+    return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL);
+  case 'l':
+    return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP);
+  case 'd':
+    return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO);
+  case 'v':
+    return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID);
+  }
+
+  return TOKEN_IDENTIFIER; /* nothing matched: a plain identifier */
+}
+
+static Token identifier() {
+  while (is_alpha(peek()) ||
is_digit(peek())) + advance(); + return make_token(identifierType()); +} + +static Token number() { + while (is_digit(peek())) + advance(); + + /* Look for a fractional part. */ + if (peek() == '.' && is_digit(peek_next())) { + /* Consume the ".". */ + advance(); + + while (is_digit(peek())) + advance(); + + return make_token(TOKEN_LITERAL_REAL); + } + + return make_token(TOKEN_LITERAL_INT); +} + +static Token string() { + while (peek() != '"' && !is_at_end()) { + if (peek() == '\n') + lexer.line++; + advance(); + } + + if (is_at_end()) + return error_token("Unterminated string."); + + /* The closing quote. */ + advance(); + return make_token(TOKEN_LITERAL_STR); +} + +Token next_token() { + skip_whitespace(); + lexer.start = lexer.current; + + if (is_at_end()) + return make_token(TOKEN_EOF); + + char c = advance(); + if (is_alpha(c)) + return identifier(); + char next = peek(); + if ((c == '-' && is_digit(next)) || is_digit(c)) + return number(); + + switch (c) { + case '(': + return make_token(TOKEN_LPAREN); + case ')': + return make_token(TOKEN_RPAREN); + case '{': + return make_token(TOKEN_LBRACE); + case '}': + return make_token(TOKEN_RBRACE); + case '[': + return make_token(TOKEN_LBRACKET); + case ']': + return make_token(TOKEN_RBRACKET); + case ';': + return make_token(TOKEN_SEMICOLON); + case ',': + return make_token(TOKEN_COMMA); + case '.': + return make_token(TOKEN_DOT); + case '-': + return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS); + case '+': + return make_token(TOKEN_PLUS); + case '/': + return make_token(TOKEN_SLASH); + case '&': + return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND); + case '#': + return make_token(TOKEN_MESH); + case '$': + return make_token(TOKEN_BIG_MONEY); + case '*': + return make_token(TOKEN_STAR); + case '!': + return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + case '=': + return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + case '<': + return make_token(match('=') ? 
TOKEN_LTE : TOKEN_LT); + case '>': + return make_token(match('=') ? TOKEN_GTE : TOKEN_GT); + case '"': + return string(); + } + + return error_token("Unexpected character."); +} diff --git a/tools/assembler/lexer.h b/tools/assembler/lexer.h new file mode 100644 index 0000000..7f352f8 --- /dev/null +++ b/tools/assembler/lexer.h @@ -0,0 +1,88 @@ +#ifndef UNDAR_LEXER_H +#define UNDAR_LEXER_H + +typedef enum { + TOKEN_EOF, + TOKEN_IDENTIFIER, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_NAT, + TOKEN_LITERAL_REAL, + TOKEN_LITERAL_STR, + TOKEN_TYPE_I8, + TOKEN_TYPE_I16, + TOKEN_TYPE_INT, + TOKEN_TYPE_U8, + TOKEN_TYPE_U16, + TOKEN_TYPE_NAT, + TOKEN_TYPE_REAL, + TOKEN_TYPE_STR, + TOKEN_TYPE_BOOL, + TOKEN_TYPE_VOID, + TOKEN_KEYWORD_PLEX, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_IS, + TOKEN_KEYWORD_AS, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_INIT, + TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_GLOBAL, + TOKEN_KEYWORD_OPEN, + TOKEN_KEYWORD_READ, + TOKEN_KEYWORD_WRITE, + TOKEN_KEYWORD_REFRESH, + TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_LOOP, + TOKEN_KEYWORD_DO, + TOKEN_KEYWORD_NIL, + TOKEN_KEYWORD_TRUE, + TOKEN_KEYWORD_FALSE, + TOKEN_OPERATOR_NOT, + TOKEN_OPERATOR_AND, + TOKEN_OPERATOR_OR, + TOKEN_BANG, + TOKEN_BANG_EQ, + TOKEN_EQ, + TOKEN_EQ_EQ, + TOKEN_AND, + TOKEN_AND_AND, + TOKEN_GT, + TOKEN_LT, + TOKEN_GTE, + TOKEN_LTE, + TOKEN_DOT, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_MESH, + TOKEN_BIG_MONEY, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_ARROW_RIGHT, + TOKEN_ERROR +} TokenType; + +typedef struct { + TokenType type; + const char *start; + int length; + int line; +} Token; + +void init_lexer(const char *source); +Token next_token(); + +#endif diff --git a/vm/vm.c b/vm/vm.c index f859f59..dd1348c 100644 --- a/vm/vm.c +++ b/vm/vm.c @@ 
-11,7 +11,7 @@ u8 *mem; /* memory */ bool step_vm() { - u32 instruction = code[pc]; + u32 instruction = code[pc++]; u8 opcode = DECODE_OP(instruction); switch (opcode) { @@ -20,8 +20,20 @@ bool step_vm() { return false; } case OP_CALL: { + /* DECODE_A(instruction) */ + /* push return address to child frame */ + /* push local address to return the value to */ + /* push current mp value to reset the heap to */ + /* move mp by args many locals */ + /* jump to dest_ptr */ + return true; } case OP_RETURN: { + /* DECODE_A(instruction) */ + /* copy return value to parent return local */ + /* reset mp to saved mp */ + /* jump to parent frame */ + return true; } case OP_SYSCALL: { DECODE_A(instruction) @@ -34,62 +46,205 @@ bool step_vm() { case OP_LOAD_IMM: { DECODE_B(instruction) u32 rd = fp + dest; - mem[rd] = imm; + WRITE_U32(rd, imm); return true; } case OP_LOAD_UPPER_IMM: { DECODE_B(instruction) u32 rd = fp + dest; - mem[rd] = (mem[rd] | (((u32)(imm)) << 16)); + u32 value = READ_U32(rd); + WRITE_U32(rd, (value | (((u32)(imm)) << 16))); return true; } - case OP_LOAD_IND_8: { - } - case OP_LOAD_IND_16: { - } - case OP_LOAD_IND_32: { - } - case OP_LOAD_ABS_8: { - } - case OP_LOAD_ABS_16: { - } - case OP_LOAD_ABS_32: { - } - case OP_LOAD_OFF_8: { - } - case OP_LOAD_OFF_16: { - } - case OP_LOAD_OFF_32: { - } - case OP_STORE_ABS_8: { - } - case OP_STORE_ABS_16: { - } - case OP_STORE_ABS_32: { - } - case OP_STORE_IND_8: { - } - case OP_STORE_IND_16: { - } - case OP_STORE_IND_32: { - } - case OP_STORE_OFF_8: { - } - case OP_STORE_OFF_16: { - } - case OP_STORE_OFF_32: { - } case OP_MEM_ALLOC: { + DECODE_A(instruction) + u32 size, ldest; + u32 rd = fp + dest; + u32 r1 = fp + src1; + USED(src2); + ldest = READ_U32(rd); + WRITE_U32(ldest, mp); + size = READ_U32(r1); + WRITE_U32(mp, size); + mp += (size + 4); + return true; } - case OP_MEM_CPY: { + case OP_MEM_CPY_8: { + DECODE_A(instruction) + u32 i, count, mdest, msrc; + + u32 rd = fp + dest; + u32 r1 = fp + src1; + u32 r2 = fp + 
src2;
+
+    mdest = READ_U32(rd); /* address the loop below reads from */
+    msrc = READ_U32(r1); /* address the loop below writes to */
+    count = READ_U32(r2);
+
+    if (mdest + count >= mp) {
+      flag = 1;
+      return true;
+    }
+
+    for (i = 0; i < count; i ++) {
+      u8 value = READ_U8(mdest + i);
+      WRITE_U8(msrc + i, value);
+    }
+
+    flag = 0;
+    return true;
+  }
+  case OP_MEM_CPY_16: {
+    DECODE_A(instruction)
+    u32 i, count, mdest, msrc;
+
+    u32 rd = fp + dest;
+    u32 r1 = fp + src1;
+    u32 r2 = fp + src2;
+
+    mdest = READ_U32(rd); /* address the loop below reads from */
+    msrc = READ_U32(r1); /* address the loop below writes to */
+    count = READ_U32(r2);
+
+    if (mdest + count >= mp) {
+      flag = 1;
+      return true;
+    }
+
+    for (i = 0; i < count; i += 2) { /* advance by one u16 per copy, as OP_MEM_SET_16 does */
+      u16 value = READ_U16(mdest + i);
+      WRITE_U16(msrc + i, value);
+    }
+
+    flag = 0;
+    return true;
+  }
+  case OP_MEM_CPY_32: {
+    DECODE_A(instruction)
+    u32 i, count, mdest, msrc;
+
+    u32 rd = fp + dest;
+    u32 r1 = fp + src1;
+    u32 r2 = fp + src2;
+
+    mdest = READ_U32(rd); /* address the loop below reads from */
+    msrc = READ_U32(r1); /* address the loop below writes to */
+    count = READ_U32(r2);
+
+    if (mdest + count >= mp) {
+      flag = 1;
+      return true;
+    }
+
+    for (i = 0; i < count; i += 4) { /* advance by one u32 per copy, as OP_MEM_SET_32 does */
+      u32 value = READ_U32(mdest + i);
+      WRITE_U32(msrc + i, value);
+    }
+
+    flag = 0;
+    return true;
   }
   case OP_MEM_SET_8: {
+    DECODE_A(instruction)
+    u32 i, start, end;
+
+    u32 rd = fp + dest;
+    u32 r1 = fp + src1;
+    u32 r2 = fp + src2;
+
+    u8 value = (u8)READ_U32(r1);
+    u32 count = READ_U32(r2);
+
+    if (count == 0) { /* test the count itself; r2 is only the address of the local holding it */
+      flag = 1;
+      return true;
+    }
+
+    start = rd;
+    end = start + count;
+
+    if (start >= mp || r2 > mp || end > mp) {
+      flag = 1;
+      return true;
+    }
+
+    for (i = start; i < end; i ++) {
+      WRITE_U8(i, value);
+    }
+
+    flag = 0;
+    return true;
   }
   case OP_MEM_SET_16: {
+    DECODE_A(instruction)
+    u32 i, start, end;
+
+    u32 rd = fp + dest;
+    u32 r1 = fp + src1;
+    u32 r2 = fp + src2;
+
+    u16 value = (u16)READ_U32(r1);
+    u32 count = READ_U32(r2);
+
+    if (count == 0) { /* test the count itself; r2 is only the address of the local holding it */
+      flag = 1;
+      return true;
+    }
+
+    start = rd;
+    end = start + count;
+
+    if (start >= mp || r2 > mp || end > mp) {
+      flag = 1;
+      return true;
+    }
+
+    for (i = start; i < end; i += 2) {
+      WRITE_U16(i, value);
+    }
+
+
flag = 0;
+    return true;
   }
   case OP_MEM_SET_32: {
+    DECODE_A(instruction)
+    u32 i, start, end;
+
+    u32 rd = fp + dest;
+    u32 r1 = fp + src1;
+    u32 r2 = fp + src2;
+
+    u32 value = READ_U32(r1);
+    u32 count = READ_U32(r2);
+
+    if (count == 0) { /* test the count itself; r2 is only the address of the local holding it */
+      flag = 1;
+      return true;
+    }
+
+    start = rd;
+    end = start + count;
+
+    if (start >= mp || r2 > mp || end > mp) {
+      flag = 1;
+      return true;
+    }
+
+    for (i = start; i < end; i += 4) {
+      WRITE_U32(i, value);
+    }
+
+    flag = 0;
+    return true;
   }
-  case OP_REG_MOV: {
+  case OP_MOV: {
+    DECODE_A(instruction)
+    u32 rd = fp + dest;
+    u32 r1 = fp + src1;
+    u32 value = READ_U32(r1);
+    USED(src2);
+    WRITE_U32(rd, value);
+    return true;
   }
   case OP_ADD_INT: {
     MATH_OP(i32, +);
@@ -127,18 +282,18 @@ bool step_vm() {
     u32 r1 = fp + src1;
     u32 r2 = fp + src2;
 
-    i32 src1_whole = (i32)mem[r1] >> 16;
-    i32 src2_whole = (i32)mem[r2] >> 16;
+    i32 src1_whole = (i32)READ_U32(r1) >> 16;
+    i32 src2_whole = (i32)READ_U32(r2) >> 16;
 
-    i32 src1_decimal = (i32)mem[r1] & 16;
-    i32 src2_decimal = (i32)mem[r2] & 16;
+    i32 src1_decimal = (i32)READ_U32(r1) & 0xFFFF; /* low 16 bits hold the fractional part */
+    i32 src2_decimal = (i32)READ_U32(r2) & 0xFFFF; /* low 16 bits hold the fractional part */
 
     i32 result = 0;
     result += (src1_whole * src2_whole) << 16;
     result += (src1_whole * src2_decimal);
     result += (src1_decimal * src2_whole);
-    result += ((src1_decimal * src2_decimal) >> 16) & 16;
-    mem[rd] = result;
+    result += (i32)(((u32)src1_decimal * (u32)src2_decimal) >> 16); /* unsigned product: 0xFFFF * 0xFFFF does not fit in i32 */
+    WRITE_U32(rd, result);
     return true;
   }
   case OP_DIV_REAL: {
@@ -148,8 +303,8 @@ bool step_vm() {
     u32 r1 = fp + src1;
     u32 r2 = fp + src2;
 
-    i32 src1_val = (i32)mem[r1];
-    i32 src2_val = (i32)mem[r2];
+    i32 src1_val = (i32)READ_U32(r1);
+    i32 src2_val = (i32)READ_U32(r2);
 
     u32 src2_reciprocal = 1;
     src2_reciprocal <<= 31;
@@ -157,68 +312,80 @@ bool step_vm() {
 
     result = src1_val * src2_reciprocal;
     result <<= 1;
-    mem[rd] = result;
+    WRITE_U32(rd, result);
     return true;
   }
   case OP_INT_TO_REAL: {
     DECODE_A(instruction)
     u32 rd = fp + dest;
     u32 r1 = fp + src1;
+    i32 result = (i32)READ_U32(r1) << 16;
     USED(src2);
-    mem[rd] = (i32)mem[r1] << 16;
+    WRITE_U32(rd, result);
return true; } case OP_INT_TO_NAT: { DECODE_A(instruction) u32 rd = fp + dest; u32 r1 = fp + src1; + u32 result = (u32)READ_U32(r1); USED(src2); - mem[rd] = (u32)mem[r1]; + WRITE_U32(rd, result); return true; } case OP_NAT_TO_REAL: { DECODE_A(instruction) u32 rd = fp + dest; u32 r1 = fp + src1; + i32 result = ((i32)READ_U32(r1) << 16); USED(src2); - mem[rd] = (i32)mem[r1] << 16; + WRITE_U32(rd, result); return true; } case OP_NAT_TO_INT: { DECODE_A(instruction) u32 rd = fp + dest; u32 r1 = fp + src1; + i32 result = ((i32)READ_U32(r1)); USED(src2); - mem[rd] = (i32)mem[r1]; + WRITE_U32(rd, result); return true; } case OP_REAL_TO_INT: { DECODE_A(instruction) u32 rd = fp + dest; u32 r1 = fp + src1; + i32 result = ((i32)READ_U32(r1) >> 16); USED(src2); - mem[rd] = (i32)mem[r1] >> 16; + WRITE_U32(rd, result); return true; } case OP_REAL_TO_NAT: { DECODE_A(instruction) u32 rd = fp + dest; u32 r1 = fp + src1; + u32 result = ((u32)READ_U32(r1) >> 16); USED(src2); - mem[rd] = (u32)mem[r1] >> 16; + WRITE_U32(rd, result); return true; } case OP_BIT_SHIFT_LEFT: { + MATH_OP_NO_CAST(<<); } case OP_BIT_SHIFT_RIGHT: { + MATH_OP_NO_CAST(>>); } case OP_BIT_SHIFT_R_EXT: { + MATH_OP(i32, >>); } case OP_BIT_AND: { + MATH_OP_NO_CAST(&); } case OP_BIT_OR: { + MATH_OP_NO_CAST(|); } case OP_BIT_XOR: { + MATH_OP_NO_CAST(^); } case OP_JMP_IMM: { DECODE_C(instruction) @@ -226,10 +393,49 @@ bool step_vm() { return true; } case OP_JMP_ABS: { + DECODE_A(instruction) + u32 rd = fp + dest; + u32 jmp_dest = READ_U32(rd); + if (jmp_dest > cp) { + flag = 1; + return true; + } + USED(src1); + USED(src2); + + pc = jmp_dest; + return true; } case OP_JMP_OFF: { + DECODE_A(instruction) + u32 rd = fp + dest; + u32 r1 = fp + src1; + + u32 jmp_dest = READ_U32(rd) + READ_U32(r1); + if (jmp_dest > cp) { + flag = 1; + return true; + } + USED(src2); + + pc = jmp_dest; + return true; } case OP_JMP_FLAG: { + DECODE_A(instruction) + u32 mask; + u32 rd = fp + dest; + u32 jmp_dest = READ_U32(rd); + if (jmp_dest > cp) 
{ + flag = 1; + return true; + } + USED(src1); + USED(src2); + + mask = -(u32)(flag == 0); + pc = (jmp_dest & mask) | (pc & ~mask); + return true; } case OP_JEQ_INT: { COMPARE_AND_JUMP(i32, ==); diff --git a/vm/vm.h b/vm/vm.h index 96efd4a..4f09ea4 100644 --- a/vm/vm.h +++ b/vm/vm.h @@ -36,35 +36,19 @@ typedef enum { OP_HALT, /* halt : A : all zeros : halt execution */ - OP_CALL, /* call : A : dest args fn_ptr : creates a new frame */ - OP_RETURN, /* return : A : dest args : returns from a frame to the parent frame */ + OP_CALL, /* call : A : dest args return : creates a new frame */ + OP_RETURN, /* return : A : dest : returns from a frame to the parent frame */ OP_SYSCALL, /* syscall : A : id args mem_ptr : does a system call based on id with args */ OP_LOAD_IMM, /* load_immediate : B : locals[dest] = const as u16 */ OP_LOAD_UPPER_IMM, /* load_upper_immediate : B : locals[dest] = const as u32 << 16 | u16 */ - OP_LOAD_IND_8, /* load_indirect_8 : A : locals[dest] = memory[locals[src1]] as u8 */ - OP_LOAD_IND_16, /* load_indirect_16 : A : locals[dest] = memory[locals[src1]] as u16 */ - OP_LOAD_IND_32, /* load_indirect_32 : A : locals[dest] = memory[locals[src1]] as u32 */ - OP_LOAD_ABS_8, /* load_absolute_8 : A : locals[dest] = memory[src1] as u8 */ - OP_LOAD_ABS_16, /* load_absolute_16 : A : locals[dest] = memory[src1] as u16 */ - OP_LOAD_ABS_32, /* load_absolute_32 : A : locals[dest] = memory[src1] as u32 */ - OP_LOAD_OFF_8, /* load_offset_8 : A : locals[dest] = memory[locals[src1] + offset] as u8 */ - OP_LOAD_OFF_16, /* load_offset_16 : A : locals[dest] = memory[locals[src1] + offset] as u16 */ - OP_LOAD_OFF_32, /* load_offset_32 : A : locals[dest] = memory[locals[src1] + offset] as u32 */ - OP_STORE_ABS_8, /* store_absolute_8 : A : memory[dest] = src1 && 0xFF */ - OP_STORE_ABS_16, /* store_absolute_16 : A : memory[dest] = src1 && 0xFFFF */ - OP_STORE_ABS_32, /* store_absolute_32 : A : memory[dest] = src1 */ - OP_STORE_IND_8, /* store_indirect_8 : A : memory[dest] 
= locals[src1] && 0xFF */ - OP_STORE_IND_16, /* store_indirect_16 : A : memory[dest] = locals[src1] && 0xFFFF*/ - OP_STORE_IND_32, /* store_indirect_32 : A : memory[dest] = locals[src1] */ - OP_STORE_OFF_8, /* store_offset_8 : A : memory[locals[dest] + offset] = locals[src1] && 0xFF */ - OP_STORE_OFF_16, /* store_offset_16 : A : memory[locals[dest] + offset] = locals[src1] && 0xFFFF */ - OP_STORE_OFF_32, /* store_offset_32 : A : memory[locals[dest] + offset] = locals[src1] */ OP_MEM_ALLOC, /* alloc : A : memory[dest] = [locals[src1] as size + 4] */ - OP_MEM_CPY, /* memcpy : A : memory[src1 .. src1 + count] = memory[dest .. dest + count] */ + OP_MEM_CPY_8, /* memcpy_8 : A : memory[src1 .. src1 + count] = memory[dest .. dest + count] */ + OP_MEM_CPY_16, /* memcpy_16 : A : memory[src1 .. src1 + count] = memory[dest .. dest + count] */ + OP_MEM_CPY_32, /* memcpy_32 : A : memory[src1 .. src1 + count] = memory[dest .. dest + count] */ OP_MEM_SET_8, /* memset_8 : A : memory[dest .. dest + count] = local[src1] as u8 */ OP_MEM_SET_16, /* memset_16 : A : memory[dest .. dest + count] = local[src1] as u16 */ OP_MEM_SET_32, /* memset_32 : A : memory[dest .. 
dest + count] = local[src1] as u32 */ - OP_REG_MOV, /* register_move : A : locals[dest] = locals[src1] */ + OP_MOV, /* mov : A : locals[dest] = locals[src1] */ OP_ADD_INT, /* add_int : A : locals[dest] = locals[src1] + locals[src2] */ OP_SUB_INT, /* sub_int : A : locals[dest] = locals[src1] - locals[src2] */ OP_MUL_INT, /* mul_int : A : locals[dest] = locals[src1] * locals[src2] */ @@ -114,6 +98,11 @@ typedef enum { OP_MAX_OPCODE /* not an opcode count of instructions */ } Opcode; +typedef enum { + SYSCALL_DBG_PRINT, /* temporary debugging print, use tunnel later */ + SYSCALL_MAX +} Syscall; + extern u32 pc; /* program counter */ extern u32 cp; /* code pointer */ extern u32 mp; /* memory pointer */ @@ -123,6 +112,12 @@ extern u8 interrupt; /* device interrupt */ extern u32 *code; /* code */ extern u8 *mem; /* memory */ +/** + * Frames + * + * + */ + #define READ_U8(addr) (mem[addr]) #define READ_U16(addr) \ @@ -150,12 +145,12 @@ extern u8 *mem; /* memory */ #define WRITE_U32(addr, value) \ do { \ - if ((addr) + 3 < sizeof(mem)) { \ - mem[(addr)] = (value) & 0xFF; \ - mem[(addr) + 1] = ((value) >> 8) & 0xFF; \ - mem[(addr) + 2] = ((value) >> 16) & 0xFF; \ - mem[(addr) + 3] = ((value) >> 24) & 0xFF; \ - } \ + if (addr + 3 < sizeof(mem)) { \ + mem[addr] = (value) & 0xFF; \ + mem[addr + 1] = ((value) >> 8) & 0xFF; \ + mem[addr + 2] = ((value) >> 16) & 0xFF; \ + mem[addr + 3] = ((value) >> 24) & 0xFF; \ + } \ } while (0) #define MATH_OP(type, op) \ @@ -164,7 +159,21 @@ extern u8 *mem; /* memory */ u32 rd = fp + dest; \ u32 r1 = fp + src1; \ u32 r2 = fp + src2; \ - mem[rd] = (type)mem[r1] op(type) mem[r2]; \ + type result = ((type)READ_U32(r1) op (type)READ_U32(r2)); \ + mem[(rd)] = (result) & 0xFF; \ + mem[(rd) + 1] = ((result) >> 8) & 0xFF; \ + mem[(rd) + 2] = ((result) >> 16) & 0xFF; \ + mem[(rd) + 3] = ((result) >> 24) & 0xFF; \ + return true; \ + } while (0) + +#define MATH_OP_NO_CAST(op) \ + do { \ + DECODE_A(instruction) \ + u32 rd = fp + dest; \ + u32 r1 = fp + 
src1; \ + u32 r2 = fp + src2; \ + WRITE_U32(rd, (READ_U32(r1) op READ_U32(r2))); \ return true; \ } while (0) @@ -178,9 +187,9 @@ extern u8 *mem; /* memory */ u32 rd = fp + dest; \ u32 r1 = fp + src1; \ u32 r2 = fp + src2; \ - target = mem[rd]; \ - value = (type)mem[r1]; \ - value2 = (type)mem[r2]; \ + target = READ_U32(rd); \ + value = (type)READ_U32(r1); \ + value2 = (type)READ_U32(r2); \ cond = !!(value op value2); \ mask = -(u32)cond; \ pc = (target & mask) | (pc & ~mask); \