// Source file: varaq-interpreter-go/varaq/scanner.go

package varaq
import (
"bufio"
"fmt"
"io/ioutil"
"strconv"
"strings"
)
var keywords = map[string]TokenType{
"false": FALSE,
"ghobe'": FALSE,
2022-09-03 23:15:35 -04:00
"true": TRUE,
"HIja'": TRUE,
2022-09-03 23:15:35 -04:00
"pi": PI,
"e": E,
"pop": POP,
"dup": DUP,
"exch": EXCH,
"clear": CLEAR,
"remember": REMEMBER,
"forget": FORGET,
"dump": DUMP,
"name": NAME,
"set": SET,
"ifyes": IFYES,
"ifno": IFNO,
"choose": CHOOSE,
"eval": EVAL,
"escape": ESCAPE,
"repeat": REPEAT,
"split": SPLIT,
"cons": CONS,
"shatter": SHATTER,
"empty?": EMPTY,
"compose": COMPOSE,
"streq?": STREQ,
"strcut": STRCUT,
"strmeasure": STRMEASURE,
"strtie": STRTIE,
"explode": EXPLODE,
"add": ADD,
"sub": SUB,
"mul": MUL,
"div": DIV,
"idiv": IDIV,
"mod": MOD,
"pow": POW,
"sqrt": SQRT,
"add1": ADD1,
"sub1": SUB1,
"sin": SIN,
"cos": COS,
"tan": TAN,
"atan": ATAN,
"ln": LN,
"log": LOG,
"log3": LOG3,
"clip": CLIP,
"smooth": SMOOTH,
"howmuch": HOWMUCH,
"setrand": SETRAND,
"rand": RAND,
"numberize": NUMBERIZE,
"isolate": ISOLATE,
"mix": MIX,
"contradict": CONTRADICT,
"compl": COMPL,
"shiftright": SHIFTRIGHT,
"shiftleft": SHIFTLEFT,
"gt?": GT,
"lt?": LT,
"eq?": EQ,
"ge?": GE,
"le?": LE,
"ne?": NE,
"null": NULL,
"null?": ISNULL,
"int?": ISINT,
"number?": ISNUMBER,
"negative?": NEGATIVE,
"and": AND,
"or": OR,
"xor": XOR,
"disp": DISP,
"listen": LISTEN,
"complain": COMPLAIN,
"time": TIME,
"gc": GARBAGECOLLECT,
"newline": NEWLINE,
"tab": TAB,
"whereami": WHEREAMI,
"version": VERSION,
"argv": ARGV,
"chu'tut": TAB,
"chu'DonwI'": NEWLINE,
"bep": COMPLAIN,
"chImmoH": CLEAR,
"chIm'a'": EMPTY,
"cher": SET,
"boq": ADD,
"chov": EVAL,
"chuv": MOD,
"cha'": DISP,
"DuD": MIX,
"ghap": XOR,
"ghurmI'": E,
"ghurtaH": LN,
"ghorqu'": SHATTER,
"ghobe'chugh": IFNO,
"HIja'chugh": IFYES,
"Hotlh": DUMP,
"HeHmI'": PI,
"Habwav": IDIV,
"je": AND,
"jor": EXPLODE,
"joq": OR,
"loS'ar": SQRT,
"latlh": DUP,
"boq'egh": MUL,
"boqHa'qa'": POW,
"law''a'": GT,
"law'rap'a'": GE,
"maHghurtaH": LOG,
"mI'moH": NUMBERIZE,
2022-09-03 23:15:35 -04:00
"muv": CONS,
"mobmoH": ISOLATE,
"mIScher": SETRAND,
"mIS": RAND,
"nIHghoS": SHIFTRIGHT,
"nargh": ESCAPE,
"naQmoH": COMPOSE,
"pagh'a'": NULL,
"pong": NAME,
"poSghoS": SHIFTLEFT,
"puS'a'": LT,
"puSrap'a'": LE,
"qaw": REMEMBER,
"qawHa'": FORGET,
"qojmI'": TAN,
"qojHa'": ATAN,
"Qo'moH": COMPL,
"rap'a'": EQ,
"rapbe'a'": NE,
"tlheghrar": STRTIE,
"poD": CLIP,
"Hab": SMOOTH,
"'ar": HOWMUCH,
"SIj": SPLIT,
"boqHa'": SUB,
"tam": EXCH,
"taH'a'": NEGATIVE,
"tlhoch": CONTRADICT,
"tlheghpe'": STRCUT,
"tlheghjuv": STRMEASURE,
"tlheghrap'a'": STREQ,
"vangqa'": REPEAT,
"wIv": CHOOSE,
"woD": POP,
"boqHa''egh": DIV,
"wa'boqHa'": SUB1,
"wa'boq": ADD1,
"wejghurtaH": LOG3,
"'Ij": LISTEN,
"poH": TIME,
// Wrong word in original spec, old one meant "waving hands or flapping"
// Also fixes the conflicting joq issue meaning sin or 'or'
"yu'eghHa'": COS,
"yu'egh": SIN,
// This one has a special case too as it is the same as the '~' operator
"lI'moH": TILDE,
"woDHa'": GARBAGECOLLECT,
"taghDe'": ARGV,
"pongmI'": VERSION,
"nuqDaq_jIH": WHEREAMI,
// All the ones after this are a part of the var'aq superset
2023-03-18 22:26:32 -04:00
"rot": ROT,
"jIr": ROT,
"over": OVER,
"QI": OVER,
"depth": DEPTH,
"juv": DEPTH,
"servehttp": SERVEHTTP,
2023-03-22 22:26:58 -04:00
"readall": READALL,
2022-09-03 23:15:35 -04:00
}
// isDigit reports whether s may appear inside a numeric literal: a decimal
// digit or the minus sign. The range test is a plain string comparison, so
// it is meant for the one-byte strings the scanner produces.
func isDigit(s string) bool {
	if s == "-" {
		return true
	}
	return "0" <= s && s <= "9"
}
// isAlpha reports whether s may start or continue an identifier: an ASCII
// letter, an underscore, an apostrophe or a question mark. The apostrophe
// and question mark are needed by keywords such as "ghobe'" and "empty?".
func isAlpha(s string) bool {
	switch {
	case s == "_", s == "'", s == "?":
		return true
	case "a" <= s && s <= "z":
		return true
	case "A" <= s && s <= "Z":
		return true
	default:
		return false
	}
}
// isSpecialChar reports whether s is one of the four bracket characters
// that form a token on their own even when glued to an identifier.
func isSpecialChar(s string) bool {
	switch s {
	case "(", ")", "{", "}":
		return true
	}
	return false
}
func isAlphaNumeric(s string) bool {
return isAlpha(s) || isDigit(s)
}
// isEOF reports whether the scanner currently holds no token. That is the
// case once Scan has run out of input, and also before the first Scan call.
func isEOF(scanner *bufio.Scanner) bool {
	return len(scanner.Bytes()) == 0
}
// peek returns the character the scanner is currently positioned on without
// advancing. It yields "" when no token is held.
func peek(scanner *bufio.Scanner) string {
	current := scanner.Text()
	return current
}
// next advances the scanner by one token and returns it. Scan's boolean
// result is intentionally unused: once input is exhausted the returned
// string is simply "".
func next(scanner *bufio.Scanner) string {
	_ = scanner.Scan()
	return string(scanner.Bytes())
}
// match reports whether the scanner's current character equals expected.
// It never advances the scanner and is always false at end of input.
// NOTE(review): this inspects the current character, not the one after it.
func match(scanner *bufio.Scanner, expected string) bool {
	return !isEOF(scanner) && scanner.Text() == expected
}
// str scans a string literal. The scanner must be positioned on the opening
// double quote; characters are consumed up to and including the closing
// quote. Newlines inside the literal advance line_no, and the token is
// stamped with the line on which the literal ends.
//
// It returns the extended token slice and updated line number, or the
// unchanged slice and an "unterminated string" error if input ends first.
func str(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
	var body []byte
	for ch := next(scanner); ch != "\""; ch = next(scanner) {
		if isEOF(scanner) {
			return tokens, line_no, fmt.Errorf("unterminated string")
		}
		if ch == "\n" {
			line_no++
		}
		body = append(body, ch...)
	}
	text := string(body)
	return append(tokens, Token{STRING, text, text, line_no}), line_no, nil
}
// num scans a numeric literal starting at the scanner's current character:
// a run of isDigit characters, optionally followed by "." and a second run.
// The text is parsed as a float64 and appended as a NUMBER token.
//
// The character that ends the literal has already been read from the
// scanner, so it is always handed back to scanToken for re-scanning.
// Previously this happened only for literals without a fractional part;
// after "1.5" the following character (a bracket, a newline, ...) was
// silently dropped.
//
// It returns the extended token slice and line number, or an error wrapping
// the strconv failure when the text is not a valid number (e.g. "-" or "1-2",
// since isDigit also accepts the minus sign).
func num(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
	value := peek(scanner)

	// Integer part.
	for {
		digit := next(scanner)
		if !isDigit(digit) {
			break
		}
		value += digit
	}

	// Optional fractional part. If "." is not followed by a digit, the "."
	// is discarded and the character after it becomes the current one.
	if peek(scanner) == "." && isDigit(next(scanner)) {
		value += "."
		for isDigit(peek(scanner)) {
			value += peek(scanner)
			next(scanner)
		}
	}

	parsed, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return tokens, line_no, fmt.Errorf("line %d: invalid number %q: %w", line_no, value, err)
	}

	tokens = append(tokens, Token{NUMBER, value, parsed, line_no})
	// The scanner now sits on the first character after the literal.
	return scanToken(scanner, tokens, line_no, true)
}
// identifier scans a word starting at the scanner's current character and
// continuing while isAlphaNumeric holds. The word is looked up in keywords:
// a hit yields that keyword's token type, a miss yields IDENTIFIER.
//
// The character that ends the word has already been consumed from the
// scanner. If it is a bracket it is re-scanned so that input like "foo)"
// still produces the bracket token. If it is a newline the line counter is
// advanced here; previously the newline was swallowed, leaving every later
// token with a line number one too low. Any other delimiter is discarded,
// as before.
func identifier(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
	value := peek(scanner)
	var delim string
	for {
		delim = next(scanner)
		if !isAlphaNumeric(delim) {
			break
		}
		value += delim
	}

	typ, known := keywords[value]
	if !known {
		typ = IDENTIFIER
	}
	// The token belongs to the line it started on, so append before any
	// newline adjustment below.
	tokens = append(tokens, Token{typ, value, value, line_no})

	switch {
	case isSpecialChar(delim):
		return scanToken(scanner, tokens, line_no, true)
	case delim == "\n":
		line_no++
	}
	return tokens, line_no, nil
}
// include handles a "//path" directive. The scanner must be positioned on
// the second slash. The rest of the line is read as a file path (spaces,
// tabs and carriage returns are stripped), the file is tokenized with
// Tokenize, and its tokens, minus the trailing EOF token, are spliced into
// tokens.
//
// Changes from the previous version:
//   - a directive on the last line without a trailing newline is accepted
//     instead of being rejected as "unterminated string";
//   - the newline that ends the directive is counted in line_no;
//   - failures return the tokens and line number gathered so far, like the
//     other scan helpers, and wrap the underlying error.
//
// The path is resolved by ioutil.ReadFile, i.e. relative to the process's
// working directory. Recursive includes are not detected.
func include(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
	path := ""
	sawNewline := false
	for {
		char := next(scanner)
		if char == "\n" {
			sawNewline = true
			break
		}
		if isEOF(scanner) {
			break
		}
		if char != " " && char != "\r" && char != "\t" {
			path += char
		}
	}
	if path == "" {
		return tokens, line_no, fmt.Errorf("include at line %d: missing file name", line_no)
	}

	// ioutil.ReadFile is kept because this file imports io/ioutil;
	// os.ReadFile is the modern equivalent.
	code, err := ioutil.ReadFile(path)
	if err != nil {
		return tokens, line_no, fmt.Errorf("include at line %d: %w", line_no, err)
	}
	toks, err := Tokenize(string(code))
	if err != nil {
		return tokens, line_no, fmt.Errorf("include %q at line %d: %w", path, line_no, err)
	}

	// Tokenize ends its output with an EOF token; drop it so the including
	// file's token stream continues.
	if n := len(toks); n > 0 {
		toks = toks[:n-1]
	}
	if sawNewline {
		line_no++
	}
	return append(tokens, toks...), line_no, nil
}
// scanToken consumes one lexical element and appends any resulting token to
// tokens.
//
// When no_advance is false the scanner is advanced first; when true the
// character the scanner already sits on is processed. The latter is used by
// helpers that had to read one character too far to find the end of their
// token.
//
// It returns the (possibly extended) token slice, the updated line number
// and an error for malformed input. Reaching end of input is not an error:
// the slice is returned unchanged and the caller detects EOF via isEOF.
//
// Changes from the previous version:
//   - an unterminated "(*" comment returns an error instead of looping
//     forever;
//   - a "*" inside a comment that is not followed by ")" no longer aborts
//     the comment, and newlines inside comments are counted;
//   - "/" starts an include only when followed by a second "/"; a lone "/"
//     now yields a SLASH token (the old check compared the current character
//     with itself, so that branch was unreachable);
//   - the unexpected-character error names the character.
func scanToken(scanner *bufio.Scanner, tokens []Token, line_no int, no_advance bool) ([]Token, int, error) {
	var c string
	if no_advance {
		c = peek(scanner)
	} else {
		c = next(scanner)
	}

	switch c {
	case "":
		// End of input: nothing to scan.
		return tokens, line_no, nil
	case "(":
		if next(scanner) != "*" {
			// Plain parenthesis. The look-ahead character is now current and
			// still needs scanning.
			tokens = append(tokens, Token{LEFTPAREN, c, nil, line_no})
			return scanToken(scanner, tokens, line_no, true)
		}
		// Block comment "(* ... *)": skip to the closing "*)".
		opened := line_no
		sawStar := false
		for {
			ch := next(scanner)
			if ch == "" {
				return tokens, line_no, fmt.Errorf("did not close comment at line number %d", opened)
			}
			if ch == ")" && sawStar {
				break
			}
			if ch == "\n" {
				line_no++
			}
			sawStar = ch == "*"
		}
	case ")":
		return append(tokens, Token{RIGHTPAREN, c, nil, line_no}), line_no, nil
	case "{":
		return append(tokens, Token{LEFTBRACE, c, nil, line_no}), line_no, nil
	case "}":
		return append(tokens, Token{RIGHTBRACE, c, nil, line_no}), line_no, nil
	case "~":
		return append(tokens, Token{TILDE, c, nil, line_no}), line_no, nil
	case "/":
		// TODO: make this an import token
		if next(scanner) == "/" {
			return include(scanner, tokens, line_no)
		}
		// Lone slash. The look-ahead character is now current and still
		// needs scanning.
		tokens = append(tokens, Token{SLASH, c, nil, line_no})
		return scanToken(scanner, tokens, line_no, true)
	case "*":
		return append(tokens, Token{STAR, c, nil, line_no}), line_no, nil
	case " ", "\r", "\t":
		// Ignore whitespace.
	case "\n":
		line_no++
	case "\"":
		return str(scanner, tokens, line_no)
	default:
		switch {
		case isDigit(c):
			return num(scanner, tokens, line_no)
		case isAlpha(c):
			return identifier(scanner, tokens, line_no)
		}
		return tokens, line_no, fmt.Errorf("unexpected character %q at line %d", c, line_no)
	}
	return tokens, line_no, nil
}
// Tokenize converts var'aq source text into a token slice terminated by an
// EOF token. The input is read one byte at a time through a bufio.Scanner
// and each lexical element is delegated to scanToken.
//
// On a scan error the tokens returned by scanToken are passed back together
// with the error.
//
// NOTE(review): the end-of-input check runs before the error check, so an
// error raised while scanning the very last element of the source is
// discarded and Tokenize reports success.
func Tokenize(source string) ([]Token, error) {
	reader := bufio.NewScanner(strings.NewReader(source))
	reader.Split(bufio.ScanBytes)

	toks := []Token{}
	line := 1
	for done := false; !done; {
		var scanErr error
		toks, line, scanErr = scanToken(reader, toks, line, false)
		switch {
		case isEOF(reader):
			done = true
		case scanErr != nil:
			return toks, scanErr
		}
	}
	return append(toks, Token{EOF, "", nil, line}), nil
}