437 lines
9.7 KiB
Go
437 lines
9.7 KiB
Go
package varaq
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
var keywords = map[string]TokenType{
|
|
"false": FALSE,
|
|
"ghobe'": FALSE,
|
|
"true": TRUE,
|
|
"HIja'": TRUE,
|
|
"pi": PI,
|
|
"e": E,
|
|
"pop": POP,
|
|
"dup": DUP,
|
|
"exch": EXCH,
|
|
"clear": CLEAR,
|
|
"remember": REMEMBER,
|
|
"forget": FORGET,
|
|
"dump": DUMP,
|
|
"name": NAME,
|
|
"set": SET,
|
|
"ifyes": IFYES,
|
|
"ifno": IFNO,
|
|
"choose": CHOOSE,
|
|
"eval": EVAL,
|
|
"escape": ESCAPE,
|
|
"repeat": REPEAT,
|
|
"split": SPLIT,
|
|
"cons": CONS,
|
|
"shatter": SHATTER,
|
|
"empty?": EMPTY,
|
|
"compose": COMPOSE,
|
|
"streq?": STREQ,
|
|
"strcut": STRCUT,
|
|
"strmeasure": STRMEASURE,
|
|
"strtie": STRTIE,
|
|
"explode": EXPLODE,
|
|
"add": ADD,
|
|
"sub": SUB,
|
|
"mul": MUL,
|
|
"div": DIV,
|
|
"idiv": IDIV,
|
|
"mod": MOD,
|
|
"pow": POW,
|
|
"sqrt": SQRT,
|
|
"add1": ADD1,
|
|
"sub1": SUB1,
|
|
"sin": SIN,
|
|
"cos": COS,
|
|
"tan": TAN,
|
|
"atan": ATAN,
|
|
"ln": LN,
|
|
"log": LOG,
|
|
"log3": LOG3,
|
|
"clip": CLIP,
|
|
"smooth": SMOOTH,
|
|
"howmuch": HOWMUCH,
|
|
"setrand": SETRAND,
|
|
"rand": RAND,
|
|
"numberize": NUMBERIZE,
|
|
"isolate": ISOLATE,
|
|
"mix": MIX,
|
|
"contradict": CONTRADICT,
|
|
"compl": COMPL,
|
|
"shiftright": SHIFTRIGHT,
|
|
"shiftleft": SHIFTLEFT,
|
|
"gt?": GT,
|
|
"lt?": LT,
|
|
"eq?": EQ,
|
|
"ge?": GE,
|
|
"le?": LE,
|
|
"ne?": NE,
|
|
"null": NULL,
|
|
"null?": ISNULL,
|
|
"int?": ISINT,
|
|
"number?": ISNUMBER,
|
|
"negative?": NEGATIVE,
|
|
"and": AND,
|
|
"or": OR,
|
|
"xor": XOR,
|
|
"disp": DISP,
|
|
"listen": LISTEN,
|
|
"complain": COMPLAIN,
|
|
"time": TIME,
|
|
"gc": GARBAGECOLLECT,
|
|
"newline": NEWLINE,
|
|
"tab": TAB,
|
|
"whereami": WHEREAMI,
|
|
"version": VERSION,
|
|
"argv": ARGV,
|
|
"chu'tut": TAB,
|
|
"chu'DonwI'": NEWLINE,
|
|
"bep": COMPLAIN,
|
|
"chImmoH": CLEAR,
|
|
"chIm'a'": EMPTY,
|
|
"cher": SET,
|
|
"boq": ADD,
|
|
"chov": EVAL,
|
|
"chuv": MOD,
|
|
"cha'": DISP,
|
|
"DuD": MIX,
|
|
"ghap": XOR,
|
|
"ghurmI'": E,
|
|
"ghurtaH": LN,
|
|
"ghorqu'": SHATTER,
|
|
"ghobe'chugh": IFNO,
|
|
"HIja'chugh": IFYES,
|
|
"Hotlh": DUMP,
|
|
"HeHmI'": PI,
|
|
"Habwav": IDIV,
|
|
"je": AND,
|
|
"jor": EXPLODE,
|
|
"joq": OR,
|
|
"loS'ar": SQRT,
|
|
"latlh": DUP,
|
|
"boq'egh": MUL,
|
|
"boqHa'qa'": POW,
|
|
"law''a'": GT,
|
|
"law'rap'a'": GE,
|
|
"maHghurtaH": LOG,
|
|
"mI'moH": NUMBERIZE,
|
|
"muv": CONS,
|
|
"mobmoH": ISOLATE,
|
|
"mIScher": SETRAND,
|
|
"mIS": RAND,
|
|
"nIHghoS": SHIFTRIGHT,
|
|
"nargh": ESCAPE,
|
|
"naQmoH": COMPOSE,
|
|
"pagh'a'": NULL,
|
|
"pong": NAME,
|
|
"poSghoS": SHIFTLEFT,
|
|
"puS'a'": LT,
|
|
"puSrap'a'": LE,
|
|
"qaw": REMEMBER,
|
|
"qawHa'": FORGET,
|
|
"qojmI'": TAN,
|
|
"qojHa'": ATAN,
|
|
"Qo'moH": COMPL,
|
|
"rap'a'": EQ,
|
|
"rapbe'a'": NE,
|
|
"tlheghrar": STRTIE,
|
|
"poD": CLIP,
|
|
"Hab": SMOOTH,
|
|
"'ar": HOWMUCH,
|
|
"SIj": SPLIT,
|
|
"boqHa'": SUB,
|
|
"tam": EXCH,
|
|
"taH'a'": NEGATIVE,
|
|
"tlhoch": CONTRADICT,
|
|
"tlheghpe'": STRCUT,
|
|
"tlheghjuv": STRMEASURE,
|
|
"tlheghrap'a'": STREQ,
|
|
"vangqa'": REPEAT,
|
|
"wIv": CHOOSE,
|
|
"woD": POP,
|
|
"boqHa''egh": DIV,
|
|
"wa'boqHa'": SUB1,
|
|
"wa'boq": ADD1,
|
|
"wejghurtaH": LOG3,
|
|
"'Ij": LISTEN,
|
|
"poH": TIME,
|
|
// Wrong word in original spec, old one meant "waving hands or flapping"
|
|
// Also fixes the conflicting joq issue meaning sin or 'or'
|
|
"yu'eghHa'": COS,
|
|
"yu'egh": SIN,
|
|
// This one has a special case too as it is the same as the '~' operator
|
|
"lI'moH": TILDE,
|
|
"woDHa'": GARBAGECOLLECT,
|
|
"taghDe'": ARGV,
|
|
"pongmI'": VERSION,
|
|
"nuqDaq_jIH": WHEREAMI,
|
|
// All the ones after this are a part of the var'aq superset
|
|
"rot": ROT,
|
|
"jIr": ROT,
|
|
"over": OVER,
|
|
"QI": OVER,
|
|
"depth": DEPTH,
|
|
"juv": DEPTH,
|
|
"servehttp": SERVEHTTP,
|
|
"readall": READALL,
|
|
}
|
|
|
|
// isDigit reports whether s counts as part of a numeric literal: either the
// minus sign or a string that sorts between "0" and "9" inclusive.
//
// The comparison is on the whole string, so callers are expected to pass a
// single character at a time.
func isDigit(s string) bool {
	if s == "-" {
		return true
	}
	return "0" <= s && s <= "9"
}
|
|
|
|
// isAlpha reports whether s may appear in a word: an ASCII letter, or one of
// the punctuation characters "_", "'" and "?" that occur in keyword names.
//
// The letter checks compare the whole string, so callers are expected to
// pass a single character at a time.
func isAlpha(s string) bool {
	switch s {
	case "_", "'", "?":
		return true
	}
	lower := "a" <= s && s <= "z"
	upper := "A" <= s && s <= "Z"
	return lower || upper
}
|
|
|
|
// isSpecialChar reports whether s is one of the four bracket characters
// "(", ")", "{" or "}".
func isSpecialChar(s string) bool {
	switch s {
	case "(", ")", "{", "}":
		return true
	default:
		return false
	}
}
|
|
|
|
func isAlphaNumeric(s string) bool {
|
|
return isAlpha(s) || isDigit(s)
|
|
}
|
|
|
|
// isEOF reports whether the scanner currently holds no token. That is the
// case before the first Scan call and once the input has been exhausted.
func isEOF(scanner *bufio.Scanner) bool {
	return len(scanner.Bytes()) == 0
}
|
|
|
|
// peek returns the token the scanner is currently positioned on without
// advancing it. The result is "" when the scanner holds no token.
func peek(scanner *bufio.Scanner) string {
	current := scanner.Bytes()
	return string(current)
}
|
|
|
|
// next advances the scanner by one token and returns the token it lands on.
//
// The boolean result of Scan is deliberately not inspected: callers detect
// the end of input through the returned text (or isEOF) instead.
func next(scanner *bufio.Scanner) string {
	_ = scanner.Scan()
	advanced := scanner.Text()
	return advanced
}
|
|
|
|
// match reports whether the scanner's current token equals expected. It
// looks at the token the scanner is positioned on and never advances. With
// no current token it returns false, even for an empty expected.
func match(scanner *bufio.Scanner, expected string) bool {
	current := scanner.Text()
	return current != "" && current == expected
}
|
|
|
|
func str(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
|
|
value := ""
|
|
for {
|
|
char := next(scanner)
|
|
if char == "\"" || isEOF(scanner) {
|
|
break
|
|
}
|
|
if char == "\n" {
|
|
line_no++
|
|
}
|
|
value += char
|
|
}
|
|
|
|
if isEOF(scanner) {
|
|
return tokens, line_no, fmt.Errorf("unterminated string")
|
|
}
|
|
|
|
return append(tokens, Token{STRING, value, value, line_no}), line_no, nil
|
|
}
|
|
|
|
func num(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
|
|
value := peek(scanner)
|
|
no_advance := false
|
|
|
|
for {
|
|
digit := next(scanner)
|
|
if !isDigit(digit) {
|
|
break
|
|
}
|
|
value += digit
|
|
}
|
|
|
|
if peek(scanner) == "." && isDigit(next(scanner)) {
|
|
value += "."
|
|
for {
|
|
digit := peek(scanner)
|
|
if !isDigit(digit) {
|
|
break
|
|
}
|
|
value += digit
|
|
next(scanner)
|
|
}
|
|
} else {
|
|
no_advance = true
|
|
}
|
|
|
|
num, err := strconv.ParseFloat(value, 64)
|
|
if err != nil {
|
|
return tokens, line_no, fmt.Errorf("string to rational error")
|
|
}
|
|
|
|
if no_advance {
|
|
tokens = append(tokens, Token{NUMBER, value, num, line_no})
|
|
return scanToken(scanner, tokens, line_no, true)
|
|
}
|
|
|
|
return append(tokens, Token{NUMBER, value, num, line_no}), line_no, nil
|
|
|
|
}
|
|
|
|
func identifier(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
|
|
value := peek(scanner)
|
|
no_advance := false
|
|
for {
|
|
chr := next(scanner)
|
|
if !isAlphaNumeric(chr) {
|
|
if isSpecialChar(chr) {
|
|
no_advance = true
|
|
}
|
|
break
|
|
}
|
|
|
|
value += chr
|
|
}
|
|
|
|
typ, prs := keywords[value]
|
|
if prs {
|
|
if no_advance { // TODO: ugly but golang wont let me do what I want
|
|
tokens = append(tokens, Token{typ, value, value, line_no})
|
|
return scanToken(scanner, tokens, line_no, true)
|
|
}
|
|
return append(tokens, Token{typ, value, value, line_no}), line_no, nil
|
|
}
|
|
|
|
if no_advance {
|
|
tokens = append(tokens, Token{IDENTIFIER, value, value, line_no})
|
|
return scanToken(scanner, tokens, line_no, true)
|
|
}
|
|
return append(tokens, Token{IDENTIFIER, value, value, line_no}), line_no, nil
|
|
}
|
|
|
|
func include(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) {
|
|
value := ""
|
|
for {
|
|
char := next(scanner)
|
|
if char == "\n" || isEOF(scanner) {
|
|
break
|
|
}
|
|
if char != " " && char != "\r" && char != "\t" {
|
|
value += char
|
|
}
|
|
}
|
|
|
|
if isEOF(scanner) {
|
|
return tokens, line_no, fmt.Errorf("unterminated string")
|
|
}
|
|
|
|
code, err := ioutil.ReadFile(value) // the file is inside the local directory
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
toks, err := Tokenize(string(code))
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
// need to remove EOF from script import
|
|
n := len(toks) - 1
|
|
toks = toks[:n]
|
|
|
|
return append(tokens, toks...), line_no, nil
|
|
}
|
|
|
|
func scanToken(scanner *bufio.Scanner, tokens []Token, line_no int, no_advance bool) ([]Token, int, error) {
|
|
var c string
|
|
if no_advance {
|
|
c = peek(scanner)
|
|
} else {
|
|
c = next(scanner)
|
|
}
|
|
switch c {
|
|
case "(":
|
|
if next(scanner) == "*" {
|
|
for {
|
|
if next(scanner) == "*" {
|
|
if next(scanner) != ")" {
|
|
return tokens, line_no, fmt.Errorf("did not close comment at line number %d", line_no)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
} else {
|
|
tokens = append(tokens, Token{LEFTPAREN, c, nil, line_no})
|
|
return scanToken(scanner, tokens, line_no, true)
|
|
}
|
|
case ")":
|
|
return append(tokens, Token{RIGHTPAREN, c, nil, line_no}), line_no, nil
|
|
case "{":
|
|
return append(tokens, Token{LEFTBRACE, c, nil, line_no}), line_no, nil
|
|
case "}":
|
|
return append(tokens, Token{RIGHTBRACE, c, nil, line_no}), line_no, nil
|
|
case "~":
|
|
return append(tokens, Token{TILDE, c, nil, line_no}), line_no, nil
|
|
case "/":
|
|
if match(scanner, "/") { // TODO: make this an import token
|
|
next(scanner)
|
|
return include(scanner, tokens, line_no)
|
|
} else {
|
|
return append(tokens, Token{SLASH, c, nil, line_no}), line_no, nil
|
|
}
|
|
case "*":
|
|
return append(tokens, Token{STAR, c, nil, line_no}), line_no, nil
|
|
case " ":
|
|
case "\r":
|
|
case "\t":
|
|
// Ignore whitespace.
|
|
break
|
|
case "\n":
|
|
line_no++
|
|
case "\"":
|
|
return str(scanner, tokens, line_no)
|
|
default:
|
|
if isDigit(peek(scanner)) {
|
|
return num(scanner, tokens, line_no)
|
|
} else if isAlpha(peek(scanner)) {
|
|
return identifier(scanner, tokens, line_no)
|
|
} else {
|
|
return tokens, line_no, fmt.Errorf("unixpected character %d", line_no)
|
|
}
|
|
}
|
|
|
|
return tokens, line_no, nil
|
|
}
|
|
|
|
func Tokenize(source string) ([]Token, error) {
|
|
var err error
|
|
|
|
line_no := 1
|
|
tokens := make([]Token, 0)
|
|
|
|
scanner := bufio.NewScanner(strings.NewReader(source))
|
|
scanner.Split(bufio.ScanBytes)
|
|
|
|
for {
|
|
tokens, line_no, err = scanToken(scanner, tokens, line_no, false)
|
|
if isEOF(scanner) {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return tokens, err
|
|
}
|
|
}
|
|
|
|
tokens = append(tokens, Token{EOF, "", nil, line_no})
|
|
return tokens, nil
|
|
}
|