package varaq import ( "bufio" "fmt" "io/ioutil" "strconv" "strings" ) var keywords = map[string]TokenType{ "false": FALSE, "ghobe'": FALSE, "true": TRUE, "HIja'": TRUE, "pi": PI, "e": E, "pop": POP, "dup": DUP, "exch": EXCH, "clear": CLEAR, "remember": REMEMBER, "forget": FORGET, "dump": DUMP, "name": NAME, "set": SET, "ifyes": IFYES, "ifno": IFNO, "choose": CHOOSE, "eval": EVAL, "escape": ESCAPE, "repeat": REPEAT, "split": SPLIT, "cons": CONS, "shatter": SHATTER, "empty?": EMPTY, "compose": COMPOSE, "streq?": STREQ, "strcut": STRCUT, "strmeasure": STRMEASURE, "strtie": STRTIE, "explode": EXPLODE, "add": ADD, "sub": SUB, "mul": MUL, "div": DIV, "idiv": IDIV, "mod": MOD, "pow": POW, "sqrt": SQRT, "add1": ADD1, "sub1": SUB1, "sin": SIN, "cos": COS, "tan": TAN, "atan": ATAN, "ln": LN, "log": LOG, "log3": LOG3, "clip": CLIP, "smooth": SMOOTH, "howmuch": HOWMUCH, "setrand": SETRAND, "rand": RAND, "numberize": NUMBERIZE, "isolate": ISOLATE, "mix": MIX, "contradict": CONTRADICT, "compl": COMPL, "shiftright": SHIFTRIGHT, "shiftleft": SHIFTLEFT, "gt?": GT, "lt?": LT, "eq?": EQ, "ge?": GE, "le?": LE, "ne?": NE, "null": NULL, "null?": ISNULL, "int?": ISINT, "number?": ISNUMBER, "negative?": NEGATIVE, "and": AND, "or": OR, "xor": XOR, "disp": DISP, "listen": LISTEN, "complain": COMPLAIN, "time": TIME, "gc": GARBAGECOLLECT, "newline": NEWLINE, "tab": TAB, "whereami": WHEREAMI, "version": VERSION, "argv": ARGV, "chu'tut": TAB, "chu'DonwI'": NEWLINE, "bep": COMPLAIN, "chImmoH": CLEAR, "chIm'a'": EMPTY, "cher": SET, "boq": ADD, "chov": EVAL, "chuv": MOD, "cha'": DISP, "DuD": MIX, "ghap": XOR, "ghurmI'": E, "ghurtaH": LN, "ghorqu'": SHATTER, "ghobe'chugh": IFNO, "HIja'chugh": IFYES, "Hotlh": DUMP, "HeHmI'": PI, "Habwav": IDIV, "je": AND, "jor": EXPLODE, "joq": OR, "loS'ar": SQRT, "latlh": DUP, "boq'egh": MUL, "boqHa'qa'": POW, "law''a'": GT, "law'rap'a'": GE, "maHghurtaH": LOG, "mI'moH": NUMBERIZE, "muv": CONS, "mobmoH": ISOLATE, "mIScher": SETRAND, "mIS": RAND, "nIHghoS": SHIFTRIGHT, "nargh": ESCAPE, "naQmoH": COMPOSE, "pagh'a'": NULL, "pong": NAME, "poSghoS": SHIFTLEFT, "puS'a'": LT, "puSrap'a'": LE, "qaw": REMEMBER, "qawHa'": FORGET, "qojmI'": TAN, "qojHa'": ATAN, "Qo'moH": COMPL, "rap'a'": EQ, "rapbe'a'": NE, "tlheghrar": STRTIE, "poD": CLIP, "Hab": SMOOTH, "'ar": HOWMUCH, "SIj": SPLIT, "boqHa'": SUB, "tam": EXCH, "taH'a'": NEGATIVE, "tlhoch": CONTRADICT, "tlheghpe'": STRCUT, "tlheghjuv": STRMEASURE, "tlheghrap'a'": STREQ, "vangqa'": REPEAT, "wIv": CHOOSE, "woD": POP, "boqHa''egh": DIV, "wa'boqHa'": SUB1, "wa'boq": ADD1, "wejghurtaH": LOG3, "'Ij": LISTEN, "poH": TIME, // Wrong word in original spec, old one meant "waving hands or flapping" // Also fixes the conflicting joq issue meaning sin or 'or' "yu'eghHa'": COS, "yu'egh": SIN, // This one has a special case too as it is the same as the '~' operator "lI'moH": TILDE, "woDHa'": GARBAGECOLLECT, "taghDe'": ARGV, "pongmI'": VERSION, "nuqDaq_jIH": WHEREAMI, // All the ones after this are a part of the var'aq superset "rot": ROT, "jIr": ROT, "over": OVER, "QI": OVER, "depth": DEPTH, "juv": DEPTH, } func isDigit(s string) bool { return (s >= "0" && s <= "9") || s == "-" } func isAlpha(s string) bool { return (s >= "a" && s <= "z") || (s >= "A" && s <= "Z") || s == "_" || s == "'" || s == "?" } func isSpecialChar(s string) bool { return s == "(" || s == ")" || s == "{" || s == "}" } func isAlphaNumeric(s string) bool { return isAlpha(s) || isDigit(s) } func isEOF(scanner *bufio.Scanner) bool { return scanner.Text() == "" } func peek(scanner *bufio.Scanner) string { return scanner.Text() } func next(scanner *bufio.Scanner) string { scanner.Scan() return scanner.Text() } func match(scanner *bufio.Scanner, expected string) bool { if isEOF(scanner) { return false } if scanner.Text() != expected { return false } return true } func str(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) { value := "" for { char := next(scanner) if char == "\"" || isEOF(scanner) { break } if char == "\n" { line_no++ } value += char } if isEOF(scanner) { return tokens, line_no, fmt.Errorf("unterminated string") } return append(tokens, Token{STRING, value, value, line_no}), line_no, nil } func num(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) { value := peek(scanner) no_advance := false for { digit := next(scanner) if !isDigit(digit) { break } value += digit } if peek(scanner) == "." && isDigit(next(scanner)) { value += "." for { digit := peek(scanner) if !isDigit(digit) { break } value += digit next(scanner) } } else { no_advance = true } num, err := strconv.ParseFloat(value, 64) if err != nil { return tokens, line_no, fmt.Errorf("string to rational error") } if no_advance { tokens = append(tokens, Token{NUMBER, value, num, line_no}) return scanToken(scanner, tokens, line_no, true) } return append(tokens, Token{NUMBER, value, num, line_no}), line_no, nil } func identifier(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) { value := peek(scanner) no_advance := false for { chr := next(scanner) if !isAlphaNumeric(chr) { if isSpecialChar(chr) { no_advance = true } break } value += chr } typ, prs := keywords[value] if prs { if no_advance { // TODO: ugly but golang wont let me do what I want tokens = append(tokens, Token{typ, value, value, line_no}) return scanToken(scanner, tokens, line_no, true) } return append(tokens, Token{typ, value, value, line_no}), line_no, nil } if no_advance { tokens = append(tokens, Token{IDENTIFIER, value, value, line_no}) return scanToken(scanner, tokens, line_no, true) } return append(tokens, Token{IDENTIFIER, value, value, line_no}), line_no, nil } func include(scanner *bufio.Scanner, tokens []Token, line_no int) ([]Token, int, error) { value := "" for { char := next(scanner) if char == "\n" || isEOF(scanner) { break } if char != " " && char != "\r" && char != "\t" { value += char } } if isEOF(scanner) { return tokens, line_no, fmt.Errorf("unterminated string") } code, err := ioutil.ReadFile(value) // the file is inside the local directory if err != nil { return nil, 0, err } toks, err := Tokenize(string(code)) if err != nil { return nil, 0, err } // need to remove EOF from script import n := len(toks) - 1 toks = toks[:n] return append(tokens, toks...), line_no, nil } func scanToken(scanner *bufio.Scanner, tokens []Token, line_no int, no_advance bool) ([]Token, int, error) { var c string if no_advance { c = peek(scanner) } else { c = next(scanner) } switch c { case "(": if next(scanner) == "*" { for { if next(scanner) == "*" { if next(scanner) != ")" { return tokens, line_no, fmt.Errorf("did not close comment at line number %d", line_no) } break } } } else { tokens = append(tokens, Token{LEFTPAREN, c, nil, line_no}) return scanToken(scanner, tokens, line_no, true) } case ")": return append(tokens, Token{RIGHTPAREN, c, nil, line_no}), line_no, nil case "{": return append(tokens, Token{LEFTBRACE, c, nil, line_no}), line_no, nil case "}": return append(tokens, Token{RIGHTBRACE, c, nil, line_no}), line_no, nil case "~": return append(tokens, Token{TILDE, c, nil, line_no}), line_no, nil case "/": if match(scanner, "/") { // TODO: make this an import token next(scanner) return include(scanner, tokens, line_no) } else { return append(tokens, Token{SLASH, c, nil, line_no}), line_no, nil } case "*": return append(tokens, Token{STAR, c, nil, line_no}), line_no, nil case " ": case "\r": case "\t": // Ignore whitespace. break case "\n": line_no++ case "\"": return str(scanner, tokens, line_no) default: if isDigit(peek(scanner)) { return num(scanner, tokens, line_no) } else if isAlpha(peek(scanner)) { return identifier(scanner, tokens, line_no) } else { return tokens, line_no, fmt.Errorf("unixpected character %d", line_no) } } return tokens, line_no, nil } func Tokenize(source string) ([]Token, error) { var err error line_no := 1 tokens := make([]Token, 0) scanner := bufio.NewScanner(strings.NewReader(source)) scanner.Split(bufio.ScanBytes) for { tokens, line_no, err = scanToken(scanner, tokens, line_no, false) if isEOF(scanner) { break } if err != nil { return tokens, err } } tokens = append(tokens, Token{EOF, "", nil, line_no}) return tokens, nil }