diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..6a844b4
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1 @@
+BasedOnStyle: GNU
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..221eaeb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.ccls-cache/
+*.mod
+*.wasm
+varaq
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a277148
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+CC = gcc
+CCFLAGS= -g
+SRCS = main.c compiler.c tokenizer.c
+
+.PHONY: all clean
+
+all: varaq
+
+# Relink whenever a source file or header changes.
+varaq: $(SRCS) $(wildcard *.h)
+	$(CC) $(CCFLAGS) -o varaq $(SRCS)
+
+clean:
+	rm -f *.o varaq
diff --git a/README.md b/README.md
index ab6584a..48ff01f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # varaq-wasm-c
 
-Playing around with wasm.
-
-Creating a compiler in C for var'aq
\ No newline at end of file
+Playing around with wasm.
+
+Creating a wasm compiler in C for var'aq
+
+*very early wip*
\ No newline at end of file
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..b776894
--- /dev/null
+++ b/common.h
@@ -0,0 +1,19 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+/* NOTE(review): the header names on the #include lines below were lost when
+   this patch was extracted (only ten bare "#include" tokens survived).  The
+   list is a reconstruction using ten standard headers -- confirm against the
+   repository before applying.  */
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#endif
\ No newline at end of file
diff --git a/compiler.c b/compiler.c
new file mode 100644
index 0000000..20f627b
--- /dev/null
+++ b/compiler.c
@@ -0,0 +1,313 @@
+#include "compiler.h"
+
+#include "common.h"
+#include "tokenizer.h"
+
+/* append_f64 emits a double as eight little-endian bytes.  */
+_Static_assert (sizeof (double) == sizeof (uint64_t),
+                "append_f64 expects an 8-byte double");
+
+/* Return a new empty Code (no cells, count 0, override off).  Aborts when
+   memory is exhausted.  */
+static Code *
+new_code (void)
+{
+  Code *code = calloc (1, sizeof *code);
+  if (code == NULL)
+    {
+      abort ();
+    }
+  return code;
+}
+
+/* Release CODE and its cell array.  NULL is accepted.  */
+static void
+free_code (Code *code)
+{
+  if (code != NULL)
+    {
+      free (code->cells);
+      free (code);
+    }
+}
+
+/* Append LEN bytes starting at BYTES to TAPE.  TAPE->cells is only read when
+   TAPE->count is non-zero, and the old array is released after the copy, so
+   BYTES may point into TAPE's own cells.  Aborts when memory is exhausted.  */
+static void
+append_bytes (Code *tape, const void *bytes, size_t len)
+{
+  if (len == 0)
+    {
+      return;
+    }
+
+  uint8_t *grown = malloc (tape->count + len);
+  if (grown == NULL)
+    {
+      abort ();
+    }
+  if (tape->count > 0)
+    {
+      memcpy (grown, tape->cells, tape->count);
+    }
+  memcpy (grown + tape->count, bytes, len);
+
+  if (tape->count > 0)
+    {
+      free (tape->cells);
+    }
+  tape->cells = grown;
+  tape->count += len;
+}
+
+/* Append DATA's bytes to TAPE, then release DATA.  */
+static void
+append_owned (Code *tape, Code *data)
+{
+  append_bytes (tape, data->cells, data->count);
+  free_code (data);
+}
+
+/* Encode NUM, reinterpreted as a signed integer as wide as a pointer, in
+   signed LEB128.  Returns a new buffer owned by the caller.  */
+Code *
+signedLEB128 (size_t num)
+{
+  Code *buffer = new_code ();
+  /* gcc reduces out-of-range conversions modulo 2^N and shifts negative
+     values arithmetically, so (size_t)-1 is encoded as -1.  */
+  intptr_t n = (intptr_t)num;
+  bool more = true;
+
+  while (more)
+    {
+      uint8_t byte = n & 0x7f;
+      n >>= 7;
+      bool sign_bit = (byte & 0x40) != 0;
+      if ((n == 0 && !sign_bit) || (n == -1 && sign_bit))
+        {
+          more = false;
+        }
+      else
+        {
+          byte |= 0x80;
+        }
+      append_bytes (buffer, &byte, 1);
+    }
+
+  return buffer;
+}
+
+/* Encode NUM in unsigned LEB128.  Returns a new buffer owned by the
+   caller.  */
+Code *
+unsignedLEB128 (size_t num)
+{
+  Code *buffer = new_code ();
+
+  /* Work on the size_t itself: narrowing to int made values of 2^31 and
+     above negative, and their shifted value then never reached zero.  */
+  do
+    {
+      uint8_t byte = num & 0x7f;
+      num >>= 7;
+      if (num != 0)
+        {
+          byte |= 0x80;
+        }
+      append_bytes (buffer, &byte, 1);
+    }
+  while (num != 0);
+
+  return buffer;
+}
+
+/* Append the single byte DATA to TAPE and return TAPE.  */
+Code *
+append_byte (Code *tape, uint8_t data)
+{
+  append_bytes (tape, &data, 1);
+  return tape;
+}
+
+/* Append DATA to TAPE as eight bytes, least significant first, whatever the
+   host byte order, and return TAPE.  */
+Code *
+append_f64 (Code *tape, double data)
+{
+  uint64_t bits;
+  uint8_t bytes[sizeof bits];
+
+  memcpy (&bits, &data, sizeof bits);
+  for (size_t i = 0; i < sizeof bytes; i++)
+    {
+      bytes[i] = (uint8_t)(bits >> (8 * i));
+    }
+  append_bytes (tape, bytes, sizeof bytes);
+  return tape;
+}
+
+/* Append a copy of DATA's bytes to TAPE and return TAPE.  DATA is left
+   untouched and stays owned by the caller.  */
+Code *
+append (Code *tape, Code *data)
+{
+  append_bytes (tape, data->cells, data->count);
+  return tape;
+}
+
+/* Encode STRING as its byte length in unsigned LEB128 followed by its bytes
+   (no terminating NUL).  Returns a new buffer owned by the caller.  */
+Code *
+encodeString (const char *string)
+{
+  size_t length = strlen (string);
+  Code *buffer = unsignedLEB128 (length);
+  append_bytes (buffer, string, length);
+  return buffer;
+}
+
+/* Return a new buffer holding a length prefix in unsigned LEB128 followed by
+   DATA's bytes.  The prefix is DATA->override_count when DATA->override is
+   set, otherwise DATA->count.  */
+Code *
+encodeVector (Code *data)
+{
+  size_t count = data->override ? data->override_count : data->count;
+  Code *buffer = unsignedLEB128 (count);
+  append (buffer, data);
+  return buffer;
+}
+
+/* Return a new buffer holding the id byte SECTION followed by
+   encodeVector (DATA).  DATA stays owned by the caller.  */
+Code *
+createSection (uint8_t section, Code *data)
+{
+  Code *buffer = new_code ();
+  append_byte (buffer, section);
+  append_owned (buffer, encodeVector (data));
+  return buffer;
+}
+
+/* Return a new buffer holding the eight header bytes every module starts
+   with: "\0asm" followed by 1, 0, 0, 0.  */
+static Code *
+new_module (void)
+{
+  static const uint8_t header[] = { 0, 'a', 's', 'm', 1, 0, 0, 0 };
+  Code *tape = new_code ();
+  append_bytes (tape, header, sizeof header);
+  return tape;
+}
+
+/* Build section SECTION whose content is a vector with ITEM as its only
+   element.  Takes ownership of ITEM.  */
+static Code *
+single_item_section (uint8_t section, Code *item)
+{
+  item->override = true;
+  item->override_count = 1;
+
+  Code *vector = encodeVector (item);
+  Code *result = createSection (section, vector);
+  free_code (vector);
+  free_code (item);
+  return result;
+}
+
+/* Build a module with one function exported as "main".  PARAMS holds
+   PARAM_COUNT parameter type bytes, RESULT is the result type byte, and
+   INSTRUCTIONS is the function's code without the trailing END.  Takes
+   ownership of INSTRUCTIONS.  */
+static Code *
+build_demo_module (const uint8_t *params, size_t param_count, uint8_t result,
+                   Code *instructions)
+{
+  /* Type section: the signature (PARAMS) -> RESULT.  */
+  Code *param_types = new_code ();
+  append_bytes (param_types, params, param_count);
+  Code *result_types = new_code ();
+  append_byte (result_types, result);
+
+  Code *signature = new_code ();
+  append_byte (signature, FUNCTION);
+  append_owned (signature, encodeVector (param_types));
+  append_owned (signature, encodeVector (result_types));
+  free_code (param_types);
+  free_code (result_types);
+
+  /* Function section: the function uses type index 0.  */
+  Code *type_index = new_code ();
+  append_byte (type_index, 0x00);
+
+  /* Export section: function index 0 under the name "main".  */
+  Code *export_entry = encodeString ("main");
+  append_byte (export_entry, EXPORT_FUNC);
+  append_byte (export_entry, 0x00);
+
+  /* Code section: no locals, the instructions, END.  */
+  Code *body = new_code ();
+  append_byte (body, EMPTY_ARRAY);
+  append_owned (body, instructions);
+  append_byte (body, END);
+  Code *function_body = encodeVector (body);
+  free_code (body);
+
+  Code *tape = new_module ();
+  append_owned (tape, single_item_section (TYPE, signature));
+  append_owned (tape, single_item_section (FUNC, type_index));
+  append_owned (tape, single_item_section (EXPORT, export_entry));
+  append_owned (tape, single_item_section (CODE, function_body));
+  return tape;
+}
+
+/* Build a module whose exported "main" takes two f32 values and returns
+   their sum.  Returns a new buffer owned by the caller.  */
+Code *
+demo_function_compile (void)
+{
+  static const uint8_t params[] = { f32, f32 };
+
+  Code *code = new_code ();
+  append_byte (code, LOCAL_GET);
+  append_owned (code, unsignedLEB128 (0));
+  append_byte (code, LOCAL_GET);
+  append_owned (code, unsignedLEB128 (1));
+  append_byte (code, F32_ADD);
+
+  return build_demo_module (params, sizeof params, f32, code);
+}
+
+/* Build a module whose exported "main" takes nothing and returns
+   6.7 + 8.5 as an f64.  Returns a new buffer owned by the caller.  */
+Code *
+demo_add_compile (void)
+{
+  Code *code = new_code ();
+  append_byte (code, F64_CONST);
+  append_f64 (code, 6.7);
+  append_byte (code, F64_CONST);
+  append_f64 (code, 8.5);
+  append_byte (code, F64_ADD);
+
+  return build_demo_module (NULL, 0, f64, code);
+}
+
+/* Print every token of the source text BUFFER with debug_printToken ().
+   Code generation is not implemented yet: the returned buffer, owned by the
+   caller, holds only the eight-byte module header.  */
+Code *
+compile (char *buffer)
+{
+  Code *tape = new_module ();
+
+  initTokenizer (buffer);
+  for (Token t = nextToken (); t.type != TOKEN_EOF; t = nextToken ())
+    {
+      debug_printToken (t);
+    }
+
+  return tape;
+}
diff --git a/compiler.h b/compiler.h
new file mode 100644
index 0000000..90b2279
--- /dev/null
+++ b/compiler.h
@@ -0,0 +1,276 @@
+#ifndef COMPILER_H
+#define COMPILER_H
+
+/* NOTE(review): the five header names below were lost when this patch was
+   extracted; they are reconstructed from what this header and compiler.c
+   use (bool, size_t, uint8_t, malloc, memcpy) -- confirm against the repo.  */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* WebAssembly instruction opcodes; each value is the opcode's byte.  */
+typedef enum Opcodes
+{
+  UNREACHABLE = 0,
+  NOP = 1,
+  BLOCK = 2,
+  LOOP = 3,
+  IF = 4,
+  ELSE = 5,
+  TRY = 6,                   // PROPOSED
+  CATCH = 7,                 // PROPOSED
+  THROW = 8,                 // PROPOSED
+  RETHROW = 9,               // PROPOSED
+  BR_ON_EXN = 10,            // PROPOSED
+  END = 11,
+  BR = 12,
+  BR_IF = 13,
+  BR_TABLE = 14,
+  RETURN = 15,
+  CALL = 16,
+  CALL_INDIRECT = 17,
+  RETURN_CALL = 18,          // PROPOSED
+  RETURN_CALL_INDIRECT = 19, // PROPOSED
+  DROP = 26,
+  SELECT = 27,
+  SELECT_T = 28,             // PROPOSED
+  LOCAL_GET = 32,
+  LOCAL_SET = 33,
+  LOCAL_TEE = 34,
+  GLOBAL_GET = 35,
+  GLOBAL_SET = 36,
+  TABLE_GET = 37,            // PROPOSED
+  TABLE_SET = 38,            // PROPOSED
+  I32_LOAD = 40,
+  I64_LOAD = 41,
+  F32_LOAD = 42,
+  F64_LOAD = 43,
+  I32_LOAD8_S = 44,
+  I32_LOAD8_U = 45,
+  I32_LOAD16_S = 46,
+  I32_LOAD16_U = 47,
+  I64_LOAD8_S = 48,
+  I64_LOAD8_U = 49,
+  I64_LOAD16_S = 50,
+  I64_LOAD16_U = 51,
+  I64_LOAD32_S = 52,
+  I64_LOAD32_U = 53,
+  I32_STORE = 54,
+  I64_STORE = 55,
+  F32_STORE = 56,
+  F64_STORE = 57,
+  I32_STORE8 = 58,
+  I32_STORE16 = 59,
+  I64_STORE8 = 60,
+  I64_STORE16 = 61,
+  I64_STORE32 = 62,
+  MEMORY_SIZE = 63,
+  MEMORY_GROW = 64,
+  I32_CONST = 65,
+  I64_CONST = 66,
+  F32_CONST = 67,
+  F64_CONST = 68,
+  I32_EQZ = 69,
+  I32_EQ = 70,
+  I32_NE = 71,
+  I32_LT_S = 72,
+  I32_LT_U = 73,
+  I32_GT_S = 74,
+  I32_GT_U = 75,
+  I32_LE_S = 76,
+  I32_LE_U = 77,
+  I32_GE_S = 78,
+  I32_GE_U = 79,
+  I64_EQZ = 80,
+  I64_EQ = 81,
+  I64_NE = 82,
+  I64_LT_S = 83,
+  I64_LT_U = 84,
+  I64_GT_S = 85,
+  I64_GT_U = 86,
+  I64_LE_S = 87,
+  I64_LE_U = 88,
+  I64_GE_S = 89,
+  I64_GE_U = 90,
+  F32_EQ = 91,
+  F32_NE = 92,
+  F32_LT = 93,
+  F32_GT = 94,
+  F32_LE = 95,
+  F32_GE = 96,
+  F64_EQ = 97,
+  F64_NE = 98,
+  F64_LT = 99,
+  F64_GT = 100,
+  F64_LE = 101,
+  F64_GE = 102,
+  I32_CLZ = 103,
+  I32_CTZ = 104,
+  I32_POPCNT = 105,
+  I32_ADD = 106,
+  I32_SUB = 107,
+  I32_MUL = 108,
+  I32_DIV_S = 109,
+  I32_DIV_U = 110,
+  I32_REM_S = 111,
+  I32_REM_U = 112,
+  I32_AND = 113,
+  I32_OR = 114,
+  I32_XOR = 115,
+  I32_SHL = 116,
+  I32_SHR_S = 117,
+  I32_SHR_U = 118,
+  I32_ROTL = 119,
+  I32_ROTR = 120,
+  I64_CLZ = 121,
+  I64_CTZ = 122,
+  I64_POPCNT = 123,
+  I64_ADD = 124,
+  I64_SUB = 125,
+  I64_MUL = 126,
+  I64_DIV_S = 127,
+  I64_DIV_U = 128,
+  I64_REM_S = 129,
+  I64_REM_U = 130,
+  I64_AND = 131,
+  I64_OR = 132,
+  I64_XOR = 133,
+  I64_SHL = 134,
+  I64_SHR_S = 135,
+  I64_SHR_U = 136,
+  I64_ROTL = 137,
+  I64_ROTR = 138,
+  F32_ABS = 139,
+  F32_NEG = 140,
+  F32_CEIL = 141,
+  F32_FLOOR = 142,
+  F32_TRUNC = 143,
+  F32_NEAREST = 144,
+  F32_SQRT = 145,
+  F32_ADD = 146,
+  F32_SUB = 147,
+  F32_MUL = 148,
+  F32_DIV = 149,
+  F32_MIN = 150,
+  F32_MAX = 151,
+  F32_COPYSIGN = 152,
+  F64_ABS = 153,
+  F64_NEG = 154,
+  F64_CEIL = 155,
+  F64_FLOOR = 156,
+  F64_TRUNC = 157,
+  F64_NEAREST = 158,
+  F64_SQRT = 159,
+  F64_ADD = 160,
+  F64_SUB = 161,
+  F64_MUL = 162,
+  F64_DIV = 163,
+  F64_MIN = 164,
+  F64_MAX = 165,
+  F64_COPYSIGN = 166,
+  I32_WRAP_I64 = 167,
+  I32_TRUNC_F32_S = 168,
+  I32_TRUNC_F32_U = 169,
+  I32_TRUNC_F64_S = 170,
+  I32_TRUNC_F64_U = 171,
+  I64_EXTEND_I32_S = 172,
+  I64_EXTEND_I32_U = 173,
+  I64_TRUNC_F32_S = 174,
+  I64_TRUNC_F32_U = 175,
+  I64_TRUNC_F64_S = 176,
+  I64_TRUNC_F64_U = 177,
+  F32_CONVERT_I32_S = 178,
+  F32_CONVERT_I32_U = 179,
+  F32_CONVERT_I64_S = 180,
+  F32_CONVERT_I64_U = 181,
+  F32_DEMOTE_F64 = 182,
+  F64_CONVERT_I32_S = 183,
+  F64_CONVERT_I32_U = 184,
+  F64_CONVERT_I64_S = 185,
+  F64_CONVERT_I64_U = 186,
+  F64_PROMOTE_F32 = 187,
+  I32_REINTERPRET_F32 = 188,
+  I64_REINTERPRET_F64 = 189,
+  F32_REINTERPRET_I32 = 190,
+  F64_REINTERPRET_I64 = 191,
+  OP_NULL = 0xD0,
+  IS_NULL = 0xD1,
+  REF_FUNC = 0xD2,
+} Opcodes;
+
+/* Type bytes used in function signatures.  */
+typedef enum Types
+{
+  i32 = 0x7F,
+  i64 = 0x7E,
+  f32 = 0x7D,
+  f64 = 0x7C,
+  v128 = 0x7B,
+  funcref = 0x70,
+  externref = 0x6F,
+} Types;
+
+/* FUNCTION starts a function signature; EMPTY_ARRAY is a zero count.  */
+typedef enum FunctionTypes
+{
+  EMPTY_ARRAY = 0,
+  FUNCTION = 96
+} FunctionTypes;
+
+/* Kind byte of an export entry.  */
+typedef enum ExportTypes
+{
+  EXPORT_FUNC = 0,
+  EXPORT_TABLE = 1,
+  EXPORT_MEM = 2,
+  EXPORT_GLOBAL = 3
+} ExportTypes;
+
+/* Section id bytes.  */
+typedef enum Section
+{
+  CUSTOM = 0,
+  TYPE = 1,
+  IMPORT = 2,
+  FUNC = 3,
+  TABLE = 4,
+  MEMORY = 5,
+  GLOBAL = 6,
+  EXPORT = 7,
+  START = 8,
+  ELEMENT = 9,
+  CODE = 10,
+  DATA = 11
+} Section;
+
+/* A byte buffer: CELLS holds COUNT bytes.  When OVERRIDE is true,
+   encodeVector () writes OVERRIDE_COUNT as the length prefix instead of
+   COUNT.  */
+typedef struct Code Code;
+struct Code
+{
+  uint8_t *cells;
+  size_t count;
+  bool override;
+  size_t override_count;
+};
+
+/* signedLEB128, unsignedLEB128, encodeString, encodeVector, createSection,
+   the demo_* functions and compile return a new heap-allocated buffer owned
+   by the caller.  The append* functions grow TAPE in place, releasing its
+   previous cell array, and return TAPE.  */
+Code *signedLEB128 (size_t num);
+Code *unsignedLEB128 (size_t num);
+Code *append_byte (Code *tape, uint8_t data);
+Code *append_f64 (Code *tape, double data);
+Code *append (Code *tape, Code *data);
+Code *encodeString (const char *string);
+Code *encodeVector (Code *data);
+Code *createSection (uint8_t section, Code *data);
+Code *demo_function_compile (void);
+Code *demo_add_compile (void);
+Code *compile (char *buffer);
+
+#endif
diff --git a/docs/current.html b/docs/current.html
new file mode 100644
index 0000000..4f4eecf
--- /dev/null
+++ b/docs/current.html
@@ -0,0 +1,141 @@
+var'aq Reference Implementation 9/20/2000
+
+

var'aq Release Notes

+


+maintained by Brian Connors
+revised 2 January 2001

+ +

Version Information

+ +This is the status of the var'aq project as of 20 September 2000.

+ +This is the Azetbur release, after Chancellor Gorkon's daughter and successor in Star Trek VI: The Undiscovered Country.

+ +

Status Summary

+ +We are moving ahead! Console I/O (on a very rudimentary level) is here, as well as much of the mathematical functionality and string handling. Lists are not here yet, and a more robust I/O model must await a better understanding of Klingon hacker culture to understand issues like authentication and how distributed processing might be used.

+ +This release is approaching usability. Files are not here yet (still under discussion), but there is console-level I/O and enough string support to do something useful.

+ +j proctor from the varaq-dev mailing list has contributed some sample code and a more flexible translator program than the one I wrote and distributed. His interpreter will be found in the translators directory.

+ +

Interpreter Status

+ +The interpreter is available with both Klingon and English keywords; however, keep in mind that maintenance is done mostly on the English version and the translator program is not ready for prime time. This will be fixed eventually.

+ +The following keywords are currently supported:

+ +

    +
  • Control Structures +
      +
    • ifyes/HIja'chugh +
    • ifno/ghobe'chugh +
    • choose/wIv +
    • ~ (quote/lI'moH) +
    • name/pong +
    • set/cher +
    • repeat/vangqa' +
    • eval/chov +
    +

    +

  • Stack Operations +
      +
    • pop/woD +
    • dup/latlh +
    • exch/tam +
    • clear/chImmoH +
    • remember/qaw +
    • forget/qawHa' +
    +

    +

  • Arithmetic Operators +
      +
    • add/boq +
    • sub/boqHa' +
    • mul/boq'egh +
    • div/boqHa'egh +
    • mod/chuv +
    • pow/law'qa'moH +
    • rand/mIS +
    • add1/wa'boq +
    • sub1/wa'boqHa' +
    • pi/HeHmI' +
    • e/ghurmI' +
    • clip/poD +
    • smooth/Hab +
    • howmuch/'ar +
    +

    +

  • Trig/Log operators +
      +
    • sin/joq +
    • cos/joqHa' +
    • tan/qojmI' +
    • atan/qojHa' +
    • ln/ghurtaH +
    +

    +

  • Relational Operators

    + +All relational operators have been implemented. Thanks to j proctor for null?/pagh'a', negative?/taH'a', int?/HabmI''a', and number?/mI''a'.

    + +

  • I/O Operators

    + +The var'aq I/O model remains to be defined, but the basic console I/O functions are now present.

    + +

  • List Operators

    + +List support does not exist yet.

    + +

  • String Operators

    + +The current set of operators is sufficient to implement the others in the spec, but they are a lower priority. +

      +
    • strtie/tlheghrar +
    • streq?/tlheghrap'a' +
    • strcut/tlheghpe' +
    • strmeasure/tlheghjuv +
    • compose/naQmoH +
    +
+ +

Implementation Issues and/or Bug Fixes

+ +

    +
  • The interpreter now knows to ignore hashbang (i.e. #!/usr/bin/varaq-whatever) lines. They can double as end-of-line comments if you like, but you really shouldn't; these are here primarily to allow the forthcoming vqmake program to spit out program files that a Unix shell can execute directly (which would be .vqx files on any other platform). +
  • Took the "\t" out of disp/cha' to facilitate a possible CGI conversion. Better formatted output will be a priority very shortly. +
  • The interpreter also doesn't see lines beginning with //. This is to allow for static linking of .vql files (though if someone wants to implement var'aq "shared libraries" I'll be glad to add the revs to the interpreter). +
  • Cleaned up the execution logic a bit -- the code to execute a proc object was already in there about four times and was about to go in again for compose/naQmoH, so I abstracted it into a routine called execblock() per Chris' advice (RTFC). +
  • Merged in j proctor's additions. +
+ +

Specification Status

+ +A few changes:

+

    +
  • A number of operators have been added: +
      +
    • //name -- equivalent to C #include. +
    • Numerous "constants" (actually more like environment variables). +
    +
  • Several operators have been renamed: +
      +
    • strcat is now strtie to bring it in line with the Klingon tlheghrar. +
    • The basic arithmetic operators have been given new +Klingon names to coincide with recent additions to the Klingon linguistic +canon. +
    +
  • More coinages in the section headings. +
+ +

Acknowledgements

+ +The varaq-dev mailing +list, especially j proctor for +his code contributions and Alan Anderson for providing the canonical +Klingon math operators, and to Glenn Gaslin for the impetus to finally +get this thing out the freakin' door.

+ +


+ +Click here to go to the var'aq home page. diff --git a/docs/var'aq Filesystem Supplement.html b/docs/var'aq Filesystem Supplement.html new file mode 100644 index 0000000..5c09973 --- /dev/null +++ b/docs/var'aq Filesystem Supplement.html @@ -0,0 +1,273 @@ + + + + + + + + +var'aq Filesystem Supplement + + + + +
The Wayback Machine - https://web.archive.org/web/20091026220614/http://geocities.com/connorbd/varaq/proposals/vqfs.html
+
+
+ +
+
+

Filesystem Functions in var'aq


+

De'ghomjangwI'mey

+ +

F.1 Introduction

+ +

F.1.1 vfs Described

+ +This page describes a simple filesystem extension to the var'aq +programming language. It is a fairly simple scheme that gives little or no +thought to the intricacies of security on a var'aq-based system; +its primary functions are limited to creating, reading, writing, and +appending files and it makes no pretensions of being a production-quality +filesystem.

+ +Certain non-Earthly features do exist, however; there is no concept of a +file descriptor inherent in the design. File reads and writes are done by +copying text data (no binary data at this stage) into and out of strings. +(A close equivalent to a file descriptor in vfs would be a variable with +an attached pathname; this is a fairly common idiom in systems that must +keep constant access to specific files, but is not explicitly a part of +the filesystem.)

+ +

F.1.2 Pathnames/SammoHpongmey

+ +This document makes frequent reference to pathnames (SammoHpongmey +or beacon-names). The precise meaning of this term approximates the Earth +understanding of the term, though it may also include out-of-band +information such as authentication tickets and content filters. + +

F.1.3 vfs Error Codes

+ +All vfs functions have basically similar error semantics. Since the system +is stateless, any function that cannot find the file it's looking for will +return a null pathname (SammoH Hegh, dead beacon). + + +

F.2 Creating and Deleting Files

+ +

F.2.1 fcreate/cher_De'ghom

+ +pathname cher_De'ghom -

+ +Creates a file with the specified pathname. + +

F.2.2 flock/Doq_De'ghom

+ +pathname Doq_De'ghom -

+ +Restricts write access to the file at pathname to the current +process. Generally used in roughly the same place as an fopen() in +C, though the semantics are drastically different.

+ +

F.2.3 funlock/DoqHa'_De'ghom

+ +pathname DoqHa'_De'ghom -

+ +Unlocks the file at pathname. This is generally handled by the +filesystem garbage collector at process termination time, but it is +considered good practice to take care of it yourself, especially in an +embedded environment.

+ +

F.2.4 fdelete/HoH_De'ghom

+ +pathname HoH_De'ghom -

+ +Removes the file at pathname.

+ +

F.3 Reading and Writing Files

+ +

F.3.1 fread/jotlh_De'ghom

+ +pathname jotlh_De'ghom str

+ +Copies the contents of the file at pathname into str; +returns null pathname on failure.

+ +

F.3.2 fwrite/tatlh_De'ghom

+ +str pathname tatlh_De'ghom -

+ +Replaces the contents of the file at pathname with str.

+ + + + +1 + + \ No newline at end of file diff --git a/docs/varaqfaq.html b/docs/varaqfaq.html new file mode 100644 index 0000000..16bff53 --- /dev/null +++ b/docs/varaqfaq.html @@ -0,0 +1,232 @@ +Everything you were about to ask... +

The var'aq FAQ List


+created by Brian Connors
+created 19 May 2000
+last updated 17 July 2000
+
+This document attempts to answer some of the more likely questions about +var'aq: what it is, where to get it, where to find more about it. It's +not a tutorial or a spec; there are other documents that describe that.

+ +

    +
  1. What is Var'aq?

    +Var'aq (more properly, var'aq) is sort of a fanfic programming language +based on the Klingon language used on the Star Trek television series and movies. +Klingon, created by Marc Okrand and "maintained", in a sense, by the Klingon Language Institute independently of +Paramount's auspices, is really its own separate deal; this is merely a fan's +attempt to give a little more richness to the culture as well as exercising a +love of languages and a desire to learn more Perl.

    + +

  2. Where can I find out more about it?

    + +The var'aq home page is located at http://www.geocities.com/connorbd/varaq. +You should be able to find pretty much everything interesting about the +language here, including specifications, sample code, and implementation +notes.

    + +As of this writing, the interpreter is in a basically functional state but +implements less than half of the specification. Just something you should keep +in mind.

    + +

  3. I heard something +about a "Klingon Forth". Is this it? And why isn't it called +loSDIch?

    +Yes, in a way. It's a stack-based RPN language like Forth or PostScript; the +reason for this has nothing to do with an original desire to emulate one of +those languages, but simply the unusual object-verb-subject syntax of Klingon. +This sort of dictated the required form of the language right up front, ruling +out a more traditional ALGOL-like syntax (based on English). Stack-based +languages are actually easier to parse anyway, especially in Perl: just chomp +and process. It is also an impure functional language in the same vein as +Lisp or ML; it supports local variables, but it is really intended to do +everything off the stack.

    + +As for calling it loSDIch (Klingon for fourth), that would be an +obvious joke title to anyone who actually spoke Klingon; this being at least +a semi-serious exercise in artificial culture development, such a title would +be noticeably silly at best. var'aq is actually completely meaningless, +though it suggests identification with a famous Klingon mathematician or +computer scientist in sort of the same way as Pascal recalls Blaise Pascal or +Ada recalls Ada Lovelace. In any case, the name var'aq came before the form of the +language. (In any case, var'aq is based more directly on +PostScript anyway. But they're all part of the same family.)

    + +

  4. So what is this thing eventually going to be able to +do?

    + +Eventually? A lot. The intent is to offer such things as concurrency and even +distributed processing support at some point (imagine that, a toy language +designed for a Beowulf cluster), perhaps even basic windowing support or the +like. Right now, I'm just shooting for such fancy features as, say, +functions. Or loops. String support. That sort of thing.

    + +

  5. Describe var'aq for me in terms +of other languages. You know, like a car or a beer or something like +that.

    +As stated above, var'aq's closest cyberlinguistic relative is probably +PostScript, with a dash of Lisp thrown in. (This, incidentally, is sort of a +Perl artifact, since Perl data typing is incredibly lax. It's just the easiest +way to write it.)

    + +Chris Pressey, creator of the notorious Befunge language, +maintains a list of programming languages described as cars; in those terms, +var'aq would be described thusly: A 2000 VW Turbo Beetle with lots +of amateurishly drawn Star Trek graphics painted 60s-style on the doors, a +Starfleet Academy sticker in the window, and a custom car radio and A/C system +run completely off an HP calculator.

    + +In terms of genetics, var'aq is the bastard child of a back-room tryst +between PostScript and Lisp after a Star Trek convention.

    + +In terms of beer... var'aq is bloodwine. Serve hot, drink carefully +because it'll mess you up if you don't.

    + +There is a var'aq 99 Bottles Of Beer program, but since it won't yet +parse I won't be posting it just yet.

    + +

  6. Why doesn't this construct translate to its PostScript/Forth +equivalent?

    + +The question is one of verisimilitude. The likelihood of a Klingon concept +being an exact translation of its English equivalent isn't always good. +Consequently, pure translation of an Earth language might make for a cute +joke, but it would sacrifice plausibility. A prime example is the +qaw/qawHa' instructions, which perform the same function as +PostScript's mark/cleartomark instructions but literally translate +to remember/forget; the idea is that the metaphor chosen in Klingon +might more reflect the purpose of marking the stack than the actual act. +Incidentally, it's quite true that many of the idioms chosen for +var'aq are +anything but obvious. This is the reason why; though mathematics is +considered universal, it's not too likely that everything would be +described in the same way. (That said, I did cheat in a few places; for +example, the word for logarithm is a direct translation from the Greek +logarithmos, meaning roughly "logic-number".)

    + + +For a rather thorough and creative discussion on the issues involved +in translation, you might wish to look at Le ton beau de Marot by +Douglas Hofstadter (the author of the hacker classic Goedel, Escher, +Bach: An Eternal Golden Braid), an intricate and well-written look at +the pitfalls of translation between languages.

    + + +

  7. Does Paramount know about this?

    + +Not until someone sends me Michael Okuda's email address. (NB Michael Okuda is +the visual effects guy that created the modern Starfleet look and feel. I +think he'd be interested, but I make no assumptions about officialdom.)

    + +

  8. Does the KLI know about this?

    + +As a matter of fact, yes. Mark +Shoulson is the project's Head Linguistic Consultant and is in +great measure responsible for getting the spec to reflect real Klingon +constructions.

    + +

  9. Why isn't the Klingon version guaranteed to be in sync with the English +version?

    + +Good question. The answer is that I don't speak Klingon; as a linguistic work +of art, it's a beauty, but I don't have much reason to learn it. As a result, +the Klingon version is mechanically translated via a Perl filter from English +to Klingon so I don't have to waste time synchronizing two separate source +bases.

    + +

  10. Will there ever be...

    + +

      +
    • ...a var'aq-to-C compiler?

      + +Not likely. Such a beast would essentially spew out function calls to simulate +var'aq operators and control structures and would therefore be a +gigantic mess.

      + +

    • ...a version not written in Perl?

      + +Eventually, mainly because I'd love to get this running on a PalmPilot. The +Perl implementation is phenomenally ugly anyway, so a serious rewrite in C +would definitely be in order at some point. Some might, for example, +actually find var'aq useful for the occasional scripting job, and +running an interpreter on an interpreter is if nothing else a great way to +waste tremendous amounts of time.

      + +

    • ...a non-Unix version?

      + +var'aq should run as is on any platform that can handle Perl. Mac, +Unix, Windows, whatever. Next question.

      + +

    + +
  11. Who is responsible for this?

    + +The principal members of the team as of this version of the FAQ are:

    +

      +
    • Brian Connors -- Brian created +the +concept and is currently doing most of the implementation work and +documentation. He's mostly a Perl-Linux-MacOS hacker and is currently +available for employment; see his resume if you're +looking. He also considers himself something of an Open Source activist and considers +var'aq his most worthwhile contribution to the movement so far. + +
    • Chris Pressey -- +Chris +knocked around a number of ideas for the var'aq system in the early +planning stages of the project and provided the first draft of Bearfood, a +very, very small Forth interpreter in Perl that provided the guts of the +procedure definition code. Chris' biggest area of expertise is pushing +the frontiers of programming language design; check out his Cat's Eye Technologies Esoteric +Topics web page if you'd like to see some of his better-known work +(especially Befunge, a language that may have displaced Intercal as one +of the most perverse in existence). + +
    • Mark +Shoulson -- One +of the higher-ranking members of the Klingon +Language Institute, Mark, being the only one of us who actually speaks +Klingon, is our linguistics consultant. His role so far has been making +sure that the Klingon words in the spec actually mean what they're +supposed to mean, a job important enough that he gets coauthor credit with +Brian and Chris. +
    + +Shouts out go to j proctor and Alan Anderson from the varaq-dev +mailing list for their contributions as well. +

    + +

  12. Can I copy/borrow var'aq?

    + +The spec is as open as any such spec gets. Feel free to implement your +own; if you want to use our code it's freeware under the Mozilla Public License (Why not GPL? +That's a separate +document...). As mentioned above, Brian's heavy into that open source +thing, so naturally in peer review we trust. Of course, you should +acknowledge us, and we'd obviously love a cut of anything you happen to +make off our work...

    + +

  13. Where can I find out more about var'aq?

    + +You can go to the var'aq home page +at Yahoo! Geocities to find out everything there is to know about +var'aq. At the website, you'll find all sorts of useful information +(most of which is included with the distribution) as well as instructions +for subscribing to the varaq-dev mailing list at eGroups. Good lu...er... Qapla'! + +

+
+ +Click here to return to the var'aq home +page.
+Click here to return to +2266 Research Triangle. diff --git a/docs/varaqspec.html b/docs/varaqspec.html new file mode 100644 index 0000000..1037eed --- /dev/null +++ b/docs/varaqspec.html @@ -0,0 +1,918 @@ +var'aq Interim Specification 2 January 2001 +

var'aq Preliminary Specification


+Brian Connors
+29 May 2000
+last updated 02 January 2001
+
+ +

0 Introduction

+ +This specification documents the +programming language var'aq. var'aq is a programming +language created as part of an exercise to imagine the hacker culture of +Star Trek's Klingon race. The +Klingon culture was chosen because it is probably the best-realized of all +such cultures in the Science Fiction/Fantasy genres of literature, and the +Klingon language is sufficiently different from English to make language +design a significant challenge.

+ +

0.1 Language Levels

+ +var'aq is divided into three implementation levels, each a subset +of its successor. An implementation conforming to the current +var'aq spec should be labeled with the level of conformance.

+ +

    +
  • Level 0 -- The focus of this preliminary spec, a Level 0 +var'aq implementation minimally contains all operators and data +structures listed in this specification. Level 0 covers all basic +mathematical, relational, and logical operators and a minimal set of +Turing-complete control constructs and I/O operators. A Level 0 +implementation does not necessarily have to be written in Klingon, but +if not an external Klingon-to-English translation filter is required. +
  • Level 1 -- NOT YET DOCUMENTED -- A Level 1 +implementation constitutes a full text-mode implementation of +var'aq, including basic concurrency support, stream-based +file I/O, support for the +var'aq Standard MPI Binding (not yet available), and full support +for Klingon-language constructs. Also includes support for packages +(jangwI'ghom). + +
  • Level 2 -- NOT YET DOCUMENTED -- A Level 2 +implementation has all of the above, as well as package support, virtual +device support +(minimally, printer, framebuffer, and network connection as well as +console), and is suitable for use as a scripting or systems programming +language. A POSIX binding is optional but highly recommended. +
+

+ +

0.2 Legalese, Credit, Etc.

+ +var'aq was conceived by Brian +Connors and implemented by Brian with help from Chris Pressey and Mark Shoulson. It is an independent +project and is not connected with the Star Trek people (i.e. +Paramount/Viacom).

+ +This document is (c)2000-2001 Brian Connors and may be distributed freely as +long as this copyright notice is retained. You may freely implement for +private use a var'aq implementation with no restriction; you must +contact me for certification (i.e. I make sure you're following the spec) +in order to distribute your implementation as freeware. This specification +may not be used for commercial purposes without a separate licensing +arrangement.

+ +

0.3 Apologies

+ +You'll notice that I tend to drift in and out of character in this document. +The only thing I can say to this is that character isn't a great concern in a +specification document; I may do an extensive revision later on, but as I +write this var'aq isn't even alpha-quality code, and the spec is +subject to change anyway. Just enjoy and try to keep up; we'll worry about +"realism" later. Thank you for your patience.

+ +

1 Language Overview

+ +var'aq is a stack-based, RPN programming language with points of +similarity to Lisp, Forth, and PostScript. It is more of a functional +language than an imperative (Algol-like) language; most operations are +dependent on the stack and bypass the local variable store altogether.

+ +

1.1 Notation Used In This Specification

+ +All operator names will be given in English and Klingon. The notation and +format of the operator entries are cribbed from Adobe's PostScript Language +Specification.

+ +

1.2 Basic Concepts

+ +var'aq is a fairly simple, RPN-based language where pretty much +everything is an operator. It is fairly loosely typed and little +distinction is made between code and data. Typechecking is not strictly +required (and in fact does not exist in the current reference +implementation) but is encouraged.

+ +

1.3 Data Types

+ +var'aq recognizes only a few data types so as to keep the programming +model as simple as possible.

+ +

1.3.1 number/mI'

+ +var'aq numbers are a bit nonspecific as to representation; there is no +support for complex numbers in a Level 0 implementation (Level 1 and above do +include support). In general, integers (HabmI') and reals are +considered interchangeable where possible, and the interpreter is expected to +use whichever is more efficient. Note that integer operations such as +Habwav and the bitwise operators will generally silently truncate the +operands (optionally giving a runtime warning if so requested by the user).

+ +

1.3.2 function/jangwI'

+ +A function in var'aq is understood to be much the same thing as a +lambda closure, i.e. a procedure with a return value (thus the Klingon term +jangwI', meaning "answerer"). Functions are defined using the { } +operators, and may be assigned names using the pong operator.

+ +

1.3.3 list/ghomHom

+ +var'aq lists (ghomHom, meaning "cluster" (sort of)) are in some +ways very similar to those of Lisp.

+ +

1.3.4 string/tlhegh

+ +Strings in var'aq (and Klingon programming in general) seem to be +considered something of a black art; Klingon computer science appears to have +no real concept of anything like regular expressions, and as a result decent +string handling is the territory of those trusted to write things like +language compilers (ngoq poSmoH? The idea would baffle a Klingon... so +insecure, so random...). Brute-force string comparisons seem to be the order +of the day in current practice. That said, var'aq strings also have a +couple of important properties: a) white space is not significant; b) they can +be decomposed into lists using the jor operator; and c) they can be +used as literal names (though it's not a terribly good idea). + +

1.4 Basic Assumptions

+ +

1.4.1 Number Representations

+ +All standard var'aq systems are assumed to be binary machines using +eight or sixteen-bit bytes (word size is unimportant). Negative integer +values are represented as two's-complement. A floating-point implementation is +not specified, though the Klingon floating-point standard is not drastically +different from IEEE floating point (differing bit positions, primarily). If a +standard float must be chosen, go with IEEE double-precision.

+ +

1.4.2 Garbage Collection

+ +All var'aq implementations are required to use garbage collection +(woDHa', roughly meaning "retrieve" or "unwaste"). No standard +algorithm is specified, and this requirement may be ignored if the implementor +uses an environment where this is assumed (Perl, for example).

+ +

1.4.3 Filetypes

+ +Klingon military computer systems use a sort of modular-database storage +scheme in which the concept of "file" doesn't mean a whole lot. However, on +systems where files are the common way of doing business, the following +extensions and MIME types are standard:

+ +

    +
  • .vq -- application/varaq -- a standard var'aq source file +
  • .vqe -- application/varaq-engl -- an English-keyword +var'aq source file +
  • .vql -- application/vqlib -- a var'aq library source file +
  • .vqx -- application/vqexe -- a var'aq executable resource +file +
+ +Note that this is being specified here mainly as a matter of convenience. The +var'aq resource file format has not yet been defined.

+ +

2 Language Basics

+ +This section describes the fundamental var'aq language constructs +and data types.

+ +

2.1 Stack Operations

+ +These operations directly manipulate the var'aq operand stack. The +operand stack can hold any of four kinds of data: numbers (real or +integer), strings, functions, or lists. It is best described as +"translucent", similar to the transparent stack of Forth or PostScript but +somewhat more restricted. The internal data representation of the stack is +not available to the programmer.

+ +

2.1.1 pop/woD

+ +obj woD -

+ +Pops and discards the top item on the stack. The literal meaning is +discard.

+ +Errors: stackUnderflow

+ +

2.1.2 dup/latlh

+ +obj latlh obj obj

+ +Duplicates the top object on the stack.

+ +Errors: stackUnderflow

+ +

2.1.3 exch/tam

+ +obj1 obj2 tam obj2 obj1

+ +Inverts the order of the top two objects on the stack.

+ +Errors: stackUnderflow

+ +

2.1.4 clear/chImmoH

+ +... obj chImmoH -

+ +Empties the stack.

+ +Errors: none

+ +

2.1.5 remember/qaw

+ +- qaw flag

+ +Puts a flag (like PostScript's mark) on the stack. The internal +representation of the flag is not available to the programmer.

+ +Errors: none

+ +

2.1.6 forget/qawHa'

+ +... flag ... qawHa' ...

+ +Clears the stack down to the flag and pops the flag. If there is no flag +present, the stack is emptied completely.

+ +Errors: none

+ +

2.1.7 dump/Hotlh (lit. scan)

+ +... Hotlh ...

+ +Prints the contents of the operand stack to STDOUT without changing them. +Note: the Hotlh operator is a debugging operator and is not +intended for use in programs; it is merely documented here because it +might be useful to a var'aq developer. In particular, the output +format of this operator is implementation-defined and will not be +specified in this document. Hotlh may be redefined to take such +arguments as the implementor feels appropriate.

+ +Errors: implementation-defined. + +

2.2 Data/Code Operations

+ +var'aq, like many similar languages, does not distinguish between +code and data. These operations include operators to associate names with +objects and executable procedures, as well as operators to define and +manage data structures. Note that variables and procedures live in a +common namespace, since the act of pushing the content of a variable is +essentially the same as executing the variable's name.

+ +

2.2.1 ~ (quote/lI'moH)

+ +- ~ obj obj

+ +The ~ operator is a special form, as it is not a +postfix operator. When the interpreter encounters a ~, it pushes the next +token on the stack as is regardless of whether it is a defined name. +(Attempting to push an undefined name without a ~ will generate an +undefinedName error.)

+ +The literal meaning of this operator's name is "make useful". + +Errors: none

+ +

2.2.2 {

+ +Begins the creation of an anonymous procedure. The process is +implementation-dependent.

+ +

2.2.3 }

+ +- } proc

+ +Completes procedure construction and pushes a reference to the completed +procedure on the stack. Does not execute the procedure.

+ +Errors: noDefinedProc + +

2.2.4 name/pong

+ +obj id pong -

+ +Associates obj with id and places it in the system lookup +space. Conventionally used to associate new operator names with procedure +objects.

+ +Example: ~ add3 { chel chel cha' } pong

+Pushes the name add3 and a procedure object on the stack, then +binds the name to the procedure.

+ +Errors: stackUnderflow, noDefinedProc

+ +

2.2.5 set/cher

+ +obj id cher -

+ +Reassigns the value of a name already in the system lookup space. Used +primarily for variable assignments.

+ +Errors: stackUnderflow, noSuchName

+ +

2.2.6 (* ... *) (comment)

+ +Marks a comment in a program. All such comments are treated as single tokens +and ignored.

+ +

2.2.7 //name

+ +Causes the interpreter to import a file with the name name.vql and +execute it as if it is part of the currently executing program. This can be +handled by an external static linker if there is no shlib-like facility in the +interpreter. Essentially equivalent to #include in C.

+ +

2.3 Control Flow

+var'aq supports a small but sufficient supply of conditional and +iterative operators.

+ +

2.3.1 ifyes/HIja'chugh

+ +bool proc HIja'chugh -

+ +Pops the proc object off the stack, then evaluates the boolean. If it's +true, the proc object is evaluated; otherwise, it's thrown out.

+ +Errors: stackUnderflow, noDefinedProc

+ +

2.3.2 ifno/ghobe'chugh

+ +bool proc ghobe'chugh -

+ +Similar to HIja'chugh above, but executes proc only if bool is false. +

+ +Errors: stackUnderflow, noDefinedProc

+ +

2.3.3 choose/wIv

+ +bool wIv bool bool

+ +Duplicates a boolean value on top of the stack. Allows paired +HIja'chugh/ghobe'chugh clauses.

+ +Note: To the untrained eye, it may seem as though wIv and latlh are +identical. This is true in the reference implementation, but may not be in any +version that actually does some level of type checking. This bit of +syntactic sugar should never be relied upon; always use wIv in this +situation.

+ +

2.3.4 eval/chov

+proc chov -

+ +Pops a proc object off the stack and executes it.

+ +Errors: stackUnderflow, noDefinedProc

+ +

2.3.5 escape/nargh

+ +bool nargh -

+ +Exit the current procedure. Useful for exit conditions on +loops. Will terminate the current session if used top-level.

+ +

2.3.6 repeat/vangqa'

+ +val proc vangqa' -

+ +Pops val and proc off the stack and executes proc val times.

+ +

2.4 List Operations

+ +var'aq supports a series of operators for management of lists +(ghomHom, which seems to mean something like "cluster"). These +primitives are the language's primary way of managing aggregate objects and +work much like similar operators in LISP; a more sophisticated paradigm, such +as OO extensions or the like, can be built with these operators.

+ +Note that "objects" as they stand in var'aq are largely singletons as +in JavaScript; there is no inherent concept of object-orientation or anything +like it in standard var'aq. + +

2.4.1 (

+ +Begins a list definition.

+ +

2.4.2 )

+ +( item1 item2 ... ) list

+ +Creates a list and pushes it onto the stack.

+ +

2.4.3 split/SIj

+ +list SIj item1 list

+ +Pops a list off the stack and returns the first item and the rest of the +list.

+ +

2.4.4 cons/muv

+ +list item1 ... muv list

+ +Takes an object and adds it to the head of a list. Equivalent to the LISP +cons operator. + +

2.4.5 shatter/ghorqu'

+ +list ghorqu' item1 item2 ...

+ +Reduces a list to its component elements and pushes them on the stack in +order.

+ +(Note: The precise meaning of the construction ghorqu' is +a bit obscure; the rendering shatter is idiomatic and may derive from a +nonstandard dialect. Standard Klingon would generally prefer jor, +meaning explode.)

+ +

2.4.6 empty?/chIm'a'

+ +list chIm'a' bool

+ +Examines a list on the stack and returns 1 if its value is null (pagh), +a 0 if it contains anything. + +Note: some implementations also have an +operator known as bite/chop, equivalent to the Lisp cdr. This is +not required in any standard var'aq implementation and can easily be +rendered by the function

+


+
+~ chop { SIj woD } pong
+
+

+ +

2.5 String Operators
+tlheghjangwI'mey

+ +String handling in var'aq is generally thought to be somewhat +deficient by Earth standards; all strings are handled as if whitespace is not +significant, and string management is a bit primitive. Substrings are +understood, as are very basic forms of pattern matching, but Klingon computer +science seems to regard string-handling facilities such as regular expressions +as something of a black art, left only to those responsible for writing +compilers and that sort of thing.

+ +

2.5.1 strtie/tlheghrar

+ +str1 str2 tlheghrar str3

+ +Concatenates the top two strings on the stack into one.

+ +

2.5.2 compose/naQmoH

+ +mark str1 str2 ... strn naQmoH strn'

+ +Pops objects (executing proc objects if necessary) off the stack until a +marker (placed by qaw) is hit and combines them into one string. + +

2.5.3 streq?/tlheghrap'a'

+ +str1 str2 tlheghrap'a' bool

+ +Pops the top two strings on the stack, compares them, and returns 1 if +identical, 0 if not. + +

2.5.4 strcut/tlheghpe'

+ +str startval endval tlheghpe' substr

+ +Pops two values and a string and returns the section of the string between +character startval and character endval.

+ +

2.5.5 strmeasure/tlheghjuv

+ +str tlheghjuv val

+ +Pops a string off the stack and returns its length in characters.

+ +

2.5.6 explode/jor

+ +str jor list

+ +Separates individual "words" in a string by whitespace.

+ +

3 Mathematical Operators
+mI'jangwI'mey

+ +Klingon mathematical study is somewhat less sophisticated than Federation +standard, but it covers all the important concepts. A full set of arithmetic +and basic trigonometric operations is available.

+ +

3.1 Arithmetic Operations
+toghwI'mey

+ +Arithmetic operators usually work with real numbers unless otherwise +stated. The number operators (sec 3.3) can convert them to integers if +necessary.

+ +(note: verisimilitude would require that the Klingon understanding of +math not necessarily coincide with ours. But I think it's safe to say that +this basic set of operations is enough to at least get a Klingon +battlecruiser out of spacedock.)

+ +

3.1.1 add/boq

+ +a b boq sum

+ +Pops the top two values on the stack and replaces them with their sum.

+ +Note that the four basic operations are based around the term boq, +which literally means "to ally with". The metaphor is a bit strained but +is well-established enough that most Klingons do not think twice about it.

+ +

3.1.2 sub/boqHa'

+ +a b boqHa' difference

+ +Pops the top two values on the stack and replaces them with a - b.

+ +

3.1.3 mul/boq'egh

+ +a b boq'egh product

+ +Pops the top two values on the stack and replaces them with their +product.

+ +

3.1.4 div/boqHa''egh

+ +a b boqHa''egh quotient

+ +Pops the top two values on the stack and replaces them with a/b.

+ +

3.1.5 idiv/HabboqHa''egh (lit. full-divide)

+ +a b HabboqHa''egh quotient

+ +Pops the top two values on the stack and replaces them with the results +of an integer division operation.

+ +

3.1.6 mod/chuv (lit. leftover)

+ +a b chuv remainder

+ +Pops the top two values and returns the remainder of a mod b.

+ +

3.1.7 pow/boqHa'qa' (lit. remultiply)

+ +base exp boqHa'qa' real

+ +Pops the top two values and returns base^exp.

+ +

3.1.8 sqrt/loS'ar (lit. fourth-howmuch)

+ + val loS'ar real

+ +Returns the square root of val.

+ +

3.1.9 add1/wa'boq

+ +a wa'boq a+1

+ +Increments the top value on the stack by one.

+ +

3.1.10 sub1/wa'boqHa'

+ +a wa'boqHa' a-1

+ +Decrements the top value on the stack by one.

+ +

3.2 Trigonometric and Logarithmic Operators
+SIHpojjangwI'mey 'ej ghurjangwI'mey

+ +The standard Klingon unit of arc measure is the vatlhvI' (hundredth-part), +which is the same term used for percentage. However, Klingon +mathematicians are familiar with the concept of radians (botlhchuq, +center-distance) and all known var'aq implementations work in +radians for input and output.

+ +

3.2.1 sin/joq (lit. wave)

+ +angle joq real

+ +Returns the sine of val.

+ +

3.2.2 cos/joqHa' (lit. counter-wave)

+ +angle joqHa' cos(val)

+ +Returns the cosine of val.

+ +

3.2.3 tan/qojmI' (lit. cliffnumber)

+ +angle qojmI' tan(val)

+ +Returns the tangent of val.

+ +

3.2.4 atan/qojHa' (lit. anticliff)

+ +num den qojHa' angle

+ +Returns the arctangent of num / den.

+ +

3.2.5 ln/ghurtaH

+ +num ghurtaH real

+ +Returns the natural log of num. + +

3.2.6 log/maHghurtaH

+ +num maHghurtaH real

+ +Returns the base 10 log of num.

+ +

3.2.7 log3/wejghurtaH

+ +num wejghurtaH real

+ +Returns the base 3 log of num. (This function is actually +considered a level 1 function, and is believed to exist only for +historical purposes. Its use is very rare except among programmers whose +native language is Standard High Klingon (which historically used a base 3 +number system) and is unknown among other users.) + +

3.3 Numerical Operators and Constants

+ +This section describes operators that operate on numbers themselves, as +well as common system-level constants. (Note that some of these +functions look like verbs in English and adjectives in Klingon; the +idea is that where we might say 1.3 clip to get 1, a Klingon +would be thinking the clipped 1.3.)

+ +

3.3.1 clip/poD

+ +real poD int

+ +Removes the fractional portion of a real number (equivalent to +floor(real)).

+ +

3.3.2 smooth/Hab (lit. smooth)

+ +real Hab int

+ +Rounds a number to the nearest integer.

+ +

3.3.3 howmuch/'ar

+ +num 'ar num2

+ +Returns the absolute value of num.

+ +

3.3.4 setrand/mIScher

+ +num mIScher -

+ +Sets the random number generator seed value to num. Not common, +since most var'aq implementations have a rather arcane formula for +picking a pseudo-random seed value (security reasons, presumably).

+ +

3.3.5 rand/mIS

+ +num mIS real

+ +Returns a random real number in the range 0 to num. If there is no +meaningful input on the stack, + +

3.3.6 pi/HeHmI'

+ +Pushes the value pi (~3.14159...) onto the stack. The Klingon name +literally means "edge-number".

+ +

3.3.7 e/ghurmI'

+ +Pushes the value e onto the stack. The Klingon name literally means +"growth-number".

+ +

3.3.8 int?/HabmI''a'

+ +val HabmI''a' bool + +Pops the top value on the stack and returns 1 if it is an integer, 0 if +not.

+ +

3.3.9 number?/mI''a'

+ +val mI''a' bool

+ +Pops the top value off the stack and returns 1 if it's a number, 0 if it's +something else. + +

3.3.10 numberize/mI'moH

+ +str mI'moH val

+ +Pops a string off the stack, converts it into a numerical value, and returns +it.

+ +

3.4 Bitwise operators

+ +Though var'aq makes no clear distinction between integers and reals, it +is nevertheless useful to be able to manipulate a number on the bit level. The +following operators assume that their operands will always be treated as +integers; effects on floating-point values are undefined, and may be +disallowed at the implementor's discretion.

+ +Note: The var'aq bitwise operators are quite controversial as of +this writing (they are considered inappropriately low-level) and may be +removed or altered in future versions of this specification.

+ +It is to be noted that the Klingon coinages for the operation (especially +tlhoch (contradict) for xor) are unusually obscure even for Klingon +hackerspeak and probably reflect fairly profound differences in shades of +meaning.

+ +

3.4.1 isolate/mobmoH

+ +a b mobmoH result

+ +Performs a bitwise AND on a and b.

+ +

3.4.2 mix/DuD

+ +a b DuD result

+ +Performs a bitwise OR on a and b.

+ +

3.4.3 contradict/tlhoch

+ +a b tlhoch result

+ +Performs a bitwise XOR on a and b.

+ +

3.4.4 compl/Qo'moH

+ +val Qo'moH ~val

+ +Returns the one's-complement of val. Note: The literal meaning is +something like "make it say no".

+ +

3.4.5 shiftright/nIHghoS

+ +a b nIHghoS result

+ +Shifts a right b places, preserving the sign of the value.

+ +

3.4.6 shiftleft/poSghoS

+ +a b poSghoS result

+ +Shifts a left b places.

+ +

4 Relational and Logical Operators

+ +

4.1 Relational Operators and Predicate Functions
+yu'jangwI'mey

+ +The standard convention for anything that returns a boolean value is to +end the keyword in the interrogative suffix -'a', which in general +is analogous to Lisp's well-established -p (plain old Lisp) or -? (Scheme +and Dylan) predicate conventions; the English versions of the keywords +follow the Scheme convention for consistency with the Klingon. The +tlhIngan Hubbeq Say'ghun'ogh +paq (KDF Programmer's Style Guide) requires this convention; see that +document for further information.

+ +

4.1.1 gt?/law''a'

+ +a b law''a' bool

+ +Pops a and b off the stack, compares them, and returns a boolean value of +true if a is larger.

+ +

4.1.2 lt?/puS'a'

+ +a b puS'a' bool

+ +Pops a and b off the stack, compares them, and returns a boolean value of +true if a is smaller.

+ +

4.1.3 eq?/rap'a'

+ +a b rap'a' bool

+ +Pops a and b off the stack, compares them, and returns a boolean value of +true if a is the same as b.

+ +

4.1.4 ge?/law'rap'a'

+ +a b law'rap'a' bool

+ +Pops a and b off the stack, compares them, and returns a boolean value of +true if a is greater than or equal to b.

+ +

4.1.5 le?/puSrap'a'

+ +a b puSrap'a' bool

+ +Pops a and b off the stack, compares them, and returns a boolean value of +true if a is less than or equal to b.

+ +

4.1.6 ne?/rapbe'a'

+ +a b rapbe'a' bool

+ +Pops a and b off the stack, compares them, and returns a boolean value of +true if a is not equal to b.

+ +

4.1.7 null?/pagh'a'

+ +obj pagh'a' bool

+ +Examines the top object on the stack and returns a 1 if null, a 0 if not.

+ +

4.1.8 negative?/taH'a'

+ +val taH'a' bool

+ +Pops the top number on the stack and returns a 1 if less than 0, a 0 if +not.

+ +

4.2 Logical Operators
+vItjangwI'mey

+ +Note that these are strictly logical operators, not bitwise.

+ +

4.2.1 and/je

+ +a b je bool

+ +Evaluates b and a and returns a 1 if both are true, a 0 if not.

+ +

4.2.2 or/joq

+ +a b joq bool

+ +Evaluates b and a and returns a 1 if one or both are true, a 0 if both are +false.

+ +

4.2.3 xor/ghap

+ +a b ghap bool

+ +Evaluates b and a and returns a 1 if only one is true, a 0 otherwise.

+ +

5 Input/Output and File Operators

+ +The var'aq Level 0 specification essentially handles console I/O +and files in a manner very similar to the UNIX model. + +

5.1 Console I/O

+ +The console I/O model at this point is very simple: write, read, error.

+ +

5.1.1 disp/cha'

+ +obj cha' -

+ +Pops the top object on the stack and writes it to STDOUT. Note that +certain types of objects will generate meaningless output, particularly +anonymous proc objects.

+ +

5.1.2 listen/'Ij

+ +- 'Ij str

+ +Reads one line of input and stores it as a string on top of the stack.

+ +

5.1.3 complain/bep

+ +str bep -

+ +Pops str and prints it to stderr.

+ +

6 System Variables
+patSarwI'mey

+ +This section describes var'aq keywords that do no more than put set +values on the stack. Many of them are not precisely constants but more like +environment variables.

+ +

6.1 I/O-related Constants

+ +

6.1.1 newline/chu'DonwI'

+ +Prints a carriage return.

+ +

6.1.2 tab/chu'tut

+ +Advances by one tab stop.

+ +

6.2 Environment Constants

+ +

6.2.1 whereami/nuqDaq_jIH

+ +Represents the location of the current host device. On Earth implementations, +usually returns the IP address of the machine the interpreter is running +on.

+ +

6.2.2 version/pongmI'

+ +Returns the current interpreter version number. The reference interpreter +returns a date stamp.

+ +

6.2.3 argv/taghDe'

+ +Pushes the command line arguments on the stack as a list.

diff --git a/http-server-test.sh b/http-server-test.sh new file mode 100755 index 0000000..159039f --- /dev/null +++ b/http-server-test.sh @@ -0,0 +1,4 @@ +#!/bin/sh +rm public/module.wasm +./varaq tests/add.vq public/module.wasm +http-server -cors \ No newline at end of file diff --git a/main.c b/main.c new file mode 100644 index 0000000..abe5fa0 --- /dev/null +++ b/main.c @@ -0,0 +1,37 @@ +#include "common.h" +#include "compiler.h" +#include "tokenizer.h" + +int +main (int argc, char **argv) +{ + if (argc != 3) + { + printf ("usage: varaq input.vq output.wasm"); + return EXIT_FAILURE; + } + + char *buffer; + int length = 0; + int sp = open (argv[1], O_RDONLY); + + if (sp) + { + length = lseek (sp, (size_t)0, SEEK_END); + lseek (sp, (size_t)0, SEEK_SET); + buffer = malloc (length); + if (buffer) + { + read (sp, buffer, length * sizeof (char)); + } + close (sp); + } + + Code *prog = compile (buffer); + + int tp = open (argv[2], O_CREAT | O_WRONLY, S_IRWXU | S_IRGRP | S_IROTH); + write (tp, prog->cells, prog->count); + close (tp); + + return EXIT_SUCCESS; +} diff --git a/public/favicon.ico b/public/favicon.ico new file mode 100644 index 0000000..236b619 Binary files /dev/null and b/public/favicon.ico differ diff --git a/public/index.html b/public/index.html new file mode 100644 index 0000000..6746a60 --- /dev/null +++ b/public/index.html @@ -0,0 +1,30 @@ + + + + + + + + +

+
+
+ + + +
+
+ + + diff --git a/public/style.css b/public/style.css new file mode 100644 index 0000000..592355d --- /dev/null +++ b/public/style.css @@ -0,0 +1,150 @@ +* { + box-sizing: border-box; +} + +@media (prefers-color-scheme: light) { + body { + background-color: white; + color: black; + margin-top: 0px; + margin-left: 0px; + margin-right: 0px; + } + /* Header/logo Title */ + .header { + padding: 60px; + text-align: center; + background: #ddd; + color: black; + width: 100%; + } + + /* Style the top navigation bar */ + .navbar { + display: flex; + flex-flow: row wrap; + justify-content: flex-start; + background-color: #eee; + width: 100%; + } + + /* Style the navigation bar links */ + .navbar a { + color: black; + padding: 10px; + text-decoration: none; + text-align: center; + } + + /* Change color on hover */ + .navbar a:hover { + background-color: #ddd; + color: black; + } + + .navbar a.active { + background-color: #ccc; + color: #222; + } + + /* Column container */ + .row { + display: flex; + flex-wrap: wrap; + } + + /* Main column */ + .main { + flex: 70%; + background-color: white; + padding: 20px; + } + + /* Footer */ + .footer { + padding: 20px; + text-align: center; + background: #ddd; + position: fixed; + left: 0; + bottom: 0; + width: 100%; + } +} + +@media (prefers-color-scheme: dark) { + body { + background-color: black; + color: #ddd; + margin-top: 0px; + margin-left: 0px; + margin-right: 0px; + } + /* Header/logo Title */ + .header { + padding: 60px; + text-align: center; + background: #111; + color: #ddd; + width: 100%; + } + + /* Style the top navigation bar */ + .navbar { + display: flex; + flex-flow: row wrap; + justify-content: flex-start; + background-color: #222; + width: 100%; + } + + /* Style the navigation bar links */ + .navbar a { + color: #ddd; + padding: 14px 20px; + text-decoration: none; + text-align: center; + } + + /* Change color on hover */ + .navbar a:hover { + background-color: #111; + color: #ddd; + } + + .navbar a.active { + 
background-color: #ccc; + color: #222; + } + + /* Column container */ + .row { + display: flex; + flex-wrap: wrap; + } + + /* Main column */ + .main { + flex: 70%; + background-color: #000; + padding: 20px; + } + + /* Footer */ + .footer { + padding: 20px; + text-align: center; + background: #111; + position: fixed; + left: 0; + bottom: 0; + width: 100%; + } +} + +@media screen and (max-width: 700px) { + .row, + .navbar { + flex-direction: column; + } +} diff --git a/tests/add.vq b/tests/add.vq new file mode 100644 index 0000000..620d2ad --- /dev/null +++ b/tests/add.vq @@ -0,0 +1 @@ +1 2 add disp \ No newline at end of file diff --git a/tests/test.vq b/tests/test.vq new file mode 100644 index 0000000..aa2102d --- /dev/null +++ b/tests/test.vq @@ -0,0 +1,23 @@ +(* this is a comment *) + +// "test/setting.vq" + +~ fish { + 0 ~ num set + time + + 5 e sub pi dup mul le? + exch + disp + disp + + "Enter name:" disp + "hello" " " strtie + listen strtie + disp + + num 1 add ~ num set + num disp +} name + +fish diff --git a/tokenizer.c b/tokenizer.c new file mode 100644 index 0000000..69ffb12 --- /dev/null +++ b/tokenizer.c @@ -0,0 +1,747 @@ +#include "tokenizer.h" +#include "common.h" + +/* + Adapted from Section 6.6 of The C Programming Language + by Brian Kernighan and Dennis Ritchie +*/ +typedef struct Map Map; +struct Map +{ + struct Map *next; + char *keyword; + TokenType token; +}; + +#define HASHSIZE 150 +static Map *hashtab[HASHSIZE]; + +unsigned int +hash (char *s) +{ + unsigned int hashval; + for (hashval = 0; *s != '\0'; s++) + hashval = *s + 31 * hashval; + return hashval % HASHSIZE; +} + +Map * +lookup (char *s) +{ + Map *np; + for (np = hashtab[hash (s)]; np != NULL; np = np->next) + if (strcmp (s, np->keyword) == 0) + return np; + return NULL; +} + +TokenType +get (char *s) +{ + Map *np; + for (np = hashtab[hash (s)]; np != NULL; np = np->next) + if (strcmp (s, np->keyword) == 0) + return np->token; + return TOKEN_IDENTIFIER; +} + +Map * +put (char 
*keyword, TokenType token) +{ + Map *np; + unsigned int hashval; + if ((np = lookup (keyword)) == NULL) + { + np = (Map *)malloc (sizeof (*np)); + if (np == NULL || (np->keyword = strdup (keyword)) == NULL) + return NULL; + hashval = hash (keyword); + np->next = hashtab[hashval]; + hashtab[hashval] = np; + } + np->token = token; + return np; +} + +void +initMap () +{ + put ("and", TOKEN_AND); + put ("atan", TOKEN_ATAN); + put ("add", TOKEN_ADD); + put ("bep", TOKEN_COMPLAIN); + put ("complain", TOKEN_COMPLAIN); + put ("compl", TOKEN_COMPL); + put ("compose", TOKEN_COMPOSE); + put ("contradict", TOKEN_CONTRADICT); + put ("cons", TOKEN_CONS); + put ("cos", TOKEN_COS); + put ("chImmoH", TOKEN_CLEAR); + put ("chIm'a'", TOKEN_EMPTY); + put ("cher", TOKEN_SET); + put ("boq", TOKEN_ADD); + put ("choose", TOKEN_CHOOSE); + put ("chov", TOKEN_EVAL); + put ("chuv", TOKEN_MOD); + put ("cha'", TOKEN_DISP); + put ("clear", TOKEN_CLEAR); + put ("dup", TOKEN_DUP); + put ("dump", TOKEN_DUMP); + put ("disp", TOKEN_DISP); + put ("div", TOKEN_DIV); + put ("DuD", TOKEN_MIX); + put ("e", TOKEN_E); + put ("exch", TOKEN_EXCH); + put ("eval", TOKEN_EVAL); + put ("escape", TOKEN_ESCAPE); + put ("empty?", TOKEN_EMPTY); + put ("explode", TOKEN_EXPLODE); + put ("eq?", TOKEN_EQ); + put ("forget", TOKEN_FORGET); + put ("gt?", TOKEN_GT); + put ("ge?", TOKEN_GE); + put ("ghap", TOKEN_XOR); + put ("ghurmI'", TOKEN_E); + put ("ghurtaH", TOKEN_LN); + put ("ghorqu'", TOKEN_SHATTER); + put ("ghobe'chugh", TOKEN_IFNO); + put ("HIja'chugh", TOKEN_IFYES); + put ("Hotlh", TOKEN_DUMP); + put ("HeHmI'", TOKEN_PI); + put ("Habwav", TOKEN_IDIV); + put ("HabmI''a'", TOKEN_INT); + put ("idiv", TOKEN_IDIV); + put ("int?", TOKEN_INT); + put ("isolate", TOKEN_ISOLATE); + put ("ifyes", TOKEN_IFYES); + put ("ifno", TOKEN_IFNO); + put ("je", TOKEN_AND); + put ("jor", TOKEN_EXPLODE); + put ("joq", TOKEN_OR); + put ("ln", TOKEN_LN); + put ("lt?", TOKEN_LT); + put ("le?", TOKEN_LE); + put ("listen", TOKEN_LISTEN); + put 
("loS'ar", TOKEN_SQRT); + put ("log", TOKEN_LOG); + put ("log3", TOKEN_LOG3); + put ("latlh", TOKEN_DUP); + put ("law'moH", TOKEN_MUL); + put ("law'qa'moH", TOKEN_POW); + put ("law''a'", TOKEN_GT); + put ("law'rap'a'", TOKEN_GE); + put ("maHghurtaH", TOKEN_LOG); + put ("mix", TOKEN_MIX); + put ("mi'moH", TOKEN_NUMBERIZE); + put ("muv", TOKEN_CONS); + put ("mul", TOKEN_MUL); + put ("mod", TOKEN_MOD); + put ("mobmoH", TOKEN_ISOLATE); + put ("mIScher", TOKEN_SETRAND); + put ("mIS", TOKEN_RAND); + put ("mI''a'", TOKEN_FLOAT); + put ("nIHghoS", TOKEN_SHIFTRIGHT); + put ("ne?", TOKEN_NE); + put ("negative?", TOKEN_NEGATIVE); + put ("name", TOKEN_NAME); + put ("nargh", TOKEN_ESCAPE); + put ("naQmoH", TOKEN_COMPOSE); + put ("number?", TOKEN_ISNUMBER); + put ("numberize", TOKEN_NUMBERIZE); + put ("null?", TOKEN_NULL); + put ("or", TOKEN_OR); + put ("pi", TOKEN_PI); + put ("pagh'a'", TOKEN_NULL); + put ("pop", TOKEN_POP); + put ("pong", TOKEN_NAME); + put ("pow", TOKEN_POW); + put ("poSghoS", TOKEN_SHIFTLEFT); + put ("puS'a'", TOKEN_LT); + put ("puSrap'a'", TOKEN_LE); + put ("qaw", TOKEN_REMEMBER); + put ("qawHa'", TOKEN_FORGET); + put ("qojmI'", TOKEN_TAN); + put ("qojHa'", TOKEN_ATAN); + put ("Qo'moH", TOKEN_COMPL); + put ("remember", TOKEN_REMEMBER); + put ("repeat", TOKEN_REPEAT); + put ("rand", TOKEN_RAND); + put ("rap'a'", TOKEN_EQ); + put ("rapbe'a'", TOKEN_NE); + put ("set", TOKEN_SET); + put ("split", TOKEN_SPLIT); + put ("shatter", TOKEN_SHATTER); + put ("strcut", TOKEN_STRCUT); + put ("streq?", TOKEN_STREQ); + put ("strmeasure", TOKEN_STRMEASURE); + put ("strtie", TOKEN_STRTIE); + put ("tlheghrar", TOKEN_STRTIE); + put ("sub", TOKEN_SUB); + put ("sub1", TOKEN_SUB1); + put ("sqrt", TOKEN_SQRT); + put ("sin", TOKEN_SIN); + put ("clip", TOKEN_CLIP); + put ("poD", TOKEN_CLIP); + put ("smooth", TOKEN_SMOOTH); + put ("Hab", TOKEN_SMOOTH); + put ("howmuch", TOKEN_HOWMUCH); + put ("'ar", TOKEN_HOWMUCH); + put ("setrand", TOKEN_SETRAND); + put ("shift right", 
TOKEN_SHIFTRIGHT); + put ("shift left", TOKEN_SHIFTLEFT); + put ("SIj", TOKEN_SPLIT); + put ("boqHa'", TOKEN_SUB); + put ("tam", TOKEN_EXCH); + put ("tan", TOKEN_TAN); + put ("taH'a'", TOKEN_NEGATIVE); + put ("tlhoch", TOKEN_CONTRADICT); + put ("tlheghpe'", TOKEN_STRCUT); + put ("tlheghjuv", TOKEN_STRMEASURE); + put ("tlheghrap'a'", TOKEN_STREQ); + put ("vangqa'", TOKEN_REPEAT); + put ("wIv", TOKEN_CHOOSE); + put ("woD", TOKEN_POP); + put ("wav", TOKEN_DIV); + put ("wa'teq", TOKEN_SUB1); + put ("wa'chel", TOKEN_ADD1); + put ("wejghurtaH", TOKEN_LOG3); + put ("xor", TOKEN_XOR); + put ("\'Ij", TOKEN_LISTEN); + put ("time", TOKEN_TIME); + put ("poH", TOKEN_TIME); + // Wrong word in original spec, old one meant "waving hands or flapping" + // Also fixes the conflicting joq issue meaning sin or 'or' + put ("yu'eghHa'", TOKEN_COS); + put ("yu'egh", TOKEN_SIN); + // This one has a special case too as it is the same as the '~' operator + put ("lI'moH", TOKEN_TILDE); + put ("woDHa'", TOKEN_GARBAGE_COLLECT); + put ("gc", TOKEN_GARBAGE_COLLECT); +} + +typedef struct Tokenizer Tokenizer; +struct Tokenizer +{ + char *start; + char *current; + int32_t line; +}; + +Tokenizer tokenizer; + +void +initTokenizer (char *src) +{ + tokenizer.start = src; + tokenizer.current = src; + tokenizer.line = 1; +} + +static bool +isAlpha (char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' + || c == '\'' || c == '?'; +} + +static bool +isDigit (char c) +{ + return (c >= '0' && c <= '9') || c == '-'; +} + +static bool +isAtEnd () +{ + return *tokenizer.current == '\0'; +} + +static Token +makeToken (TokenType type) +{ + Token token; + token.type = type; + token.start = tokenizer.start; + token.length = (int32_t)(tokenizer.current - tokenizer.start); + token.line = tokenizer.line; + return token; +} + +static Token +errorToken (char *msg) +{ + Token token; + token.type = TOKEN_ERROR; + token.start = msg; + token.length = (int32_t)strlen (msg); + token.line = 
tokenizer.line; + return token; +} + +static char +advance () +{ + tokenizer.current++; + return tokenizer.current[-1]; +} + +static char +peek () +{ + return *tokenizer.current; +} + +static char +peekNext () +{ + if (isAtEnd ()) + return '\0'; + return tokenizer.current[1]; +} + +static bool +match (char expected) +{ + if (isAtEnd ()) + return false; + if (*tokenizer.current != expected) + return false; + tokenizer.current++; + return true; +} + +static void +skipWhitespace () +{ + for (;;) + { + char c = peek (); + switch (c) + { + case ' ': + case '\r': + case '\t': + advance (); + break; + case '\n': + tokenizer.line++; + advance (); + break; + case '/': + if (peekNext () == '/') + { + // Ignore the preprocessor import until end of the line. + while (peek () != '\n' && !isAtEnd ()) + advance (); + } + else + { + return; + } + break; + case '(': + if (peekNext () == '*') + { + advance (); // consume ( + advance (); // consume * + while (!isAtEnd () && peek () != '*' && peekNext () != ')') + advance (); // Consume contents + advance (); // consume * + advance (); // consume ) + } + break; + default: + return; + } + } +} + +static TokenType +checkKeyword (int start, int length, char *rest, TokenType type) +{ + if (tokenizer.current - tokenizer.start == start + length + && memcmp (tokenizer.start + start, rest, length) == 0) + { + return type; + } + + return TOKEN_IDENTIFIER; +} + +static TokenType +identifierType () +{ + char *check; + int32_t size = tokenizer.current - tokenizer.start; + check = (char *)malloc (sizeof (size)); + strncpy (check, tokenizer.start, size); + check[size] = '\0'; + TokenType t = get (check); + free (check); + return t; +} + +static Token +identifier () +{ + while (isAlpha (peek ()) || isDigit (peek ())) + advance (); + return makeToken (identifierType ()); +} + +static Token +number () +{ + bool is_float = false; + while (isDigit (peek ())) + advance (); + + // Look for a fractional part. + if (peek () == '.' 
&& isDigit (peekNext ())) + { + is_float = true; + // Consume the ".". + advance (); + + while (isDigit (peek ())) + advance (); + } + + return makeToken (is_float ? TOKEN_FLOAT + : TOKEN_INT); // or measure if ends in postscript +} + +static Token +string () +{ + while (peek () != '"' && !isAtEnd ()) + { + if (peek () == '\n') + tokenizer.line++; + advance (); + } + + if (isAtEnd ()) + return errorToken ("Unterminated string."); + + // The closing quote. + advance (); + return makeToken (TOKEN_STRING); +} + +Token +nextToken () +{ + skipWhitespace (); + tokenizer.start = tokenizer.current; + if (isAtEnd ()) + return makeToken (TOKEN_EOF); + + char c = advance (); + if (isAlpha (c)) + return identifier (); + if (isDigit (c)) + return number (); + switch (c) + { + case '(': + return makeToken (TOKEN_LEFT_PAREN); + case ')': + return makeToken (TOKEN_RIGHT_PAREN); + case '{': + return makeToken (TOKEN_LEFT_BRACE); + case '}': + return makeToken (TOKEN_RIGHT_BRACE); + case '-': + return makeToken (TOKEN_NEGATIVE); + case '~': + return makeToken (TOKEN_TILDE); + case '/': + return makeToken (TOKEN_SLASH); + case '"': + return string (); + } + + return errorToken ("Unexpected character."); +} + +void +debug_printToken (Token t) +{ + switch (t.type) + { + case TOKEN_LEFT_PAREN: + printf ("TOKEN_LEFT_PAREN line_no=%d\n", t.line); + break; + case TOKEN_RIGHT_PAREN: + printf ("TOKEN_RIGHT_PAREN line_no=%d\n", t.line); + break; + case TOKEN_LEFT_BRACE: + printf ("TOKEN_LEFT_BRACE line_no=%d\n", t.line); + break; + case TOKEN_RIGHT_BRACE: + printf ("TOKEN_RIGHT_BRACE line_no=%d\n", t.line); + break; + case TOKEN_TILDE: + printf ("TOKEN_TILDE line_no=%d\n", t.line); + break; + case TOKEN_SLASH: + printf ("TOKEN_SLASH line_no=%d\n", t.line); + break; + case TOKEN_MINUS: + printf ("TOKEN_MINUS line_no=%d\n", t.line); + break; + case TOKEN_IDENTIFIER: + printf ("TOKEN_IDENTIFIER line_no=%d\n", t.line); + break; + case TOKEN_STRING: + printf ("TOKEN_STRING line_no=%d\n", t.line); 
+ break; + case TOKEN_FLOAT: + printf ("TOKEN_FLOAT line_no=%d\n", t.line); + break; + case TOKEN_LIST: + printf ("TOKEN_LIST line_no=%d\n", t.line); + break; + case TOKEN_ERROR: + printf ("TOKEN_ERROR line_no=%d\n", t.line); + break; + case TOKEN_FALSE: + printf ("TOKEN_FALSE line_no=%d\n", t.line); + break; + case TOKEN_TRUE: + printf ("TOKEN_TRUE line_no=%d\n", t.line); + break; + case TOKEN_PI: + printf ("TOKEN_PI line_no=%d\n", t.line); + break; + case TOKEN_E: + printf ("TOKEN_E line_no=%d\n", t.line); + break; + case TOKEN_EOF: + printf ("TOKEN_EOF line_no=%d\n", t.line); + break; + case TOKEN_POP: + printf ("TOKEN_POP line_no=%d\n", t.line); + break; + case TOKEN_DUP: + printf ("TOKEN_DUP line_no=%d\n", t.line); + break; + case TOKEN_EXCH: + printf ("TOKEN_EXCH line_no=%d\n", t.line); + break; + case TOKEN_CLEAR: + printf ("TOKEN_CLEAR line_no=%d\n", t.line); + break; + case TOKEN_REMEMBER: + printf ("TOKEN_REMEMBER line_no=%d\n", t.line); + break; + case TOKEN_FORGET: + printf ("TOKEN_FORGET line_no=%d\n", t.line); + break; + case TOKEN_DUMP: + printf ("TOKEN_DUMP line_no=%d\n", t.line); + break; + case TOKEN_NAME: + printf ("TOKEN_NAME line_no=%d\n", t.line); + break; + case TOKEN_SET: + printf ("TOKEN_SET line_no=%d\n", t.line); + break; + case TOKEN_IFYES: + printf ("TOKEN_IFYES line_no=%d\n", t.line); + break; + case TOKEN_IFNO: + printf ("TOKEN_IFNO line_no=%d\n", t.line); + break; + case TOKEN_CHOOSE: + printf ("TOKEN_CHOOSE line_no=%d\n", t.line); + break; + case TOKEN_EVAL: + printf ("TOKEN_EVAL line_no=%d\n", t.line); + break; + case TOKEN_ESCAPE: + printf ("TOKEN_ESCAPE line_no=%d\n", t.line); + break; + case TOKEN_REPEAT: + printf ("TOKEN_REPEAT line_no=%d\n", t.line); + break; + case TOKEN_SPLIT: + printf ("TOKEN_SPLIT line_no=%d\n", t.line); + break; + case TOKEN_CONS: + printf ("TOKEN_CONS line_no=%d\n", t.line); + break; + case TOKEN_SHATTER: + printf ("TOKEN_SHATTER line_no=%d\n", t.line); + break; + case TOKEN_EMPTY: + printf ("TOKEN_EMPTY 
line_no=%d\n", t.line); + break; + case TOKEN_COMPOSE: + printf ("TOKEN_COMPOSE line_no=%d\n", t.line); + break; + case TOKEN_STREQ: + printf ("TOKEN_STREQ line_no=%d\n", t.line); + break; + case TOKEN_STRCUT: + printf ("TOKEN_STRCUT line_no=%d\n", t.line); + break; + case TOKEN_STRMEASURE: + printf ("TOKEN_STRMEASURE line_no=%d\n", t.line); + break; + case TOKEN_STRTIE: + printf ("TOKEN_STRTIE line_no=%d\n", t.line); + break; + case TOKEN_EXPLODE: + printf ("TOKEN_EXPLODE line_no=%d\n", t.line); + break; + case TOKEN_ADD: + printf ("TOKEN_ADD line_no=%d\n", t.line); + break; + case TOKEN_SUB: + printf ("TOKEN_SUB line_no=%d\n", t.line); + break; + case TOKEN_MUL: + printf ("TOKEN_MUL line_no=%d\n", t.line); + break; + case TOKEN_DIV: + printf ("TOKEN_DIV line_no=%d\n", t.line); + break; + case TOKEN_IDIV: + printf ("TOKEN_IDIV line_no=%d\n", t.line); + break; + case TOKEN_MOD: + printf ("TOKEN_MOD line_no=%d\n", t.line); + break; + case TOKEN_POW: + printf ("TOKEN_POW line_no=%d\n", t.line); + break; + case TOKEN_SQRT: + printf ("TOKEN_SQRT line_no=%d\n", t.line); + break; + case TOKEN_ADD1: + printf ("TOKEN_ADD1 line_no=%d\n", t.line); + break; + case TOKEN_SUB1: + printf ("TOKEN_SUB1 line_no=%d\n", t.line); + break; + case TOKEN_SIN: + printf ("TOKEN_SIN line_no=%d\n", t.line); + break; + case TOKEN_COS: + printf ("TOKEN_COS line_no=%d\n", t.line); + break; + case TOKEN_TAN: + printf ("TOKEN_TAN line_no=%d\n", t.line); + break; + case TOKEN_ATAN: + printf ("TOKEN_ATAN line_no=%d\n", t.line); + break; + case TOKEN_LN: + printf ("TOKEN_LN line_no=%d\n", t.line); + break; + case TOKEN_LOG: + printf ("TOKEN_LOG line_no=%d\n", t.line); + break; + case TOKEN_LOG3: + printf ("TOKEN_LOG3 line_no=%d\n", t.line); + break; + case TOKEN_CLIP: + printf ("TOKEN_CLIP line_no=%d\n", t.line); + break; + case TOKEN_SMOOTH: + printf ("TOKEN_SMOOTH line_no=%d\n", t.line); + break; + case TOKEN_HOWMUCH: + printf ("TOKEN_HOWMUCH line_no=%d\n", t.line); + break; + case TOKEN_SETRAND: 
+ printf ("TOKEN_SETRAND line_no=%d\n", t.line); + break; + case TOKEN_RAND: + printf ("TOKEN_RAND line_no=%d\n", t.line); + break; + case TOKEN_INT: + printf ("TOKEN_INT line_no=%d\n", t.line); + break; + case TOKEN_NUMBERIZE: + printf ("TOKEN_NUMBERIZE line_no=%d\n", t.line); + break; + case TOKEN_ISOLATE: + printf ("TOKEN_ISOLATE line_no=%d\n", t.line); + break; + case TOKEN_MIX: + printf ("TOKEN_MIX line_no=%d\n", t.line); + break; + case TOKEN_CONTRADICT: + printf ("TOKEN_CONTRADICT line_no=%d\n", t.line); + break; + case TOKEN_COMPL: + printf ("TOKEN_COMPL line_no=%d\n", t.line); + break; + case TOKEN_SHIFTRIGHT: + printf ("TOKEN_SHIFTRIGHT line_no=%d\n", t.line); + break; + case TOKEN_SHIFTLEFT: + printf ("TOKEN_SHIFTLEFT line_no=%d\n", t.line); + break; + case TOKEN_GT: + printf ("TOKEN_GT line_no=%d\n", t.line); + break; + case TOKEN_LT: + printf ("TOKEN_LT line_no=%d\n", t.line); + break; + case TOKEN_EQ: + printf ("TOKEN_EQ line_no=%d\n", t.line); + break; + case TOKEN_GE: + printf ("TOKEN_GE line_no=%d\n", t.line); + break; + case TOKEN_LE: + printf ("TOKEN_LE line_no=%d\n", t.line); + break; + case TOKEN_NE: + printf ("TOKEN_NE line_no=%d\n", t.line); + break; + case TOKEN_NULL: + printf ("TOKEN_NULL line_no=%d\n", t.line); + break; + case TOKEN_NEGATIVE: + printf ("TOKEN_NEGATIVE line_no=%d\n", t.line); + break; + case TOKEN_ISNULL: + printf ("TOKEN_ISNULL line_no=%d\n", t.line); + break; + case TOKEN_ISINT: + printf ("TOKEN_ISINT line_no=%d\n", t.line); + break; + case TOKEN_ISNUMBER: + printf ("TOKEN_ISNUMBER line_no=%d\n", t.line); + break; + case TOKEN_AND: + printf ("TOKEN_AND line_no=%d\n", t.line); + break; + case TOKEN_OR: + printf ("TOKEN_OR line_no=%d\n", t.line); + break; + case TOKEN_XOR: + printf ("TOKEN_XOR line_no=%d\n", t.line); + break; + case TOKEN_DISP: + printf ("TOKEN_DISP line_no=%d\n", t.line); + break; + case TOKEN_LISTEN: + printf ("TOKEN_LISTEN line_no=%d\n", t.line); + break; + case TOKEN_COMPLAIN: + printf ("TOKEN_COMPLAIN 
line_no=%d\n", t.line); + break; + case TOKEN_TIME: + printf ("TOKEN_TIME line_no=%d\n", t.line); + break; + case TOKEN_GARBAGE_COLLECT: + printf ("TOKEN_GARBAGE_COLLECT line_no=%d\n", t.line); + break; + } +} \ No newline at end of file diff --git a/tokenizer.h b/tokenizer.h new file mode 100644 index 0000000..0d634c2 --- /dev/null +++ b/tokenizer.h @@ -0,0 +1,126 @@ +#ifndef TOKENIZER_H +#define TOKENIZER_H + +#include "common.h" + +typedef enum TokenType +{ + // Single char tokens + TOKEN_LEFT_PAREN, + TOKEN_RIGHT_PAREN, + TOKEN_LEFT_BRACE, + TOKEN_RIGHT_BRACE, + TOKEN_TILDE, + TOKEN_SLASH, + TOKEN_MINUS, + // Literals + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_FLOAT, + TOKEN_LIST, + TOKEN_ERROR, + TOKEN_FALSE, + TOKEN_TRUE, + TOKEN_PI, + TOKEN_E, + TOKEN_EOF, + // Keywords + TOKEN_POP, + TOKEN_DUP, + TOKEN_EXCH, + TOKEN_CLEAR, + TOKEN_REMEMBER, + TOKEN_FORGET, + TOKEN_DUMP, + TOKEN_NAME, + TOKEN_SET, + TOKEN_IFYES, + TOKEN_IFNO, + TOKEN_CHOOSE, + TOKEN_EVAL, + TOKEN_ESCAPE, + TOKEN_REPEAT, + TOKEN_SPLIT, + TOKEN_CONS, + TOKEN_SHATTER, + TOKEN_EMPTY, + TOKEN_COMPOSE, + TOKEN_STREQ, + TOKEN_STRCUT, + TOKEN_STRMEASURE, + TOKEN_STRTIE, + TOKEN_EXPLODE, + TOKEN_ADD, + TOKEN_SUB, + TOKEN_MUL, + TOKEN_DIV, + TOKEN_IDIV, + TOKEN_MOD, + TOKEN_POW, + TOKEN_SQRT, + TOKEN_ADD1, + TOKEN_SUB1, + TOKEN_SIN, + TOKEN_COS, + TOKEN_TAN, + TOKEN_ATAN, + TOKEN_LN, + TOKEN_LOG, + TOKEN_LOG3, + TOKEN_CLIP, + TOKEN_SMOOTH, + TOKEN_HOWMUCH, + TOKEN_SETRAND, + TOKEN_RAND, + TOKEN_INT, + TOKEN_NUMBERIZE, + TOKEN_ISOLATE, + TOKEN_MIX, + TOKEN_CONTRADICT, + TOKEN_COMPL, + TOKEN_SHIFTRIGHT, + TOKEN_SHIFTLEFT, + TOKEN_GT, + TOKEN_LT, + TOKEN_EQ, + TOKEN_GE, + TOKEN_LE, + TOKEN_NE, + TOKEN_NULL, + TOKEN_NEGATIVE, + TOKEN_ISNULL, + TOKEN_ISINT, + TOKEN_ISNUMBER, + TOKEN_AND, + TOKEN_OR, + TOKEN_XOR, + TOKEN_DISP, + TOKEN_LISTEN, + TOKEN_COMPLAIN, + TOKEN_TIME, + TOKEN_GARBAGE_COLLECT, +} TokenType; + +typedef struct Token Token; +struct Token +{ + TokenType type; + char *start; + int32_t 
length; + int32_t line; +}; + +typedef struct SourceCode SourceCode; +struct SourceCode +{ + size_t size; + char *buf; + int count; +}; + +void initTokenizer (char *src); +Token nextToken (); +void initMap (); +void debug_printToken (Token t); + +#endif