diff options
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | Makefile | 38 | ||||
| -rw-r--r-- | arena.h | 149 | ||||
| -rw-r--r-- | doc/demo.txt | 94 | ||||
| -rw-r--r-- | doc/test.txt | 119 | ||||
| -rw-r--r-- | impl.c | 6 | ||||
| -rw-r--r-- | lex.c | 184 | ||||
| -rw-r--r-- | lex.h | 81 | ||||
| -rw-r--r-- | main.c | 161 | ||||
| -rw-r--r-- | str.h | 152 | ||||
| -rw-r--r-- | strio.h | 234 | ||||
| -rw-r--r-- | test.lang | 3 | ||||
| -rw-r--r-- | utf8.h | 64 |
13 files changed, 1287 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7756c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +a.out +*.o diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8693d09 --- /dev/null +++ b/Makefile @@ -0,0 +1,38 @@ +EXE = a.out +RUNARGS = test.lang + +CFLAGS = -std=c23 -Wall -Wextra -Wpedantic ${CFLAGS_${DEBUG}} +LDFLAGS = ${LDFLAGS_${DEBUG}} +LDLIBS = + +OBJ != find -type f -name '*.c' | sed 's/\.c$$/.o/' + +DEBUG = 0 +GDB != which gf2 2> /dev/null || which gdb + +CFLAGS_1 = -g3 -fsanitize=undefined +LDFLAGS_1 = -g3 -fsanitize=undefined +LDFLAGS_0 = -Os -s + +PREFIX ?= ${HOME}/.local +BINDIR = ${PREFIX}/bin + +.PHONY: run all clean install uninstall + +all: ${EXE} +run: ${EXE} + ./${EXE} ${RUNARGS} +debug: ${EXE} + ${GDB} -ex start --args ./${EXE} ${RUNARGS} + +clean: + rm -fv ${EXE} ${OBJ} + +${EXE}: ${OBJ} + ${CC} ${LDFLAGS} ${OBJ} -o ${EXE} ${LDLIBS} + +install: ${EXE} + cp ${EXE} ${BINDIR}/${EXE} + +uninstall: + rm ${BINDIR}/${EXE} @@ -0,0 +1,149 @@ +#ifndef ARENA_H +#define ARENA_H + +#include <stdint.h> +#include <stddef.h> +#include <string.h> + +typedef struct ArenaPg { + struct ArenaPg *prev, *next; + char *beg, *end; + char data[]; +} ArenaPg; + +typedef struct { + ArenaPg *pg; + char *beg; +} ArenaMark; + +typedef struct { + ArenaPg *cur, *tail; +} Arena; + +#define new(a, t)\ + (t*)arena_zeroed(arena_alloc(a, sizeof(t), _Alignof(t)), sizeof(t)) + +#define new_arr(a, t, n)\ + arena_alloc(a, sizeof(t) * n, _Alignof(t)) + +#define resize(a, p, old, new)\ + arena_realloc(a, p, (old) * sizeof(*(p)), (new) * sizeof(*(p)),\ + _Alignof(__typeof__(*(p)))) + +void arena_free(Arena *a); + +void arena_save(Arena *a, ArenaMark *m); +void arena_load(Arena *a, ArenaMark *m); + +void arena_reset(Arena *a); +void arena_reserve(Arena *a, ptrdiff_t n); + +void *arena_alloc(Arena *a, ptrdiff_t n, ptrdiff_t align); +void *arena_realloc(Arena *a, void *ptr, ptrdiff_t old, ptrdiff_t new, ptrdiff_t align); +void *arena_zeroed(void *p, size_t n); + +#define ARENA_BACKEND_MALLOC 0 +#define ARENA_BACKEND_MMAP 1 + +#ifndef ARENA_BACKEND +#if defined(__linux__) +# define ARENA_BACKEND ARENA_BACKEND_MMAP +#else +# define ARENA_BACKEND ARENA_BACKEND_MALLOC +#endif +#endif + +#ifdef ARENA_IMPL + +#include <stdio.h> +#include <stdlib.h> +static void arena_pg_alloc_fail(void) { + fprintf(stderr, "failed to allocate arena page\n"); + abort(); +} + +#if ARENA_BACKEND == ARENA_BACKEND_MMAP +#include <sys/mman.h> +#include <unistd.h> +#define ARENA_PG_SIZE sysconf(_SC_PAGESIZE) +static inline void *arena_pg_alloc(ptrdiff_t n) { + void *p = mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + return p == MAP_FAILED ? NULL : p; +} +static inline void arena_pg_free(void *ptr, ptrdiff_t n) { munmap(ptr, n); } +#elif ARENA_BACKEND == ARENA_BACKEND_MALLOC +#define ARENA_PG_SIZE 8192 +static inline void *arena_pg_alloc(ptrdiff_t n) { return malloc(n); } +static inline void arena_pg_free(void *ptr, ptrdiff_t n) { free(ptr); (void)n; } +#endif + +void arena_free(Arena *a) { + while (a->tail) { + a->cur = a->tail->prev; + arena_pg_free(a->tail, (uintptr_t)(a->tail->end - (char*)a->tail)); + a->tail = a->cur; + } +} + +void arena_reserve(Arena *a, ptrdiff_t n) { + while (a->cur && a->cur->beg + n >= a->cur->end) a->cur = a->cur->next; + if (a->cur) return; + ptrdiff_t cap = n + sizeof(ArenaPg); + cap += (uintptr_t)-cap & (ARENA_PG_SIZE - 1); + ArenaPg *p = arena_pg_alloc(cap); + if (!p) arena_pg_alloc_fail(); + p->next = NULL; + p->prev = a->tail; + p->beg = p->data; + p->end = (char*)p + cap; + if (a->tail) a->tail->next = p; + a->cur = (a->tail = p); +} + +void *arena_alloc(Arena *a, ptrdiff_t n, ptrdiff_t align) { + arena_reserve(a, n + (align - 1)); + char *ptr = a->cur->beg + (-(uintptr_t)a->cur->beg & (align - 1)); + a->cur->beg = ptr + n; + return ptr; +} + +void *arena_realloc(Arena *a, void *ptr, ptrdiff_t old, ptrdiff_t new, ptrdiff_t align) { + if (a->cur && ptr == a->cur->beg - old && (char*)ptr + new < a->cur->end) { + a->cur->beg += new - old; + return ptr; + } else { + void *p = arena_alloc(a, new, align); + if (ptr) memcpy(p, ptr, old); + return p; + } +} + +void *arena_zeroed(void *p, size_t n) { + memset(p, 0, n); + return p; +} + +void arena_reset(Arena *a) { + if (!a->cur) return; + while (a->cur->prev) { + a->cur->beg = a->cur->data; + a->cur = a->cur->prev; + } + a->cur->beg = a->cur->data; +} + +void arena_save(Arena *a, ArenaMark *m) { + m->pg = a->cur; + if (a->cur) m->beg = a->cur->beg; +} + +void arena_load(Arena *a, ArenaMark *m) { + while (a->cur && a->cur != m->pg) { + a->cur->beg = a->cur->data; + a->cur = a->cur->prev; + } + if (a->cur) a->cur->beg = m->beg; +} + +#endif +#endif diff --git a/doc/demo.txt b/doc/demo.txt new file mode 100644 index 0000000..58c0839 --- /dev/null +++ b/doc/demo.txt @@ -0,0 +1,94 @@ +type vec2 = [2]f32 + +type Sprite = enum u8 { + BLANK, + PLAYER +} + +type Player = struct { + pos, vel vec2 +} + +extern { + blit proc(spr Sprite, x, y i32), + exp func(v f32) f32 +} + +proc frame(dt f32) { + /* unary .. can splat an array as parameters */ + blit(.PLAYER, ..plr.pos) + plr.pos += plr.vel * dt + plr.vel *= exp(-dt * 5) +} + +/* statements + + type Ident = T Alias Ident to type T + extern Ident T Define an external variable (or function!) of the given type + extern { a A, b B... } Block of extern declarations, formatted like a struct + +*/ + +/* expressions + + N Integer + N.N | NeN | N.NeN Float + [-+~] A Unary operator on term A + A [+-/*%&|^] B Binary operator on term A and expression B + A := B Type inference assignment + A, B, C := D + T { ... } Literal of type T; mostly only makes sense for structs, + but this can be used to do something like u8 { 2 } too. + +/* types + + IDENT type of the given identifier + ^T pointer to type T + enum { FOO, BAR... } enum + enum T { FOO, BAR... } enum with backing type T, which must be an integer type + struct { a A, b B... } struct + A..B range (kinda an anonymous enum) + A and B must be ordinal types (enum value, integer, or character) + [N]T array of T of length N + [E]T array of T indexed by enum or range E + []T slice + (Ta, Tb...) tuple of given types; can be destructured to multiple assignment + set[E] a set of enum or range E + proc(a A, b B...) function pointer + func(a A, b B...) T function pointer with return value T + +*/ + +Expr = LValue | Literal +LValue = Ident | LValue '.' Ident +Literal = Number +Number = Integer | Float +Integer = /[0-9]+/ +Float = Integer 'e' Integer | Integer '.' Integer | Integer '.' Integer 'e' Integer + + IDENT Variable + LVALUE . IDENT Member access + [0-9]+ Integer + [0-9]+ . [0-9]+ Float + [0-9]+ . [0-9]+ e [0-9]+ Float + F(E1, E2...) function call with the given expression arguments + +*/ + +/* import first tries to scan local directory for the package, then standard + * system directory; in said directory it will look for an IR cache file, + * or compile a new one if it's not present (or if its last-modified time + * is older than the compiler executable), which is imported with a name + * equivalent to the last part of the path. + * + * codegen is only performed on procedures that get called from main and their + * own dependencies, recursively. this allows to keep output binaries small. + * maybe supply some sort of export qualifier to do something similar for + * procedures that aren't main. + */ + +import "io" + +proc main { + io.print("Hello, world!") +} diff --git a/doc/test.txt b/doc/test.txt new file mode 100644 index 0000000..da8f501 --- /dev/null +++ b/doc/test.txt @@ -0,0 +1,119 @@ +type string = []char + +func split(src: string, by: char, a: ^Arena): [dyn]string do + let r: [dyn]string = {} + let s: string = "" + for c in src do + if c == by then + r.push(s, a) + s = "" + else + s.push(c, a) + end + end + return r +end + +proc main do + let a = Arena() + let lines = split(stdin.readAll(@a), '\n', @a) + for i in 0..len(lines) do + print(lines[i]) + end +end + +--- + +fn next_line(src, line: ^[]u8) -> bool { + if len(src) < 1 { + return false + } + let idx = find(src, '\n') + if idx <> -1 { + line^ := src^[0..idx] + } else { + line^ := src^ + } + src^ = src^[len(line)..] + return true +} + +fn main { + let a = Arena() + let buf = readall(stdin, &a) + let line: []u8 + while next_line(&buf, &line) { + print(line) + } +} + +--- + +/* builtin slice type */ + +fn print(str []u8) { + for i in str { + putchar(str[i]) + } + putchar('\n') +} + +/* iterators are just functions that return an initial state object and a + * step function that acts on it to produce a value */ + +type LineIterFn = fn(it ^[]u8) ([]u8, bool) + +fn lines(src []u8) ([]u8, LineIterFn) { + fn step(it ^[]u8) []u8, bool { + if len(it^) < 1 { return {}, false } + idx := find(it^, '\n') + line := if idx == -1 { it^ } else { it^[0..idx] } + it^ = it^[len(line)..] + return line, true + } + return src, step +} + +/* if multiple values used in if/while condition, only the last one is checked */ + +fn main { + a := Arena {} + buf := readall(stdin, &a) + for line in lines(buf) { + print(line) + } +} + +/* generics? */ + +fn reverse[T any](buf []T) ([]T, fn(it ^[]T) (T, bool)) { + fn step(it ^[]T) (T, bool) { + if len(^it) < 1 { return {}, false } + n := len(it^) - 1 + v := it^[n] + it^ := it^[0..n] + return v, true + } + return buf, step +} + +fn foo { + s := "Hello, world!" + // print(reverse(s)) <- wouldn't actually work, because iterators can't be cast back to slices + a := Arena {} + print(collect(reverse(s), &a)) +} + +struct MapIter[In any, Out any] { + ctx: rawptr, + iter: fn(it rawptr) In, + map: fn(x ^In) Out +} + +fn map[In any, Out any](p rawptr, f func(it rawptr) In, f func(x ^In) Out) (MapIter[In, Out], fn(it ^MapIter[In, Out]) (Out, bool)) { + fn step(it ^MapIter[In, Out]) (Out, bool) { + v, ok := it.iter(it.ctx) + if !ok { return {}, false } + return it.map(&v), true + } +} @@ -0,0 +1,6 @@ +#define STR_IMPL +#define STRIO_IMPL +#define ARENA_IMPL +#include "str.h" +#include "strio.h" +#include "arena.h" @@ -0,0 +1,184 @@ +#include <stdlib.h> +#include <stdio.h> + +#include "lex.h" +#include "arena.h" +#include "strio.h" + +void lex_start(Lexer *l, const char *path) { + l->filename = str_dup(str_from_cstr(path), &l->arena); + FILE *f = fopen(path, "r/o"); + if (!f) { + fprintf(stderr, "Couldn't open file %s\n", path); + exit(1); + } + if (read_all(f, &l->buf, &l->arena)) { + fprintf(stderr, "Couldn't read file %s\n", path); + fclose(f); + exit(1); + } + lex_next(l); +} + +void lex_free(Lexer *l) { + arena_free(&l->arena); +} + +Str lex_mask_str(Lexer *l, TokMask t) { + Str s = S(""); + for (Token i = 0; i < TOK_MAX; i++) { + if (t & TMASK(i)) { + if (s.n > 0) str_cat(&s, S(" or "), &l->arena); + str_cat(&s, str_from_cstr(lex_tok_str[i]), &l->arena); + } + } + return s; +} + +void lex_expect(Lexer *l, TokMask t) { + lex_next(l); + lex_expected(l, t); +} + +void lex_expect_not(Lexer *l, TokMask t) { + lex_next(l); + lex_expected_not(l, t); +} + +void lex_expected(Lexer *l, TokMask t) { + if (!(TMASK(l->tok) & t)) { + lex_error(l, LE_ERROR, str_fmt(&l->arena, "Expected %S but got %s", lex_mask_str(l, t), lex_tok_str[l->tok])); + } +} + +void lex_expected_not(Lexer *l, TokMask t) { + if (TMASK(l->tok) & t) { + lex_error(l, LE_ERROR, str_fmt(&l->arena, "Unexpected %s", lex_tok_str[l->tok])); + } +} + +void lex_pos(Lexer *l, int *line, int *col) { + int ln = 0, c = 0; + for (int i = 0; i < l->ofs; i++) { + if (l->buf.s[i] == '\n') { + ln++; + c = 0; + } else { + c++; + if (l->buf.s[i] == '\t') { + c += (unsigned)-c & 7; + } + } + } + *line = ln; + *col = c; +} + +void lex_error(Lexer *l, LexErr e, Str msg) { + int line, col; + l->ofs -= l->ident.n; + lex_pos(l, &line, &col); + + fprintf(stderr, "%s", e == LE_ERROR ? "\x1b[1;31m" : "\x1b[1;33m"); + + fprintf(stderr, "%.*s:%d:%d: %.*s\n\n", + (int)l->filename.n, l->filename.s, line + 1, col + 1, (int)msg.n, msg.s); + + { + int ofs = l->ofs; + int line_start = ofs; + while (line_start > 0 && l->buf.s[line_start - 1] != '\n') line_start--; + int line_end = line_start; + while (line_end < l->buf.n && l->buf.s[line_end] != '\n') line_end++; + fprintf(stderr, "%.*s\n", line_end - line_start, &l->buf.s[line_start]); + for (int i = 0; i < col; i++) putchar(' '); + for (int i = ofs; i < ofs + l->ident.n && i < line_end; i++) putchar('^'); + putchar('\n'); + } + + fprintf(stderr, "\x1b[0m\n"); + + if (e == LE_ERROR) { + exit(1); + } +} + +static inline int is_digit(int c) { + return c >= '0' && c <= '9'; +} + +static inline int is_ident_first_char(int c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +static inline int is_ident_next_char(int c) { + return is_ident_first_char(c) || is_digit(c); +} + +Token ident_to_keyword(Str ident) { + /* evil & stupid hack to avoid keeping a separate table of keywords */ + for (Token t = 0; t < TOK_MAX; t++) { + if (str_eql(str_from_cstr(lex_tok_str[t]), ident)) { + return t; + } + } + return TOK_IDENT; +} + +#define T(t) (l->tok = t) +void lex_next(Lexer *l) { + int i = l->ofs; + while (i < l->buf.n && is_space(l->buf.s[i])) { + i++; + } + int start_ofs = i; + l->ident = (Str) { &l->buf.s[start_ofs], 0 }; + if (i >= l->buf.n) { + l->tok = TOK_EOF; + return; + } + char c = l->buf.s[i++]; + l->tok = TOK_MAX; + if (is_ident_first_char(c)) { + T(TOK_IDENT); + while (i < l->buf.n && is_ident_next_char(l->buf.s[i])) i++; + } else if (is_digit(c)) { + T(TOK_LIT_NUM); + while (i < l->buf.n && (is_digit(l->buf.s[i]) || l->buf.s[i] == '.' || l->buf.s[i] == 'e')) i++; + } else { + switch (c) { +#define X(a,b) case b: T(a); break; + LEX_TOK_CHAR_LIST +#undef X + case '\'': + T(TOK_LIT_CHAR); + if (i < l->buf.n && l->buf.s[i] == '\\') i += 2; + else i++; + if (i >= l->buf.n) lex_error(l, LE_ERROR, S("Unterminated character literal")); + if (l->buf.s[i] != '\'') lex_error(l, LE_ERROR, S("Overlong character literal")); + i++; + break; + case '"': + T(TOK_LIT_STR); + for (;;) { + if (i >= l->buf.n) { + lex_error(l, LE_ERROR, S("Unterminated string literal")); + } + if (l->buf.s[i] == '\\') { + i += 2; + continue; + } + if (l->buf.s[i++] == '"') break; + } + break; + } + } + if (l->tok == TOK_MAX) { + lex_error(l, LE_ERROR, S("Invalid token")); + } + l->ident.n = i - start_ofs; + l->ofs = i; + if (l->tok == TOK_IDENT) { + l->tok = ident_to_keyword(l->ident); + } +} @@ -0,0 +1,81 @@ +#ifndef LEX_H +#define LEX_H + +#include "str.h" + +#define LEX_TOK_LIST\ + X(TOK_EOF, "end-of-file")\ + X(TOK_IDENT, "identifier")\ + X(TOK_PROC, "proc")\ + X(TOK_LET, "let")\ + X(TOK_VAR, "var")\ + X(TOK_CONST, "const")\ + X(TOK_RETURN, "return")\ + X(TOK_LBRACE, "{")\ + X(TOK_RBRACE, "}")\ + X(TOK_LPAREN, "(")\ + X(TOK_RPAREN, ")")\ + X(TOK_PLUS, "+")\ + X(TOK_MINUS, "-")\ + X(TOK_ASTERISK, "*")\ + X(TOK_SLASH, "/")\ + X(TOK_EQUALS, "=")\ + X(TOK_COLON, ":")\ + X(TOK_COMMA, ",")\ + X(TOK_LIT_STR, "string literal")\ + X(TOK_LIT_CHAR, "character literal")\ + X(TOK_LIT_NUM, "numeric literal") + +#define LEX_TOK_CHAR_LIST\ + X(TOK_LBRACE, '{')\ + X(TOK_RBRACE, '}')\ + X(TOK_LPAREN, '(')\ + X(TOK_RPAREN, ')')\ + X(TOK_PLUS, '+')\ + X(TOK_MINUS, '-')\ + X(TOK_ASTERISK, '*')\ + X(TOK_SLASH, '/')\ + X(TOK_EQUALS, '=')\ + X(TOK_COLON, ':')\ + X(TOK_COMMA, ',') + +typedef enum { +#define X(n, s) n, + LEX_TOK_LIST +#undef X + TOK_MAX, +} Token; + +static const char *lex_tok_str[TOK_MAX] = { +#define X(n, s) s, + LEX_TOK_LIST +#undef X +}; + +#define TMASK(t) (1L << t) +typedef unsigned long TokMask; + +typedef struct { + Token tok; + Str ident; + Str filename, buf; + Arena arena; + int ofs; +} Lexer; + +typedef enum { + LE_WARN, + LE_ERROR +} LexErr; + +void lex_start(Lexer *l, const char *path); +void lex_free(Lexer *l); +void lex_expected(Lexer *l, TokMask t); +void lex_expected_not(Lexer *l, TokMask t); +void lex_expect(Lexer *l, TokMask t); /* next -> expected */ +void lex_expect_not(Lexer *l, TokMask t); /* next -> expected_not */ +void lex_error(Lexer *l, LexErr e, Str msg); +void lex_pos(Lexer *l, int *line, int *col); +void lex_next(Lexer *l); + +#endif @@ -0,0 +1,161 @@ +#include <stdio.h> +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "lex.h" + +/* node graph */ + +typedef enum { + N_START, + N_STOP, + N_LIT_INT, + N_OP_ADD, + N_OP_SUB, + N_OP_MUL, + N_OP_DIV, + N_OP_NEG, +} NodeType; + +typedef struct Node { + NodeType type; + struct Node **inputs, **outputs; +} Node; + +typedef struct { + Node *start, *stop; +} ProcGraph; + +/* parsing */ + +void parse_expr(Lexer *l); + +void parse_stmt_let(Lexer *l) { + lex_expect(l, TMASK(TOK_IDENT)); + Str name = l->ident; + lex_expect(l, TMASK(TOK_EQUALS) | TMASK(TOK_COLON)); + lex_next(l); + parse_expr(l); + printf("name_bind %.*s\n", (int)name.n, name.s); +} + +void parse_stmt(Lexer *l) { + /* TODO */ + (void)l; + switch (l->tok) { + case TOK_LET: + parse_stmt_let(l); + break; + case TOK_RETURN: + lex_next(l); + parse_expr(l); + puts("return"); + break; + default: + lex_expected(l, TMASK(TOK_RBRACE)); + } +} + +void parse_proc(Lexer *l) { + lex_expect(l, TMASK(TOK_IDENT)); + lex_expect(l, TMASK(TOK_LBRACE)); + lex_next(l); + while (l->tok != TOK_RBRACE) { + lex_expected_not(l, TMASK(TOK_EOF)); + parse_stmt(l); + } + lex_expected(l, TMASK(TOK_RBRACE)); + lex_next(l); +} + +void parse_term(Lexer *l) { + int negate_after = l->tok == TOK_MINUS; + if (TMASK(l->tok) & (TMASK(TOK_MINUS) | TMASK(TOK_PLUS))) { + lex_next(l); + } + if (l->tok == TOK_LPAREN) { + lex_next(l); + parse_expr(l); + lex_expected(l, TMASK(TOK_RPAREN)); + lex_next(l); + } else { + lex_expected(l, TMASK(TOK_LIT_NUM) | TMASK(TOK_LIT_STR) | TMASK(TOK_LIT_CHAR) | TMASK(TOK_IDENT)); + switch (l->tok) { + case TOK_IDENT: + printf("var %.*s\n", (int)l->ident.n, l->ident.s); + break; + case TOK_LIT_STR: + printf("lit_str %.*s\n", (int)l->ident.n, l->ident.s); + break; + case TOK_LIT_CHAR: + printf("lit_char %.*s\n", (int)l->ident.n, l->ident.s); + break; + case TOK_LIT_NUM: + printf("lit_num %.*s\n", (int)l->ident.n, l->ident.s); + break; + default: + break; + } + lex_next(l); + } + if (negate_after) puts("negate"); +} + +void parse_expr(Lexer *l) { + parse_term(l); + if (l->tok == TOK_LPAREN) { + lex_next(l); + puts("args_start"); + for (;;) { + parse_expr(l); + lex_expected(l, TMASK(TOK_COMMA) | TMASK(TOK_RPAREN)); + if (l->tok == TOK_RPAREN) break; + lex_next(l); + } + lex_next(l); + puts("args_end"); + puts("func_call"); + } + if (TMASK(l->tok) & (TMASK(TOK_PLUS) | TMASK(TOK_MINUS) | TMASK(TOK_ASTERISK) | TMASK(TOK_SLASH))) { + Token t = l->tok; + lex_next(l); + parse_expr(l); + switch (t) { + case TOK_PLUS: puts("add"); break; + case TOK_MINUS: puts("subtract"); break; + case TOK_ASTERISK: puts("multiply"); break; + case TOK_SLASH: puts("divide"); break; + default: break; + } + } +} + +void parse_decl(Lexer *l) { + switch (l->tok) { + case TOK_PROC: + parse_proc(l); + break; + default: + lex_error(l, LE_ERROR, S("Invalid declaration statement")); + } +} + +void parse_unit(Lexer *l) { + while (l->tok != TOK_EOF) { + parse_decl(l); + } +} + +int main(int argc, const char **argv) { + if (argc != 2) { + fprintf(stderr, "Usage: %s FILE\n", argv[0]); + return 1; + } + Lexer l = { 0 }; + lex_start(&l, argv[1]); + parse_unit(&l); + lex_free(&l); + return 0; +} @@ -0,0 +1,152 @@ +#ifndef STR_H +#define STR_H + +#include <string.h> +#include <stddef.h> + +#include "arena.h" + +typedef struct { + char *s; + ptrdiff_t n; +} Str; + +typedef struct { + Str head, tail; +} Cut; + +#define S(s) (Str){s,sizeof(s)-1} + +char *str_to_cstr(Str s, Arena *a); +Str str_from_cstr(const char *s); +int str_eql(Str a, Str b); +int str_starts(Str a, Str b); +int str_ends(Str a, Str b); +void str_catc(Str *a, char b, Arena *m); +Str str_skip(Str a, ptrdiff_t n); +int is_space(char c); +Str str_trim_left(Str a); +Str str_trim_right(Str a); +Str str_trim(Str a); +Cut str_cut(Str s, char c); +Str str_findc(Str s, char c); +Str str_find(Str haystack, Str needle); +int str_contains(Str a, Str b); +Str str_dup(Str a, Arena *m); +void str_cat(Str *a, Str b, Arena *m); +Str str_replace_end(Str s, Str a, Str b, Arena *m); + +#ifdef STR_IMPL + +/* conversions */ + +char *str_to_cstr(Str s, Arena *a) { + char *r = new_arr(a, char, s.n + 1); + memcpy(r, s.s, s.n); + r[s.n] = 0; + return r; +} + +Str str_from_cstr(const char *s) { + return (Str) { (char*)s, strlen(s) }; +} + +/* pure functions */ + +int str_eql(Str a, Str b) { + return a.n == b.n && !memcmp(a.s, b.s, b.n); +} + +int str_starts(Str a, Str b) { + return a.n >= b.n && !memcmp(a.s, b.s, b.n); +} + +int str_ends(Str a, Str b) { + return a.n >= b.n && !memcmp(&a.s[a.n - b.n], b.s, b.n); +} + +void str_catc(Str *a, char b, Arena *m) { + a->s = resize(m, a->s, a->n, a->n + 1); + a->s[a->n++] = b; +} + +Str str_skip(Str a, ptrdiff_t n) { + return (Str) { a.s + n, a.n - n }; +} + +int is_space(char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +Str str_trim_left(Str a) { + while (a.n > 0 && is_space(a.s[0])) a.s++, a.n--; + return a; +} + +Str str_trim_right(Str a) { + while (a.n > 0 && is_space(a.s[a.n - 1])) a.n--; + return a; +} + +Str str_trim(Str a) { + return str_trim_left(str_trim_right(a)); +} + +/* splitting, searching */ + +Cut str_cut(Str s, char c) { + char *p = memchr(s.s, c, s.n); + if (!p) { + return (Cut) { s, { &s.s[s.n], 0 } }; + } else { + return (Cut) { + { s.s, p - s.s }, + { p + 1, &s.s[s.n] - (p + 1) } + }; + } +} + +Str str_findc(Str s, char c) { + char *p = memchr(s.s, c, s.n); + return p ? (Str) { p, s.n - (p - s.s) } : (Str) { &s.s[s.n], 0 }; +} + +Str str_find(Str haystack, Str needle) { + if (needle.n < 1) return haystack; + while (haystack.n > 0) { + haystack = str_findc(haystack, needle.s[0]); + if (str_starts(haystack, needle)) break; + if (haystack.n > 0) haystack = str_skip(haystack, 1); + } + return haystack; +} + +int str_contains(Str a, Str b) { + return str_find(a, b).n > 0; +} + +/* allocating */ + +Str str_dup(Str a, Arena *m) { + char *s = new_arr(m, char, a.n); + memcpy(s, a.s, a.n); + a.s = s; + return a; +} + +void str_cat(Str *a, Str b, Arena *m) { + a->s = resize(m, a->s, a->n, a->n + b.n); + memcpy(&a->s[a->n], b.s, b.n); + a->n += b.n; +} + +Str str_replace_end(Str s, Str a, Str b, Arena *m) { + if (!str_ends(s, a)) return s; + char *p = new_arr(m, char, s.n + b.n - a.n); + memcpy(p, s.s, s.n - a.n); + memcpy(p + s.n - a.n, b.s, b.n); + return (Str) { p, s.n + b.n - a.n }; +} + +#endif +#endif @@ -0,0 +1,234 @@ +#ifndef STRIO_H +#define STRIO_H + +#include <stdarg.h> + +#include "str.h" +#include "arena.h" + +int read_all(FILE *f, Str *buf, Arena *a); +int next_line(Str *src, Str *line); +void str_putf(Str s, FILE *f); +void str_put(Str s); + +int str_to_u64(Str s, uint64_t *out); +int str_to_i64(Str s, int64_t *out); + +void str_cat_i64(Str *out, int64_t c, char pad_char, int min_width, Arena *a); +void str_cat_u64(Str *out, uint64_t c, char pad_char, int min_width, Arena *a); +void str_cat_fmtv(Str *out, Arena *arena, const char *fmt, va_list ap); +void str_cat_fmt(Str *out, Arena *arena, const char *fmt, ...); + +Str str_fmtv(Arena *arena, const char *fmt, va_list ap); +Str str_fmt(Arena *arena, const char *fmt, ...); +const char *cstr_fmt(Arena *arena, const char *fmt, ...); + +#ifdef STRIO_IMPL + +#include <stdio.h> +#include <stdint.h> + +static inline long read_all_file_size(FILE *f) { + fseek(f, 0, SEEK_END); + long t = ftell(f); + fseek(f, 0, SEEK_SET); + return t > 0 ? t : -1; +} + +int read_all(FILE *f, Str *buf, Arena *a) { + if (!f) return -1; + long sz = read_all_file_size(f); + if (sz < 1) { + ptrdiff_t cap = 4096; + buf->s = new_arr(a, char, cap); + buf->n = 0; + while (!feof(f)) { + size_t n = fread(&buf->s[buf->n], 1, cap - buf->n, f); + if (n < 1) break; + buf->n += n; + if (buf->n >= cap) { + size_t c = cap; + while (buf->n >= cap) cap <<= 1; + buf->s = resize(a, buf->s, c, cap); + } + } + } else { + buf->n = sz; + buf->s = new_arr(a, char, sz); + size_t sz = fread(buf->s, 1, buf->n, f); + if (sz < (size_t)buf->n) return -1; + } + return ferror(f) ? -1 : 0; +} + +int next_line(Str *src, Str *line) { + if (src->n < 1) return 0; + line->s = src->s; + char *newln = memchr(src->s, '\n', src->n); + line->n = newln ? newln - src->s : src->n; + src->s += line->n + 1; + src->n -= line->n + 1; + if (line->n > 0 && line->s[line->n-1] == '\r') line->n--; + return 1; +} + +void str_putf(Str s, FILE *f) { + fwrite(s.s, 1, s.n, f); +} + +void str_put(Str s) { + str_putf(s, stdout); +} + +/* formatted conversion */ + +int str_to_u64(Str s, uint64_t *out) { + if (s.n < 1) return -1; + uint64_t acc = 0; + for (int i = 0; i < s.n; i++) { + char c = s.s[i]; + if (!(c >= '0' && c <= '9')) return -1; + acc = (acc * 10) + (c - '0'); + } + *out = acc; + return 0; +} + +int str_to_i64(Str s, int64_t *out) { + int64_t sign = 1; + if (s.n > 0 && s.s[0] == '-') { + s = str_skip(s, 1); + sign = -1; + } + uint64_t u64; + if (str_to_u64(s, &u64)) return -1; + *out = u64 * sign; + return 0; +} + +static void str_cat_u64_(char buf[32], int *n, uint64_t c) { + int i = 0; + buf[31] = '\0'; + do { + buf[32 - ++i] = (c % 10) + '0'; + c /= 10; + } while (c); + *n = i; +} + +void str_cat_u64(Str *out, uint64_t c, char pad_char, int min_width, Arena *a) { + int n; + /* more than enough for the largest 64-bit number + * log_10(1 << 64) ~= 19.3 digits max */ + char buf[32]; + str_cat_u64_(buf, &n, c); + while (n < min_width && ++n < 32) buf[32-n] = pad_char; + str_cat(out, (Str) { &buf[sizeof(buf) - n], n }, a); +} + +void str_cat_i64(Str *out, int64_t c, char pad_char, int min_width, Arena *a) { + /* more than enough for the largest 64-bit number + * log_10(1 << 64) ~= 19.3 digits max */ + int n, neg = 0; + char buf[32]; + if (c < 0) neg = 1, c = -c; + str_cat_u64_(buf, &n, c); + if (neg) buf[sizeof(buf) - ++n] = '-'; + while (n < min_width && ++n < 32) buf[32-n] = pad_char; + str_cat(out, (Str) { &buf[sizeof(buf) - n], n }, a); +} + +/* IMPORTANT: this is not and will not be printf() compatible + * + * %s - c string + * %S - Str + * %i - int32 + * %I - int64 + * %u - uint32 + * %U - uin64 + * + **/ +void str_cat_fmtv(Str *out, Arena *arena, const char *fmt, va_list ap) { + size_t n = strlen(fmt); + for (size_t i = 0; i < n; i++) { + const char *mch = memchr(&fmt[i], '%', n - i); + if (!mch) { + str_cat(out, (Str) { (char*)&fmt[i], n - i }, arena); + break; + } + size_t skip = mch - &fmt[i]; + if (mch != &fmt[i]) { + str_cat(out, (Str) { (char*)&fmt[i], skip }, arena); + i += skip; + } + if (i + 1 < n) { + int zero_pad = 0, min_width = 0; + i++; + if (fmt[i] == '0') { + zero_pad = 1; + i++; + } + while (i < n && fmt[i] >= '0' && fmt[i] <= '9') { + min_width = min_width * 10 + (fmt[i] - '0'); + i++; + } + if (i >= n) break; + switch (fmt[i]) { + case 's': + str_cat(out, str_from_cstr(va_arg(ap, const char *)), arena); + break; + case 'S': + str_cat(out, va_arg(ap, Str), arena); + break; + case 'i': + str_cat_i64(out, va_arg(ap, int32_t), zero_pad?'0':' ', min_width, arena); + break; + case 'I': + str_cat_i64(out, va_arg(ap, int64_t), zero_pad?'0':' ', min_width, arena); + break; + case 'u': + str_cat_u64(out, va_arg(ap, uint32_t), zero_pad?'0':' ', min_width, arena); + break; + case 'U': + str_cat_u64(out, va_arg(ap, uint64_t), zero_pad?'0':' ', min_width, arena); + break; + default: + str_catc(out, fmt[i], arena); + break; + } + } + } +} + +void str_cat_fmt(Str *out, Arena *arena, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + str_cat_fmtv(out, arena, fmt, ap); + va_end(ap); +} + +Str str_fmtv(Arena *arena, const char *fmt, va_list ap) { + Str s = { 0 }; + str_cat_fmtv(&s, arena, fmt, ap); + return s; +} + +Str str_fmt(Arena *arena, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + Str r = str_fmtv(arena, fmt, ap); + va_end(ap); + return r; +} + +const char *cstr_fmt(Arena *arena, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + Str r = str_fmtv(arena, fmt, ap); + str_catc(&r, '\0', arena); + va_end(ap); + return r.s; +} + +#endif +#endif diff --git a/test.lang b/test.lang new file mode 100644 index 0000000..7559516 --- /dev/null +++ b/test.lang @@ -0,0 +1,3 @@ +proc main { + return 1 +} @@ -0,0 +1,64 @@ +#ifndef UTF8_H +#define UTF8_H + +#include "str.h" + +#define UTF8_INVALID (unsigned)-1 + +int utf8_len(unsigned cp); +unsigned utf8_next(Str *s); +void utf8_to_buf(unsigned cp, char *buf, int n); + +#ifdef UTF8_IMPL + +#include <stdbit.h> +#include <stdint.h> + +unsigned utf8_next(Str *s) { + if (s->n < 1) return 0; + static const uint8_t niblen[16] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,4 }; + static const uint8_t cpmask[4] = { 0x7f, 0x3f, 0x1f, 0xf }; + int len = niblen[(uint8_t)*s->s >> 4]; + if (!len) { s->n--; return *s->s++; } + if (s->n < len || (s->s[0] & (0x80 >> len))) return UTF8_INVALID; + unsigned cp = (unsigned)*s->s & cpmask[len]; + for (int i = 1; i < len; i++) { + if ((s->s[i] & 0xc0) != 0x80) return UTF8_INVALID; + cp = (cp << 6) | (s->s[i] & 0x3f); + } + s->s += len, s->n -= len; + return cp; +} + +unsigned utf8_next_unchecked(Str *s) { + if (s->n < 1) return 0; + static const uint8_t niblen[16] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,4 }; + static const uint8_t cpmask[4] = { 0x7f, 0x3f, 0x1f, 0xf }; + int len = niblen[(uint8_t)*s->s >> 4]; + if (!len) { s->n--; return *s->s++; } + unsigned cp = (unsigned)*s->s & cpmask[len]; + for (int i = 1; i < len; i++) cp = (cp << 6) | (s->s[i] & 0x3f); + s->s += len, s->n -= len; + return cp; +} + +int utf8_len(unsigned cp) { + static const uint8_t tbl[33] = { + 6,6,6,6,6,6, 5,5,5,5,5, 4,4,4,4,4, + 3,3,3,3,3, 2,2,2,2, 1,1,1,1,1,1,1,1, + }; + return tbl[stdc_leading_zeros(cp)]; +} + +void utf8_to_buf(unsigned cp, char *buf, int n) { + if (n == 1) { + *buf = cp; + return; + } + static const uint8_t tbl[5] = { 0b11000000, 0b11100000, 0b11110000, 0b11111000, 0b11111100 }; + for (int i = n; --i;) buf[i] = 0x80 | (cp & 0x3f), cp >>= 6; + buf[0] = tbl[n - 2] | cp; +} + +#endif +#endif |
