#include #include #include "lex.h" #include "arena.h" #include "strio.h" void lex_start(Lexer *l, const char *path) { l->filename = str_dup(str_from_cstr(path), &l->arena); FILE *f = fopen(path, "r/o"); if (!f) { fprintf(stderr, "Couldn't open file %s\n", path); exit(1); } if (read_all(f, &l->buf, &l->arena)) { fprintf(stderr, "Couldn't read file %s\n", path); fclose(f); exit(1); } lex_next(l); } void lex_free(Lexer *l) { arena_free(&l->arena); } Str lex_mask_str(Lexer *l, TokMask t) { Str s = S(""); for (Token i = 0; i < TOK_MAX; i++) { if (t & TMASK(i)) { if (s.n > 0) str_cat(&s, S(" or "), &l->arena); str_cat(&s, str_from_cstr(lex_tok_str[i]), &l->arena); } } return s; } void lex_expect(Lexer *l, TokMask t) { lex_next(l); lex_expected(l, t); } void lex_expect_not(Lexer *l, TokMask t) { lex_next(l); lex_expected_not(l, t); } void lex_expected(Lexer *l, TokMask t) { if (!(TMASK(l->tok) & t)) { lex_error(l, LE_ERROR, str_fmt(&l->arena, "Expected %S but got %s", lex_mask_str(l, t), lex_tok_str[l->tok])); } } void lex_expected_not(Lexer *l, TokMask t) { if (TMASK(l->tok) & t) { lex_error(l, LE_ERROR, str_fmt(&l->arena, "Unexpected %s", lex_tok_str[l->tok])); } } void lex_pos(Lexer *l, int *line, int *col) { int ln = 0, c = 0; for (int i = 0; i < l->ofs; i++) { if (l->buf.s[i] == '\n') { ln++; c = 0; } else { c++; if (l->buf.s[i] == '\t') { c += (unsigned)-c & 7; } } } *line = ln; *col = c; } void lex_error(Lexer *l, LexErr e, Str msg) { int line, col; l->ofs -= l->ident.n; lex_pos(l, &line, &col); fprintf(stderr, "%s", e == LE_ERROR ? "\x1b[1;31m" : "\x1b[1;33m"); fprintf(stderr, "%.*s:%d:%d: %.*s\n\n", (int)l->filename.n, l->filename.s, line + 1, col + 1, (int)msg.n, msg.s); { int ofs = l->ofs; int line_start = ofs; while (line_start > 0 && l->buf.s[line_start - 1] != '\n') line_start--; int line_end = line_start; while (line_end < l->buf.n && l->buf.s[line_end] != '\n') line_end++; fprintf(stderr, "%.*s\n", line_end - line_start, &l->buf.s[line_start]); for (int i = 0; i < col; i++) putchar(' '); for (int i = ofs; i < ofs + l->ident.n && i < line_end; i++) putchar('^'); putchar('\n'); } fprintf(stderr, "\x1b[0m\n"); if (e == LE_ERROR) { exit(1); } } static inline int is_digit(int c) { return c >= '0' && c <= '9'; } static inline int is_ident_first_char(int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; } static inline int is_ident_next_char(int c) { return is_ident_first_char(c) || is_digit(c); } Token ident_to_keyword(Str ident) { /* evil & stupid hack to avoid keeping a separate table of keywords */ for (Token t = 0; t < TOK_MAX; t++) { if (str_eql(str_from_cstr(lex_tok_str[t]), ident)) { return t; } } return TOK_IDENT; } #define T(t) (l->tok = t) void lex_next(Lexer *l) { int i = l->ofs; while (i < l->buf.n && is_space(l->buf.s[i])) { i++; } int start_ofs = i; l->ident = (Str) { &l->buf.s[start_ofs], 0 }; if (i >= l->buf.n) { l->tok = TOK_EOF; return; } char c = l->buf.s[i++]; l->tok = TOK_MAX; if (is_ident_first_char(c)) { T(TOK_IDENT); while (i < l->buf.n && is_ident_next_char(l->buf.s[i])) i++; } else if (is_digit(c)) { T(TOK_LIT_NUM); while (i < l->buf.n && (is_digit(l->buf.s[i]) || l->buf.s[i] == '.' || l->buf.s[i] == 'e')) i++; } else { switch (c) { #define X(a,b) case b: T(a); break; LEX_TOK_CHAR_LIST #undef X case '\'': T(TOK_LIT_CHAR); if (i < l->buf.n && l->buf.s[i] == '\\') i += 2; else i++; if (i >= l->buf.n) lex_error(l, LE_ERROR, S("Unterminated character literal")); if (l->buf.s[i] != '\'') lex_error(l, LE_ERROR, S("Overlong character literal")); i++; break; case '"': T(TOK_LIT_STR); for (;;) { if (i >= l->buf.n) { lex_error(l, LE_ERROR, S("Unterminated string literal")); } if (l->buf.s[i] == '\\') { i += 2; continue; } if (l->buf.s[i++] == '"') break; } break; } } if (l->tok == TOK_MAX) { lex_error(l, LE_ERROR, S("Invalid token")); } l->ident.n = i - start_ofs; l->ofs = i; if (l->tok == TOK_IDENT) { l->tok = ident_to_keyword(l->ident); } }