#ifndef REGEX_H #define REGEX_H #include #include "arena.h" #include "dynarr.h" #include "str.h" typedef enum : u8 { RE_CHAR, RE_CHAR_NOT, RE_CHAR_ANY, RE_CHAR_SET, RE_CHAR_RANGE, RE_CHAR_RANGE_NOT, RE_CHAR_SET_PACKED, RE_CHAR_SET_PACKED_NOT, RE_LINE_START, RE_LINE_END, RE_MATCH, RE_FAIL, RE_JUMP, RE_SPLIT, RE_GROUP_START, RE_GROUP_END, RE_LABEL /* only used during codegen */ } ReOpType; typedef union { struct { ReOpType op; union { uint32_t c; struct { u16 a, b; }; }; }; u64 align; } ReOp; typedef struct { uint32_t min, max; } ReChRange; typedef struct { /* sorted list of non-overlapping character ranges */ ReChRange *v; uint32_t n; int invert; } ReChSet; /* for RE_CHAR */ typedef enum { C_ANY = 0x40000000, C_LINE_START, C_LINE_END, } ReChSpecial; typedef struct { DYNARR(ReOp) op; DYNARR(ReChSet) cset; uint32_t groups; u8 first_byte; /* 0 if unknown */ } RegEx; typedef struct { uint32_t start, len; } ReSpan; typedef struct { ReSpan extent; ReSpan *grp; } ReMatch; typedef DYNARR(ReMatch) ReMatchList; typedef struct { u32 i; ReSpan *grp; } ReThread; typedef struct { ReThread *v; uintmax_t *set; size_t n; } ReThreadList; typedef enum { RE_SEARCH_FIRST_CHUNK = 1, RE_SEARCH_LAST_CHUNK = 2, RE_SEARCH_MID_MATCH = 4, RE_SEARCH_WAS_NEWLINE = 8, } ReSearchFlags; typedef struct { Arena *a; RegEx *re; ReSpan *grp; const char *buf; size_t buf_len, buf_idx; size_t total_idx; size_t match_start, match_end; ReThreadList tcur, tnext; ReSearchFlags flags; uint32_t c, c_prev; } ReSearch; typedef enum { RE_COMP_NO_GROUPS = 1 } ReCompFlags; typedef enum { RE_COMP_ENONE, RE_COMP_ENORPAREN, RE_COMP_ENOLPAREN, RE_COMP_EEOF, } ReCompErr; int re_comp(RegEx *re, Str src, Arena *perm, Arena *scratch); int re_comp_ex(RegEx *re, Str src, Arena *perm, Arena *scratch, ReCompFlags flags); const char *re_comp_strerror(ReCompErr err); void re_search_start(ReSearch *s, RegEx *re, Arena *a); void re_search_chunk(ReSearch *s, const char *buf, size_t n); void re_search_first_chunk(ReSearch *s); void re_search_last_chunk(ReSearch *s); int re_search_match(ReSearch *s, ReMatch *m); int re_search_match_at_start(ReSearch *s, ReMatch *m); ReMatchList re_match_all(RegEx *re, Str s, Arena *a); int re_match_full(RegEx *re, Str s, Arena *a); int re_match(RegEx *re, Str s, ReMatch *out, Arena *a); #endif