diff options
Diffstat (limited to 'regex.h')
| -rw-r--r-- | regex.h | 126 |
1 files changed, 126 insertions, 0 deletions
@@ -0,0 +1,126 @@ +#ifndef REGEX_H +#define REGEX_H + +#include <stdint.h> + +#include "arena.h" +#include "dynarr.h" +#include "str.h" + +typedef enum : u8 { + RE_CHAR, + RE_CHAR_NOT, + RE_CHAR_ANY, + RE_CHAR_SET, + RE_CHAR_RANGE, + RE_CHAR_RANGE_NOT, + RE_CHAR_SET_PACKED, + RE_CHAR_SET_PACKED_NOT, + RE_LINE_START, + RE_LINE_END, + RE_MATCH, + RE_FAIL, + RE_JUMP, + RE_SPLIT, + RE_GROUP_START, + RE_GROUP_END, + RE_LABEL /* only used during codegen */ +} ReOpType; + +typedef union { + struct { + ReOpType op; + union { + uint32_t c; + struct { + u16 a, b; + }; + }; + }; + u64 align; +} ReOp; + +typedef struct { + uint32_t min, max; +} ReChRange; + +typedef struct { + /* sorted list of non-overlapping character ranges */ + ReChRange *v; + uint32_t n; + int invert; +} ReChSet; + +/* for RE_CHAR */ +typedef enum { + C_ANY = 0x80000000, + C_LINE_START, + C_LINE_END, + C_EOF = 0xffffffff +} ReChSpecial; + +typedef struct { + DYNARR(ReOp) op; + DYNARR(ReChSet) cset; + uint32_t groups; + u8 first_byte; /* 0 if unknown */ +} RegEx; + +typedef struct { + uint32_t start, len; +} ReSpan; + +typedef struct { + ReSpan extent; + ReSpan *grp; +} ReMatch; + +typedef DYNARR(ReMatch) ReMatchList; + +typedef struct { + u32 i; + ReSpan *grp; +} ReThread; + +typedef struct { + ReThread *v; + uintmax_t *set; + size_t n; +} ReThreadList; + +typedef enum { + RE_SEARCH_FIRST_CHUNK = 1, + RE_SEARCH_LAST_CHUNK = 2, + RE_SEARCH_MID_MATCH = 4, +} ReSearchFlags; + +typedef struct { + Arena *a; + RegEx *re; + ReSpan *grp; + const char *buf; + size_t buf_len, buf_idx; + size_t total_idx; + size_t match_start, match_end; + ReThreadList tcur, tnext; + ReSearchFlags flags; + uint32_t c, c_prev; +} ReSearch; + +typedef enum { + RE_COMP_NO_GROUPS = 1 +} ReCompFlags; + +int re_comp(RegEx *re, Str src, Arena *perm, Arena *scratch); +int re_comp_ex(RegEx *re, Str src, Arena *perm, Arena *scratch, ReCompFlags flags); + +void re_search_start(ReSearch *s, RegEx *re, Arena *a); +void re_search_chunk(ReSearch *s, const char *buf, size_t n); +void re_search_last_chunk(ReSearch *s); +int re_search_match(ReSearch *s, ReMatch *m); + +ReMatchList re_match_all(RegEx *re, Str s, Arena *a); +int re_match_full(RegEx *re, Str s, Arena *a); +int re_match(RegEx *re, Str s, ReMatch *out, Arena *a); + +#endif |
