summaryrefslogtreecommitdiff
path: root/regex.h
diff options
context:
space:
mode:
Diffstat (limited to 'regex.h')
-rw-r--r--regex.h126
1 files changed, 126 insertions, 0 deletions
diff --git a/regex.h b/regex.h
new file mode 100644
index 0000000..38089d3
--- /dev/null
+++ b/regex.h
@@ -0,0 +1,126 @@
+#ifndef REGEX_H
+#define REGEX_H
+
+#include <stdint.h>
+
+#include "arena.h"
+#include "dynarr.h"
+#include "str.h"
+
+typedef enum : u8 { /* operation kind stored in ReOp.op; `enum : u8` is C23 fixed-underlying-type syntax */
+ RE_CHAR, /* NOTE(review): u8 is not declared in this header; presumably comes from a project include - confirm */
+ RE_CHAR_NOT,
+ RE_CHAR_ANY,
+ RE_CHAR_SET,
+ RE_CHAR_RANGE,
+ RE_CHAR_RANGE_NOT,
+ RE_CHAR_SET_PACKED,
+ RE_CHAR_SET_PACKED_NOT,
+ RE_LINE_START,
+ RE_LINE_END,
+ RE_MATCH,
+ RE_FAIL,
+ RE_JUMP,
+ RE_SPLIT,
+ RE_GROUP_START,
+ RE_GROUP_END,
+ RE_LABEL /* only used during codegen */
+} ReOpType;
+
+typedef union { /* one operation: an ReOpType tag plus an operand, overlaid with a u64 */
+ struct { /* anonymous struct (C11): members are accessed directly on ReOp */
+ ReOpType op; /* selects the operation kind */
+ union { /* operand storage: one 32-bit value or two 16-bit values share the same bytes */
+ uint32_t c; /* NOTE(review): presumably a character or ReChSpecial value when op is RE_CHAR - confirm */
+ struct {
+ u16 a, b; /* NOTE(review): meaning per op is not visible in this header - confirm against the compiler/matcher */
+ };
+ };
+ };
+ u64 align; /* NOTE(review): by its name, exists only to give the union u64 alignment/size - confirm */
+} ReOp;
+
+typedef struct { /* one character range; element type of ReChSet.v */
+ uint32_t min, max; /* range bounds; NOTE(review): whether max is inclusive is not stated here - confirm */
+} ReChRange;
+
+typedef struct { /* a character set expressed as ranges; stored in RegEx.cset */
+ /* sorted list of non-overlapping character ranges */
+ ReChRange *v;
+ uint32_t n; /* NOTE(review): presumably the number of entries in v - confirm */
+ int invert; /* NOTE(review): presumably nonzero means the set is negated - confirm */
+} ReChSet;
+
+/* for RE_CHAR */
+typedef enum { /* NOTE(review): these constants do not fit in int; before C23 enumerators must be representable as int, so this relies on C23 or a compiler extension */
+ C_ANY = 0x80000000, /* first special value; above the Unicode code point maximum 0x10FFFF */
+ C_LINE_START, /* implicitly 0x80000001 */
+ C_LINE_END, /* implicitly 0x80000002 */
+ C_EOF = 0xffffffff /* all 32 bits set */
+} ReChSpecial;
+
+typedef struct { /* regex object filled in by re_comp / re_comp_ex and read by the search and match functions */
+ DYNARR(ReOp) op; /* dynamic array of ReOp; DYNARR is a project macro not defined in this header */
+ DYNARR(ReChSet) cset; /* dynamic array of ReChSet */
+ uint32_t groups; /* NOTE(review): presumably the number of capture groups - confirm */
+ u8 first_byte; /* 0 if unknown */
+} RegEx;
+
+typedef struct { /* a span described by a start and a length */
+ uint32_t start, len; /* NOTE(review): presumably byte offsets into the searched text - units not stated here, confirm */
+} ReSpan;
+
+typedef struct { /* match result written by re_search_match and re_match; element type of ReMatchList */
+ ReSpan extent; /* NOTE(review): presumably the span of the whole match - confirm */
+ ReSpan *grp; /* NOTE(review): presumably one span per capture group; length and ownership not visible here - confirm */
+} ReMatch;
+
+typedef DYNARR(ReMatch) ReMatchList; /* dynamic array of ReMatch; return type of re_match_all */
+
+typedef struct { /* element type of ReThreadList.v */
+ u32 i; /* NOTE(review): presumably an index into RegEx.op - confirm */
+ ReSpan *grp; /* NOTE(review): presumably this thread's capture-group spans - confirm */
+} ReThread;
+
+typedef struct { /* a list of ReThread; ReSearch holds two of these (tcur, tnext) */
+ ReThread *v; /* thread storage */
+ uintmax_t *set; /* NOTE(review): presumably a bitset used for membership tests - confirm */
+ size_t n; /* NOTE(review): presumably the number of threads currently in v - confirm */
+} ReThreadList;
+
+typedef enum { /* type of ReSearch.flags; each value is a distinct single bit */
+ RE_SEARCH_FIRST_CHUNK = 1, /* bit 0 */
+ RE_SEARCH_LAST_CHUNK = 2, /* bit 1 */
+ RE_SEARCH_MID_MATCH = 4, /* bit 2 */
+} ReSearchFlags;
+
+typedef struct { /* search state passed to the re_search_* functions */
+ Arena *a; /* NOTE(review): presumably the arena given to re_search_start - confirm */
+ RegEx *re; /* NOTE(review): presumably the regex given to re_search_start - confirm */
+ ReSpan *grp;
+ const char *buf; /* NOTE(review): presumably the chunk given to re_search_chunk - confirm */
+ size_t buf_len, buf_idx; /* NOTE(review): presumably the chunk length and the current position within it - confirm */
+ size_t total_idx; /* NOTE(review): presumably the position counted across all chunks - confirm */
+ size_t match_start, match_end;
+ ReThreadList tcur, tnext; /* two thread lists; NOTE(review): presumably current and next step - confirm */
+ ReSearchFlags flags; /* holds ReSearchFlags values */
+ uint32_t c, c_prev; /* NOTE(review): presumably the current and previous character - confirm */
+} ReSearch;
+
+typedef enum { /* type of the flags parameter of re_comp_ex */
+ RE_COMP_NO_GROUPS = 1 /* NOTE(review): by its name, disables capture groups - confirm in re_comp_ex */
+} ReCompFlags;
+
+int re_comp(RegEx *re, Str src, Arena *perm, Arena *scratch); /* NOTE(review): presumably compiles src into *re; the meaning of the int result is not visible in this header - confirm */
+int re_comp_ex(RegEx *re, Str src, Arena *perm, Arena *scratch, ReCompFlags flags); /* same parameters as re_comp plus ReCompFlags */
+
+void re_search_start(ReSearch *s, RegEx *re, Arena *a); /* NOTE(review): presumably initializes *s for a search with re - confirm */
+void re_search_chunk(ReSearch *s, const char *buf, size_t n); /* NOTE(review): presumably supplies the next n bytes of input at buf - confirm */
+void re_search_last_chunk(ReSearch *s); /* NOTE(review): presumably marks the most recent chunk as the final one - confirm */
+int re_search_match(ReSearch *s, ReMatch *m); /* takes a ReMatch to fill; NOTE(review): meaning of the int result is not visible here - confirm */
+
+ReMatchList re_match_all(RegEx *re, Str s, Arena *a); /* returns a ReMatchList; NOTE(review): presumably every match of re in s, allocated from a - confirm */
+int re_match_full(RegEx *re, Str s, Arena *a); /* NOTE(review): by its name, tests whether re matches all of s; result convention not visible here - confirm */
+int re_match(RegEx *re, Str s, ReMatch *out, Arena *a); /* takes a ReMatch to fill; NOTE(review): meaning of the int result is not visible here - confirm */
+
+#endif