diff options
| author | WormHeamer | 2026-01-01 21:37:44 -0500 |
|---|---|---|
| committer | WormHeamer | 2026-01-01 21:37:44 -0500 |
| commit | 3b48035f5b9475293ff614c2d5193cf525e55a44 (patch) | |
| tree | 7ebac8e75ff462b30fd09614ad7b3b8558b08d6e | |
| parent | 4853268e16df1789541ea27586c7a4ec8c3786cb (diff) | |
make regex line ends more consistent across chunks
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | main.c | 4 |
| -rw-r--r-- | regex.c | 26 |
| -rw-r--r-- | regex.h | 2 |
3 files changed, 27 insertions, 5 deletions
```diff
--- a/main.c
+++ b/main.c
@@ -681,9 +681,10 @@ int search_next_regex(TxtLoc l, Str src, TxtLoc *out) {
         return 0;
     }
     int match_found = 0;
-search_from_start:
+search_from_start:;
     TxtLoc t = l;
     re_search_start(&s, &re, &e.scratch);
+    if (at_start(t) || txt_chr(cprev(t)) == '\n') re_search_first_chunk(&s);
     while (!at_end(t)) {
         Str chnk = txt_next_chunk(&t);
         re_search_chunk(&s, chnk.s, chnk.n);
@@ -720,6 +721,7 @@ search_from_end:
     Arena a = e.scratch;
     re_search_start(&s, &re, &e.scratch);
     TxtLoc t = l;
+    if (at_start(t) || txt_chr(cprev(t)) == '\n') re_search_first_chunk(&s);
     while (!at_end(t)) {
         TxtLoc p = t;
         Str chnk = txt_next_chunk(&t);
--- a/regex.c
+++ b/regex.c
@@ -824,7 +824,7 @@ static inline isize re_search_try_match(ReSearch *s, size_t i, size_t n) {
     }
     isize found_i = -1;
     ReThreadStepFlags f = 0;
-    if (i + start == 0) f = RE_THREAD_AT_START;
+    if (i == 0 && (s->flags & (RE_SEARCH_FIRST_CHUNK | RE_SEARCH_WAS_NEWLINE))) f = RE_THREAD_AT_START;
 
     for (; i < n && s->tcur.n > 0; i++) {
         s->c = (unsigned char)s->buf[i];
@@ -854,7 +854,7 @@ void re_search_start(ReSearch *s, RegEx *re, Arena *a) {
         .a = a,
         .re = re,
         .grp = new_arr(a, ReSpan, re->groups),
-        .flags = RE_SEARCH_FIRST_CHUNK
+        .flags = 0
     };
     re_threadlist_alloc(&s->tcur, re->op.n, a);
     re_threadlist_alloc(&s->tnext, re->op.n, a);
@@ -872,6 +872,9 @@ void re_search_last_chunk(ReSearch *s) {
     s->flags |= RE_SEARCH_LAST_CHUNK;
 }
 
+void re_search_first_chunk(ReSearch *s) {
+    s->flags |= RE_SEARCH_FIRST_CHUNK;
+}
 
 /* searching */
 
@@ -879,6 +882,18 @@ void re_search_last_chunk(ReSearch *s) {
  * check s->flags to find if the match is done yet
  */
 
+static inline void re_search_chunk_fin(ReSearch *s) {
+    u32 n = s->buf_len;
+    s->buf_idx = n;
+    if (n > 0) {
+        if (s->buf[n-1] == '\n') {
+            s->flags |= RE_SEARCH_WAS_NEWLINE;
+        } else {
+            s->flags &= ~RE_SEARCH_WAS_NEWLINE;
+        }
+    }
+}
+
 int re_search_match_at_start(ReSearch *s, ReMatch *m) {
     size_t i = s->buf_idx;
     size_t n = s->buf_len;
@@ -901,7 +916,7 @@ int re_search_match_at_start(ReSearch *s, ReMatch *m) {
             }
         }
     }
-    s->buf_idx = s->buf_len;
+    re_search_chunk_fin(s);
     return 0;
 }
 
@@ -917,7 +932,7 @@ int re_search_match(ReSearch *s, ReMatch *m) {
         s->buf_idx = i++;
         if (re_search_match_at_start(s, m)) return 1;
     }
-    s->buf_idx = n;
+    re_search_chunk_fin(s);
     return 0;
 }
 
@@ -929,6 +944,7 @@ ReMatchList re_match_all(RegEx *re, Str s, Arena *a) {
     ReMatch m;
     re_search_start(&sr, re, a);
     re_search_chunk(&sr, s.s, s.n);
+    re_search_first_chunk(&sr);
     re_search_last_chunk(&sr);
     while (re_search_match(&sr, &m)) {
         ReSpan *grp = new_arr(a, ReSpan, re->groups);
@@ -943,6 +959,7 @@ int re_match_full(RegEx *re, Str s, Arena *a) {
     ReMatch m;
     re_search_start(&sr, re, a);
     re_search_chunk(&sr, s.s, s.n);
+    re_search_first_chunk(&sr);
     re_search_last_chunk(&sr);
     return re_search_match(&sr, &m) && m.extent.start == 0 && m.extent.len == s.n;
 }
@@ -951,6 +968,7 @@ int re_match(RegEx *re, Str s, ReMatch *out, Arena *a) {
     ReSearch sr = { 0 };
     re_search_start(&sr, re, a);
     re_search_chunk(&sr, s.s, s.n);
+    re_search_first_chunk(&sr);
     re_search_last_chunk(&sr);
     return re_search_match(&sr, out);
 }
--- a/regex.h
+++ b/regex.h
@@ -92,6 +92,7 @@ typedef enum {
     RE_SEARCH_FIRST_CHUNK = 1,
     RE_SEARCH_LAST_CHUNK = 2,
     RE_SEARCH_MID_MATCH = 4,
+    RE_SEARCH_WAS_NEWLINE = 8,
 } ReSearchFlags;
 
 typedef struct {
@@ -124,6 +125,7 @@ const char *re_comp_strerror(ReCompErr err);
 
 void re_search_start(ReSearch *s, RegEx *re, Arena *a);
 void re_search_chunk(ReSearch *s, const char *buf, size_t n);
+void re_search_first_chunk(ReSearch *s);
 void re_search_last_chunk(ReSearch *s);
 int re_search_match(ReSearch *s, ReMatch *m);
 int re_search_match_at_start(ReSearch *s, ReMatch *m);
```
