From 3b48035f5b9475293ff614c2d5193cf525e55a44 Mon Sep 17 00:00:00 2001
From: WormHeamer
Date: Thu, 1 Jan 2026 21:37:44 -0500
Subject: make regex line ends more consistent across chunks

---
 regex.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'regex.c')

diff --git a/regex.c b/regex.c
index 6fe4809..1c42126 100644
--- a/regex.c
+++ b/regex.c
@@ -824,7 +824,7 @@ static inline isize re_search_try_match(ReSearch *s, size_t i, size_t n) {
     }
     isize found_i = -1;
     ReThreadStepFlags f = 0;
-    if (i + start == 0) f = RE_THREAD_AT_START;
+    if (i == 0 && (s->flags & (RE_SEARCH_FIRST_CHUNK | RE_SEARCH_WAS_NEWLINE))) f = RE_THREAD_AT_START;
 
     for (; i < n && s->tcur.n > 0; i++) {
         s->c = (unsigned char)s->buf[i];
@@ -854,7 +854,7 @@ void re_search_start(ReSearch *s, RegEx *re, Arena *a) {
         .a = a,
         .re = re,
         .grp = new_arr(a, ReSpan, re->groups),
-        .flags = RE_SEARCH_FIRST_CHUNK
+        .flags = 0
     };
     re_threadlist_alloc(&s->tcur, re->op.n, a);
     re_threadlist_alloc(&s->tnext, re->op.n, a);
@@ -872,6 +872,9 @@ void re_search_last_chunk(ReSearch *s) {
     s->flags |= RE_SEARCH_LAST_CHUNK;
 }
 
+void re_search_first_chunk(ReSearch *s) {
+    s->flags |= RE_SEARCH_FIRST_CHUNK;
+}
 
 /* searching */
 
@@ -879,6 +882,18 @@ void re_search_last_chunk(ReSearch *s) {
  * check s->flags to find if the match is done yet
  */
 
+static inline void re_search_chunk_fin(ReSearch *s) {
+    u32 n = s->buf_len;
+    s->buf_idx = n;
+    if (n > 0) {
+        if (s->buf[n-1] == '\n') {
+            s->flags |= RE_SEARCH_WAS_NEWLINE;
+        } else {
+            s->flags &= ~RE_SEARCH_WAS_NEWLINE;
+        }
+    }
+}
+
 int re_search_match_at_start(ReSearch *s, ReMatch *m) {
     size_t i = s->buf_idx;
     size_t n = s->buf_len;
@@ -901,7 +916,7 @@ int re_search_match_at_start(ReSearch *s, ReMatch *m) {
             }
         }
     }
-    s->buf_idx = s->buf_len;
+    re_search_chunk_fin(s);
     return 0;
 }
 
@@ -917,7 +932,7 @@ int re_search_match(ReSearch *s, ReMatch *m) {
         s->buf_idx = i++;
         if (re_search_match_at_start(s, m)) return 1;
     }
-    s->buf_idx = n;
+    re_search_chunk_fin(s);
     return 0;
 }
 
@@ -929,6 +944,7 @@ ReMatchList re_match_all(RegEx *re, Str s, Arena *a) {
     ReMatch m;
     re_search_start(&sr, re, a);
     re_search_chunk(&sr, s.s, s.n);
+    re_search_first_chunk(&sr);
     re_search_last_chunk(&sr);
     while (re_search_match(&sr, &m)) {
         ReSpan *grp = new_arr(a, ReSpan, re->groups);
@@ -943,6 +959,7 @@ int re_match_full(RegEx *re, Str s, Arena *a) {
     ReMatch m;
     re_search_start(&sr, re, a);
     re_search_chunk(&sr, s.s, s.n);
+    re_search_first_chunk(&sr);
     re_search_last_chunk(&sr);
     return re_search_match(&sr, &m) && m.extent.start == 0 && m.extent.len == s.n;
 }
@@ -951,6 +968,7 @@ int re_match(RegEx *re, Str s, ReMatch *out, Arena *a) {
     ReSearch sr = { 0 };
     re_search_start(&sr, re, a);
     re_search_chunk(&sr, s.s, s.n);
+    re_search_first_chunk(&sr);
     re_search_last_chunk(&sr);
     return re_search_match(&sr, out);
 }
-- 
cgit v1.2.3