summary refs log tree commit diff
path: root/regex.c
diff options
context:
space:
mode:
author: WormHeamer 2026-01-01 21:37:44 -0500
committer: WormHeamer 2026-01-01 21:37:44 -0500
commit: 3b48035f5b9475293ff614c2d5193cf525e55a44 (patch)
tree: 7ebac8e75ff462b30fd09614ad7b3b8558b08d6e /regex.c
parent: 4853268e16df1789541ea27586c7a4ec8c3786cb (diff)
make regex line ends more consistent across chunks
Diffstat (limited to 'regex.c')
-rw-r--r-- regex.c 26
1 files changed, 22 insertions, 4 deletions
diff --git a/regex.c b/regex.c
index 6fe4809..1c42126 100644
--- a/regex.c
+++ b/regex.c
@@ -824,7 +824,7 @@ static inline isize re_search_try_match(ReSearch *s, size_t i, size_t n) {
}
isize found_i = -1;
ReThreadStepFlags f = 0;
- if (i + start == 0) f = RE_THREAD_AT_START;
+ if (i == 0 && (s->flags & (RE_SEARCH_FIRST_CHUNK | RE_SEARCH_WAS_NEWLINE))) f = RE_THREAD_AT_START;
for (; i < n && s->tcur.n > 0; i++) {
s->c = (unsigned char)s->buf[i];
@@ -854,7 +854,7 @@ void re_search_start(ReSearch *s, RegEx *re, Arena *a) {
.a = a,
.re = re,
.grp = new_arr(a, ReSpan, re->groups),
- .flags = RE_SEARCH_FIRST_CHUNK
+ .flags = 0
};
re_threadlist_alloc(&s->tcur, re->op.n, a);
re_threadlist_alloc(&s->tnext, re->op.n, a);
@@ -872,6 +872,9 @@ void re_search_last_chunk(ReSearch *s) {
s->flags |= RE_SEARCH_LAST_CHUNK;
}
+void re_search_first_chunk(ReSearch *s) {
+ s->flags |= RE_SEARCH_FIRST_CHUNK;
+}
/* searching */
@@ -879,6 +882,18 @@ void re_search_last_chunk(ReSearch *s) {
* check s->flags to find if the match is done yet
*/
+static inline void re_search_chunk_fin(ReSearch *s) {
+ u32 n = s->buf_len;
+ s->buf_idx = n;
+ if (n > 0) {
+ if (s->buf[n-1] == '\n') {
+ s->flags |= RE_SEARCH_WAS_NEWLINE;
+ } else {
+ s->flags &= ~RE_SEARCH_WAS_NEWLINE;
+ }
+ }
+}
+
int re_search_match_at_start(ReSearch *s, ReMatch *m) {
size_t i = s->buf_idx;
size_t n = s->buf_len;
@@ -901,7 +916,7 @@ int re_search_match_at_start(ReSearch *s, ReMatch *m) {
}
}
}
- s->buf_idx = s->buf_len;
+ re_search_chunk_fin(s);
return 0;
}
@@ -917,7 +932,7 @@ int re_search_match(ReSearch *s, ReMatch *m) {
s->buf_idx = i++;
if (re_search_match_at_start(s, m)) return 1;
}
- s->buf_idx = n;
+ re_search_chunk_fin(s);
return 0;
}
@@ -929,6 +944,7 @@ ReMatchList re_match_all(RegEx *re, Str s, Arena *a) {
ReMatch m;
re_search_start(&sr, re, a);
re_search_chunk(&sr, s.s, s.n);
+ re_search_first_chunk(&sr);
re_search_last_chunk(&sr);
while (re_search_match(&sr, &m)) {
ReSpan *grp = new_arr(a, ReSpan, re->groups);
@@ -943,6 +959,7 @@ int re_match_full(RegEx *re, Str s, Arena *a) {
ReMatch m;
re_search_start(&sr, re, a);
re_search_chunk(&sr, s.s, s.n);
+ re_search_first_chunk(&sr);
re_search_last_chunk(&sr);
return re_search_match(&sr, &m) && m.extent.start == 0 && m.extent.len == s.n;
}
@@ -951,6 +968,7 @@ int re_match(RegEx *re, Str s, ReMatch *out, Arena *a) {
ReSearch sr = { 0 };
re_search_start(&sr, re, a);
re_search_chunk(&sr, s.s, s.n);
+ re_search_first_chunk(&sr);
re_search_last_chunk(&sr);
return re_search_match(&sr, out);
}