From b4e7527561f1c68b821d5cf25efe29ae63d1d434 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Thu, 25 Jan 2024 17:48:43 +0000 Subject: [PATCH] Appendfile: release regex-match store every thousand files. Bug 3047 From 35aacb69f5c839a4b77158464e401d86eb422ed6 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Fri, 26 Jan 2024 21:58:59 +0000 Subject: [PATCH] ACL: in "regex" condition, release store every thousand lines. Bug 3047 From: Jeremy Harris Date: Sun, 11 Feb 2024 13:57:18 +0000 (+0000) Subject: Use non-releaseable memory for regex match strings. Bug 3047 Broken-by: 35aacb69f5c8 From 6fcb3173d64ef8a9d70f8adf19f134a0cd9cf6e8 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Sun, 11 Feb 2024 15:04:58 +0000 Subject: [PATCH] use dynamic mem for regex_match_string From a173a4376d168edbf3fe2494dff998c4060bf425 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Tue, 13 Feb 2024 17:34:19 +0000 Subject: [PATCH] Use non-releasable memory for regex line-buffer Broken-by: 5aacb69f5c8 From 44b3172e369435c2c1baa4e9c837252f729d2905 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Thu, 15 Feb 2024 19:56:40 +0000 Subject: [PATCH] regex: avoid releasing built RE midloop diff --git a/src/src/exim.c b/src/src/exim.c --- a/src/exim.c +++ b/src/exim.c @@ -49,6 +49,8 @@ optimize out the tail recursion and so not make them too expensive. */ static void * function_store_malloc(PCRE2_SIZE size, void * tag) { +if (size > INT_MAX) + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "excessive memory alloc request"); return store_malloc((int)size); } @@ -63,12 +65,15 @@ if (block) store_free(block); static void * function_store_get(PCRE2_SIZE size, void * tag) { +if (size > INT_MAX) + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "excessive memory alloc request"); return store_get((int)size, GET_UNTAINTED); /* loses track of taint */ } static void function_store_nullfree(void * block, void * tag) { +/* We cannot free memory allocated using store_get() */ } diff --git a/src/src/transports/appendfile.c b/src/src/transports/appendfile.c --- a/src/transports/appendfile.c +++ b/src/transports/appendfile.c @@ -661,13 +665,14 @@ Returns: the sum of the sizes of the stattable files off_t check_dir_size(const uschar * dirname, int * countptr, const pcre2_code * re) { DIR *dir; off_t sum = 0; -int count = *countptr; +int count = *countptr, lcount = REGEX_LOOPCOUNT_STORE_RESET; +rmark reset_point = store_mark(); if (!(dir = exim_opendir(dirname))) return 0; for (struct dirent *ent; ent = readdir(dir); ) { uschar * path, * name = US ent->d_name; struct stat statbuf; @@ -675,6 +680,11 @@ for (struct dirent *ent; ent = readdir(dir); ) if (Ustrcmp(name, ".") == 0 || Ustrcmp(name, "..") == 0) continue; count++; + if (--lcount == 0) + { + store_reset(reset_point); reset_point = store_mark(); + lcount = REGEX_LOOPCOUNT_STORE_RESET; + } /* If there's a regex, try to find the size using it */ @@ -726,6 +736,7 @@ DEBUG(D_transport) debug_printf("check_dir_size: dir=%s sum=" OFF_T_FMT " count=%d\n", dirname, sum, count); +store_reset(reset_point); *countptr = count; return sum; } diff --git a/src/src/macros.h b/src/src/macros.h --- a/src/macros.h +++ b/src/macros.h @@ -1185,4 +1185,9 @@ typedef enum { sw_mrc_tx_fail, /* transmit failed */ } sw_mrc_t; +/* Recent versions of PCRE2 are allocating 20kB per match, rather than the previous 112 B. +When doing en extended loop of matching, release store periodically. */ + +#define REGEX_LOOPCOUNT_STORE_RESET 1000 + /* End of macros.h */ diff --git a/src/src/regex.c b/src/src/regex.c --- a/src/regex.c +++ b/src/regex.c @@ -24,8 +24,6 @@ typedef struct pcre_list { struct pcre_list * next; } pcre_list; -uschar regex_match_string_buffer[1024]; - extern FILE *mime_stream; extern uschar *mime_current_boundary; @@ -31,12 +31,11 @@ extern uschar *mime_current_boundary; static pcre_list * -compile(const uschar * list, BOOL cacheable) +compile(const uschar * list, BOOL cacheable, int * cntp) { -int sep = 0; +int sep = 0, cnt = 0; uschar * regex_string; -pcre_list * re_list_head = NULL; -pcre_list * ri; +pcre_list * re_list_head = NULL, * ri; /* precompile our regexes */ while ((regex_string = string_nextinlist(&list, &sep, NULL, 0))) @@ -58,10 +57,19 @@ while ((regex_string = string_nextinlist(&list, &sep, NULL, 0))) ri->pcre_text = regex_string; ri->next = re_list_head; re_list_head = ri; + cnt++; } +if (cntp) *cntp = cnt; return re_list_head; } + +/* Check list of REs against buffer, returning OK for (first) match, +else FAIL. On match return allocated result strings in regex_vars[]. + +We use the perm-pool for that, so that our caller can release +other allocations. +*/ static int matcher(pcre_list * re_list_head, uschar * linebuffer, int len) { @@ -75,9 +82,10 @@ for (pcre_list * ri = re_list_head; ri; ri = ri->next) /* try matcher on the line */ if ((n = pcre2_match(ri->re, (PCRE2_SPTR)linebuffer, len, 0, 0, md, pcre_gen_mtc_ctx)) > 0) { + int save_pool = store_pool; + store_pool = POOL_PERM; + - Ustrncpy(regex_match_string_buffer, ri->pcre_text, - sizeof(regex_match_string_buffer)-1); - regex_match_string = regex_match_string_buffer; + regex_match_string = string_copy(ri->pcre_text); for (int nn = 1; nn < n; nn++) { @@ -87,6 +97,7 @@ for (pcre_list * ri = re_list_head; ri; ri = ri->next) regex_vars[nn-1] = string_copyn(linebuffer + ovec[off], len); } + store_pool = save_pool; return OK; } } @@ -110,9 +111,8 @@ FILE * mbox_file; unsigned long mbox_size; FILE * mbox_file; pcre_list * re_list_head; -uschar * linebuffer; long f_pos = 0; -int ret = FAIL; +int ret = FAIL, cnt, lcount = REGEX_LOOPCOUNT_STORE_RESET; regex_vars_clear(); @@ -136,26 +138,32 @@ else mbox_file = mime_stream; } -/* precompile our regexes */ -if (!(re_list_head = compile(*listptr, cacheable))) - return FAIL; /* no regexes -> nothing to do */ - -/* match each line against all regexes */ -linebuffer = store_get(32767, GET_TAINTED); -while (fgets(CS linebuffer, 32767, mbox_file)) - { - if ( mime_stream && mime_current_boundary /* check boundary */ - && Ustrncmp(linebuffer, "--", 2) == 0 - && Ustrncmp((linebuffer+2), mime_current_boundary, - Ustrlen(mime_current_boundary)) == 0) - break; /* found boundary */ - - if ((ret = matcher(re_list_head, linebuffer, (int)Ustrlen(linebuffer))) == OK) - goto done; + /* precompile our regexes */ + if ((re_list_head = compile(*listptr, cacheable, &cnt))) + { + rmark reset_point = store_mark(); + + while (fgets(CS big_buffer, big_buffer_size, mbox_file)) + { + if ( mime_stream && mime_current_boundary /* check boundary */ + && Ustrncmp(big_buffer, "--", 2) == 0 + && Ustrncmp((big_buffer+2), mime_current_boundary, + Ustrlen(mime_current_boundary)) == 0) + break; /* found boundary */ + + if ((ret = matcher(re_list_head, big_buffer, (int)Ustrlen(big_buffer))) == OK) + break; + + if ((lcount -= cnt) <= 0) + { + store_reset(reset_point); reset_point = store_mark(); + lcount = REGEX_LOOPCOUNT_STORE_RESET; + } + } + + store_reset(reset_point); + } - } -/* no matches ... */ -done: if (!mime_stream) (void)fclose(mbox_file); else @@ -180,14 +190,11 @@ pcre_list * re_list_head = NULL; FILE * f; uschar * mime_subject = NULL; int mime_subject_len = 0; -int ret; +int ret = FAIL; +rmark reset_point; regex_vars_clear(); -/* precompile our regexes */ -if (!(re_list_head = compile(*listptr, cacheable))) - return FAIL; /* no regexes -> nothing to do */ - /* check if the file is already decoded */ if (!mime_decoded_filename) { /* no, decode it first */ @@ -210,12 +217,20 @@ if (!(f = fopen(CS mime_decoded_filename, "rb"))) return DEFER; } -/* get 32k memory, tainted */ -mime_subject = store_get(32767, GET_TAINTED); +reset_point = store_mark(); + { + /* precompile our regexes */ + if ((re_list_head = compile(*listptr, cacheable, NULL))) + { + /* get 32k memory, tainted */ + mime_subject = store_get(32767, GET_TAINTED); -mime_subject_len = fread(mime_subject, 1, 32766, f); + mime_subject_len = fread(mime_subject, 1, 32766, f); -ret = matcher(re_list_head, mime_subject, mime_subject_len); + ret = matcher(re_list_head, mime_subject, mime_subject_len); + } + } +store_reset(reset_point); (void)fclose(f); return ret; }