Skip to content

Commit bec9bf3

Browse files
authored
Fuzzer improvements (JIT, option entropy) (#317)
* Add JIT differential testing and change the fuzzer input shape
* Skip the JIT comparison if JIT compilation failed; only enable it when SUPPORT_JIT is defined
1 parent 494e765 commit bec9bf3

File tree

1 file changed

+78
-22
lines changed

1 file changed

+78
-22
lines changed

src/pcre2_fuzzsupport.c

+78-22
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ Written by Philip Hazel, October 2016
3434
#define ALLOWED_MATCH_OPTIONS \
3535
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
3636
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
37-
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
37+
PCRE2_PARTIAL_SOFT)
3838

3939
/* This is the callout function. Its only purpose is to halt matching if there
4040
are more than 100 callouts, as one way of stopping too much time being spent on
@@ -58,40 +58,34 @@ int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
5858
{
5959
uint32_t compile_options;
6060
uint32_t match_options;
61-
uint32_t random_options;
61+
uint64_t random_options;
6262
pcre2_match_data *match_data = NULL;
63+
pcre2_match_data *match_data_jit = NULL;
6364
pcre2_match_context *match_context = NULL;
6465
size_t match_size;
6566
int dfa_workspace[DFA_WORKSPACE_COUNT];
66-
int r1, r2;
6767
int i;
6868

69-
if (size < 1) return 0;
69+
if (size < sizeof(random_options)) return -1;
7070

7171
/* Limiting the length of the subject for matching stops fruitless searches
7272
in large trees taking too much time. */
7373

74-
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
75-
76-
/* Figure out some options to use. Initialize the random number to ensure
77-
repeatability. Ensure that we get a 32-bit unsigned random number for testing
78-
options. (RAND_MAX is required to be at least 32767, but is commonly
79-
2147483647, which excludes the top bit.) */
74+
random_options = *(uint64_t *)(data);
75+
data += sizeof(random_options);
76+
size -= sizeof(random_options);
8077

81-
srand((unsigned int)(data[size/2]));
82-
r1 = rand() & 0xffff;
83-
r2 = rand() & 0xffff;
84-
random_options = ((uint32_t)r1 << 16) | (uint32_t)r2;
78+
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
8579

8680
/* Ensure that all undefined option bits are zero (waste of time trying them)
8781
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
8882
input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
8983
reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
9084
\C in random patterns is highly likely to cause a crash. */
9185

92-
compile_options = (random_options & ALLOWED_COMPILE_OPTIONS) |
86+
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
9387
PCRE2_NEVER_BACKSLASH_C;
94-
match_options = random_options & ALLOWED_MATCH_OPTIONS;
88+
match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) | PCRE2_NO_JIT;
9589

9690
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
9791
allowed together and just give an immediate error return. */
@@ -105,7 +99,8 @@ likewise do the match with and without the options. */
10599
for (i = 0; i < 2; i++)
106100
{
107101
uint32_t callout_count;
108-
int errorcode;
102+
int errorcode, errorcode_jit;
103+
uint32_t ovector_count;
109104
PCRE2_SIZE erroroffset;
110105
pcre2_code *code;
111106

@@ -151,7 +146,7 @@ for (i = 0; i < 2; i++)
151146
uint32_t save_match_options = match_options;
152147

153148
#ifdef SUPPORT_JIT
154-
pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
149+
int jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
155150
#endif
156151

157152
/* Create match data and context blocks only when we first need them. Set
@@ -161,12 +156,13 @@ for (i = 0; i < 2; i++)
161156
if (match_data == NULL)
162157
{
163158
match_data = pcre2_match_data_create(32, NULL);
164-
if (match_data == NULL)
159+
match_data_jit = pcre2_match_data_create(32, NULL);
160+
if (match_data == NULL || match_data_jit == NULL)
165161
{
166162
#ifdef STANDALONE
167163
printf("** Failed to create match data block\n");
168164
#endif
169-
return 0;
165+
abort();
170166
}
171167
}
172168

@@ -178,7 +174,7 @@ for (i = 0; i < 2; i++)
178174
#ifdef STANDALONE
179175
printf("** Failed to create match context block\n");
180176
#endif
181-
return 0;
177+
abort();
182178
}
183179
(void)pcre2_set_match_limit(match_context, 100);
184180
(void)pcre2_set_depth_limit(match_context, 100);
@@ -217,7 +213,66 @@ for (i = 0; i < 2; i++)
217213
}
218214
#endif
219215

220-
match_options = 0; /* For second time */
216+
#ifdef SUPPORT_JIT
217+
if (jit_ret >= 0)
218+
{
219+
callout_count = 0;
220+
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
221+
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
222+
223+
if (errorcode_jit != errorcode)
224+
{
225+
printf("JIT errorcode %d did not match original errorcode %d\n", errorcode_jit, errorcode);
226+
abort();
227+
}
228+
229+
ovector_count = pcre2_get_ovector_count(match_data);
230+
231+
if (ovector_count != pcre2_get_ovector_count(match_data_jit))
232+
{
233+
puts("JIT ovector count did not match original");
234+
abort();
235+
}
236+
237+
for (uint32_t ovector = 0; ovector < ovector_count; ovector++)
238+
{
239+
PCRE2_UCHAR *bufferptr, *bufferptr_jit;
240+
PCRE2_SIZE bufflen, bufflen_jit;
241+
242+
bufferptr = bufferptr_jit = NULL;
243+
bufflen = bufflen_jit = 0;
244+
245+
errorcode = pcre2_substring_get_bynumber(match_data, ovector, &bufferptr, &bufflen);
246+
errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, ovector, &bufferptr_jit, &bufflen_jit);
247+
248+
if (errorcode != errorcode_jit)
249+
{
250+
printf("when extracting substring, JIT errorcode %d did not match original %d\n", errorcode_jit, errorcode);
251+
abort();
252+
}
253+
254+
if (errorcode >= 0)
255+
{
256+
if (bufflen != bufflen_jit)
257+
{
258+
printf("when extracting substring, JIT buffer length %zu did not match original %zu\n", bufflen_jit, bufflen);
259+
abort();
260+
}
261+
262+
if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
263+
{
264+
puts("when extracting substring, JIT buffer contents did not match original");
265+
abort();
266+
}
267+
}
268+
269+
pcre2_substring_free(bufferptr);
270+
pcre2_substring_free(bufferptr_jit);
271+
}
272+
}
273+
#endif
274+
275+
match_options = PCRE2_NO_JIT; /* For second time */
221276
}
222277

223278
/* Match with DFA twice, with and without options. */
@@ -278,6 +333,7 @@ for (i = 0; i < 2; i++)
278333
}
279334

280335
if (match_data != NULL) pcre2_match_data_free(match_data);
336+
if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
281337
if (match_context != NULL) pcre2_match_context_free(match_context);
282338

283339
return 0;

0 commit comments

Comments
 (0)