Skip to content

Commit

Permalink
reimplement asserts with a safer approach
Browse files Browse the repository at this point in the history
The original asserts weren't very useful in debug mode as they
were lacking information on where they were being triggered and
were also unreliable and dangerous as they could result in
important code being removed and trigger crashes (even in non
debug mode).

Instead of implementing one generic assert for both modes, build
a more useful one for each one, so PCRE2_UNREACHABLE() could be
also used in non debug builds to help with optimization.

Reinstate all original assertions to use the new versions, which
will have the sideeffect of fixing indentation issues introduced
in the original, and include additional asserts that were provided
as the original ones were being audited for safety. Note that during
such audit the use of the original asserts might had been refactored
so it includes all those relevant code changes.

While at it, update the cmake and CI to help with testing and
update other documentation.
  • Loading branch information
carenas committed Sep 26, 2024
1 parent 6915395 commit 11bfded
Show file tree
Hide file tree
Showing 16 changed files with 180 additions and 92 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ jobs:
run: ./autogen.sh

- name: Configure
run: ./configure CPPFLAGS='-Wall -Wextra' --enable-jit --enable-pcre2-16 --enable-pcre2-32
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32

- name: Build
run: make -j2
run: make -j2 CPPFLAGS='-Wall -Wextra -Werror'

- name: Test (main test script)
run: ./RunTest
Expand Down Expand Up @@ -50,10 +50,10 @@ jobs:
run: ./autogen.sh

- name: Configure
run: ./configure CPPFLAGS='-Wall -Wextra' --enable-jit --enable-pcre2-16 --enable-pcre2-32
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32

- name: Build
run: make -j2
run: make -j2 CPPFLAGS='-Wall -Wextra -Werror'

- name: Test (main test script)
run: ./RunTest
Expand All @@ -77,7 +77,7 @@ jobs:
submodules: true

- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -B build
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build

- name: Build
run: cmake --build build
Expand Down Expand Up @@ -112,7 +112,7 @@ jobs:
submodules: true

- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -B build -A Win32
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A Win32

- name: Build
run: cmake --build build
Expand Down
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,6 @@ INCLUDE(CheckIncludeFile)
INCLUDE(CheckTypeSize)
INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR

CHECK_INCLUDE_FILE(stdio.h HAVE_STDIO_H)
CHECK_INCLUDE_FILE(stdlib.h HAVE_STDLIB_H)
CHECK_INCLUDE_FILE(assert.h HAVE_ASSERT_H)
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
Expand Down Expand Up @@ -1164,6 +1162,7 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Build 8 bit PCRE2 library ....... : ${PCRE2_BUILD_PCRE2_8}")
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}")
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}")
MESSAGE(STATUS " Include debugging code ...........: ${PCRE2_DEBUG}")
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
MESSAGE(STATUS " Use SELinux allocator in JIT .... : ${PCRE2_SUPPORT_JIT_SEALLOC}")
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
Expand Down
2 changes: 0 additions & 2 deletions config-cmake.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_STDIO_H 1
#cmakedefine HAVE_STDLIB_H 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
Expand Down
6 changes: 4 additions & 2 deletions maint/GenerateUcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@

# Some general parameters

MAX_LIST = 8 # keep on sync with the value in pcre2_auto_possess.c
MAX_UNICODE = 0x110000
NOTACHAR = 0xffffffff

Expand Down Expand Up @@ -648,7 +649,7 @@ def write_bitsets(list, item_size):
s = set(bprops[c])
for i in range(len(bool_props_lists)):
if s == set(bool_props_lists[i]):
break;
break
else:
bool_props_lists.append(bprops[c])
i += 1
Expand Down Expand Up @@ -693,6 +694,7 @@ def write_bitsets(list, item_size):
found = 1

# Add new characters to an existing set
# TODO: make sure the data doesn't overflow a list[]

if found:
found = 0
Expand All @@ -715,7 +717,7 @@ def write_bitsets(list, item_size):

caseless_offsets = [0] * MAX_UNICODE

offset = 1;
offset = 1
for s in caseless_sets:
for x in s:
caseless_offsets[x] = offset
Expand Down
2 changes: 1 addition & 1 deletion maint/GenerateUcpTables.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def stdnames(x):

# Output the long string of concatenated names

f.write('\nconst char PRIV(utt_names)[] =\n');
f.write('\nconst char PRIV(utt_names)[] =\n')
last = ''
for utt in utt_table:
if utt == utt_table[-1]:
Expand Down
9 changes: 4 additions & 5 deletions maint/README
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,10 @@ new release.

. I used to test new releases myself on a number of different operating
systems. For example, on Solaris it is helpful to test using Sun's cc
compiler as a change from gcc. Adding -xarch=v9 to the cc options does a
64-bit test, but it also needs -S 64 for pcre2test to increase the stack size
for test 2. Since I retired I can no longer do much of this. There are
automated tests under Ubuntu, Alpine, and Windows that are now set up as
GitHub actions. Check that they are running clean.
compiler as a change from gcc. Adding -m64 to the cc options does a 64-bit
build. Since I retired I can no longer do much of this. There are automated
tests under Ubuntu, Alpine, macOS and Windows that are now set up as GitHub
actions. Check that they are running clean.

. The buildbots at http://buildfarm.opencsw.org/ do some automated testing
of PCRE2 and should also be checked before putting out a release. (June 2024:
Expand Down
30 changes: 18 additions & 12 deletions src/pcre2_auto_possess.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ repeats into possessive repeats where possible. */

#include "pcre2_internal.h"

/* This macro represents the max size of list[] and that is used to keep
track of UCD info in several places, it should be kept on sync with the
value used by GenerateUcd.py */
#define MAX_LIST 8

/*************************************************
* Tables for auto-possessification *
Expand Down Expand Up @@ -199,7 +203,7 @@ static BOOL
check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
BOOL negated)
{
BOOL ok;
BOOL ok, rc;
const uint32_t *p;
const ucd_record *prop = GET_UCD(c);

Expand Down Expand Up @@ -240,13 +244,13 @@ switch(ptype)
{
HSPACE_CASES:
VSPACE_CASES:
return negated;
rc = negated;
break;

default:
return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
}
PCRE2_UNREACHABLE(); /* Control never reaches here */
break;
return rc;

case PT_WORD:
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
Expand All @@ -260,7 +264,7 @@ switch(ptype)
if (c < *p) return !negated;
if (c == *p++) return negated;
}
PCRE2_UNREACHABLE(); /* Control never reaches here */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
break;

/* Haven't yet thought these through. */
Expand Down Expand Up @@ -452,10 +456,12 @@ switch(c)
code += 2;

do {
if (clist_dest >= list + 8)
if (clist_dest >= list + MAX_LIST)
{
/* Early return if there is not enough space. This should never
happen, since all clists are shorter than 5 character now. */
/* Early return if there is not enough space. GenerateUcd.py
generated a list with more than 5 characters and something
must be done about that going forward. */
PCRE2_DEBUG_UNREACHABLE(); /* Remove if it ever triggers */
list[2] = code[0];
list[3] = code[1];
return code;
Expand Down Expand Up @@ -539,7 +545,7 @@ compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
{
PCRE2_UCHAR c;
uint32_t list[8];
uint32_t list[MAX_LIST];
const uint32_t *chr_ptr;
const uint32_t *ochr_ptr;
const uint32_t *list_ptr;
Expand Down Expand Up @@ -1121,7 +1127,7 @@ for(;;)
if (list[1] == 0) return TRUE;
}

PCRE2_UNREACHABLE(); /* Control never reaches here */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
}


Expand Down Expand Up @@ -1151,7 +1157,7 @@ PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
PCRE2_UCHAR c;
PCRE2_SPTR end;
PCRE2_UCHAR *repeat_opcode;
uint32_t list[8];
uint32_t list[MAX_LIST];
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
Expand Down
32 changes: 21 additions & 11 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1848,8 +1848,9 @@ else
/* As we know we are at a digit, the only possible error from
read_number() is a number that is too large to be a group number. Because
that number might be still valid if read as an octal, errorcodeptr is not
set on failure and therefore a bogus value of INT_MAX is set instead that
will be used later to properly set the error, if not falling through. */
set on failure and therefore a sentinel value of INT_MAX is used instead
of the original value, and will be used later to properly set the error,
if not falling through. */

if (!read_number(&ptr, ptrend, -1, MAX_GROUP_NUMBER, 0, &s, errorcodeptr))
s = INT_MAX;
Expand All @@ -1860,10 +1861,14 @@ else
if (s < 10 || c >= CHAR_8 || (unsigned)s <= bracount)
{
/* s > MAX_GROUP_NUMBER should not be possible because of read_number(),
but we keep it just to be safe and because it will also catch the bogus
value set on failure of that function. */
but we keep it just to be safe and because it will also catch the
sentinel value that was set on failure by that function. */

if ((unsigned)s > MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
if ((unsigned)s > MAX_GROUP_NUMBER)
{
PCRE2_ASSERT(s == INT_MAX);
*errorcodeptr = ERR61;
}
else escape = -s; /* Indicates a back reference */
break;
}
Expand Down Expand Up @@ -5245,7 +5250,7 @@ for (;;)
}
}

PCRE2_UNREACHABLE(); /* Control never reaches here */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
}


Expand Down Expand Up @@ -5525,6 +5530,7 @@ have duplicate names. Give an internal error. */

if (i >= cb->names_found)
{
PCRE2_DEBUG_UNREACHABLE();
*errorcodeptr = ERR53;
cb->erroroffset = name - cb->start_pattern;
return FALSE;
Expand Down Expand Up @@ -8452,7 +8458,7 @@ for (;; pptr++)
} /* End of big switch */
} /* End of big loop */

PCRE2_UNREACHABLE(); /* Control never reaches here */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
}


Expand Down Expand Up @@ -8758,7 +8764,7 @@ for (;;)
pptr++;
}

PCRE2_UNREACHABLE(); /* Control never reaches here */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
}


Expand Down Expand Up @@ -9382,6 +9388,7 @@ for (;; pptr++)
/* This should never occur. */

case META_END:
PCRE2_DEBUG_UNREACHABLE();
return NULL;

/* The data for these items is variable in length. */
Expand Down Expand Up @@ -9446,7 +9453,7 @@ for (;; pptr++)
pptr += meta_extra_lengths[meta];
}

PCRE2_UNREACHABLE(); /* Control never reaches here */
PCRE2_UNREACHABLE(); /* Control never reaches here */
}


Expand Down Expand Up @@ -10508,7 +10515,8 @@ if ((options & PCRE2_LITERAL) == 0)
optim_flags &= ~(p->value);

/* For backward compatibility the three original VERBs to disable
optimizations need to also update the corresponding external option. */
optimizations need to also update the corresponding bit in the
external options. */

switch(p->value)
{
Expand All @@ -10528,7 +10536,9 @@ if ((options & PCRE2_LITERAL) == 0)
break;

default:
PCRE2_UNREACHABLE();
/* Fail assertion if a new type was added and someone forgot to
update this switch */
PCRE2_DEBUG_UNREACHABLE();
}
break; /* Out of the table scan loop */
}
Expand Down
11 changes: 6 additions & 5 deletions src/pcre2_convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -1137,8 +1137,7 @@ for (int i = 0; i < 2; i++)
break;

default:
*bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_INTERNAL;
goto EXIT;
}

if (rc != 0 || /* Error */
Expand All @@ -1158,10 +1157,12 @@ for (int i = 0; i < 2; i++)
use_length = *bufflenptr + 1;
}

/* Normally, we should exit this function in the previous loop, but we
can't return an API call without a meaningful value, so if something
went terribly wrong, we then will just report it as an intenal error */
/* Something went terribly wrong. Trigger and assert and return an error */
PCRE2_DEBUG_UNREACHABLE();

EXIT:

*bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_INTERNAL;
}

Expand Down
4 changes: 3 additions & 1 deletion src/pcre2_dfa_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -3570,7 +3570,9 @@ switch(re->newline_convention)
mb->nltype = NLTYPE_ANYCRLF;
break;

default: return PCRE2_ERROR_INTERNAL;
default:
PCRE2_DEBUG_UNREACHABLE();
return PCRE2_ERROR_INTERNAL;
}

/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
Expand Down
Loading

0 comments on commit 11bfded

Please sign in to comment.