Skip to content

Commit

Permalink
Add PCRE2_EXTRA_VANILLA_SYNTAX to disable PCRE2 extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
NWilson committed Sep 17, 2024
1 parent 5e75d9b commit f3d23c0
Show file tree
Hide file tree
Showing 5 changed files with 330 additions and 12 deletions.
2 changes: 2 additions & 0 deletions src/pcre2.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
/* ... other PRs ... */
#define PCRE2_EXTRA_VANILLA_SYNTAX 0x00008000u /* C */

/* These are for pcre2_jit_compile(). */

Expand Down
59 changes: 47 additions & 12 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ are allowed. */
PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \
PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \
PCRE2_EXTRA_ASCII_DIGIT)
PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_VANILLA_SYNTAX)

/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
Expand Down Expand Up @@ -4059,10 +4059,17 @@ while (ptr < ptrend)
goto FAILED;
}

meta = alasmeta[i].meta;
if ((meta == META_SCS_NUMBER || meta == META_LOOKAHEAD_NA || meta == META_LOOKBEHIND_NA) &&
(xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) != 0)
{
errorcode = ERR95; /* Do not expose hidden non-Perl compatible syntax */
goto FAILED;
}

/* If an assertion condition is expected at this point, only atomic
lookaround assertions are valid. */

meta = alasmeta[i].meta;
if (prev_expect_cond_assert > 0 &&
(meta < META_LOOKAHEAD || meta > META_LOOKBEHINDNOT))
{
Expand Down Expand Up @@ -4384,6 +4391,12 @@ while (ptr < ptrend)
/* There are some two-character sequences that start with 'a'. */

case CHAR_a:
if ((xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) != 0)
{
errorcode = ERR11;
ptr--; /* Correct the offset */
goto FAILED;
}
if (ptr < ptrend)
{
if (*ptr == CHAR_D)
Expand Down Expand Up @@ -4430,10 +4443,19 @@ while (ptr < ptrend)
case CHAR_i: *optset |= PCRE2_CASELESS; break;
case CHAR_m: *optset |= PCRE2_MULTILINE; break;
case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break;
case CHAR_s: *optset |= PCRE2_DOTALL; break;
case CHAR_U: *optset |= PCRE2_UNGREEDY; break;

case CHAR_r:
if ((xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) != 0)
{
errorcode = ERR11;
ptr--; /* Correct the offset */
goto FAILED;
}
*xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT;
break;

/* If x appears twice it sets the extended extended option. */

case CHAR_x:
Expand Down Expand Up @@ -4589,6 +4611,11 @@ while (ptr < ptrend)
/* ---- Callout with numerical or string argument ---- */

case CHAR_C:
if ((xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) != 0)
{
errorcode = ERR11;
goto FAILED;
}
if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS;

/* If the previous item was a condition starting (?(? an assertion,
Expand Down Expand Up @@ -4763,7 +4790,8 @@ while (ptr < ptrend)

else if (ptrend - ptr >= 10 &&
PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
ptr[7] != CHAR_RIGHT_PARENTHESIS)
ptr[7] != CHAR_RIGHT_PARENTHESIS &&
(xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) == 0)
{
uint32_t ge = 0;
int major = 0;
Expand Down Expand Up @@ -4897,7 +4925,12 @@ while (ptr < ptrend)
goto POST_ASSERTION;

case CHAR_ASTERISK:
POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */
if ((xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) != 0)
{
errorcode = ERR11;
goto FAILED;
}
POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (*napla: */
*parsed_pattern++ = META_LOOKAHEAD_NA;
ptr++;
goto POST_ASSERTION;
Expand All @@ -4918,7 +4951,8 @@ while (ptr < ptrend)
if (ptrend - ptr <= 1 ||
(ptr[1] != CHAR_EQUALS_SIGN &&
ptr[1] != CHAR_EXCLAMATION_MARK &&
ptr[1] != CHAR_ASTERISK))
ptr[1] != CHAR_ASTERISK) ||
(ptr[1] == CHAR_ASTERISK && (xoptions & PCRE2_EXTRA_VANILLA_SYNTAX) != 0))
{
terminator = CHAR_GREATER_THAN_SIGN;
goto DEFINE_NAME;
Expand Down Expand Up @@ -10493,11 +10527,11 @@ for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET;

/* --------------- Start looking at the pattern --------------- */

/* Unless PCRE2_LITERAL is set, check for global one-time option settings at
the start of the pattern, and remember the offset to the actual regex. With
valgrind support, make the terminator of a zero-terminated pattern
inaccessible. This catches bugs that would otherwise only show up for
non-zero-terminated patterns. */
/* Unless PCRE2_LITERAL is set in the options, or PCRE2_EXTRA_VANILLA_SYNTAX is
set in the compile context's extra options, check for global one-time option
settings at the start of the pattern, and remember the offset to the actual
regex. With valgrind support, make the terminator of a zero-terminated pattern
inaccessible. This catches bugs that would otherwise only show up for
non-zero-terminated patterns. */

#ifdef SUPPORT_VALGRIND
if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
Expand All @@ -10506,7 +10540,8 @@ if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
ptr = pattern;
skipatstart = 0;

if ((options & PCRE2_LITERAL) == 0)
if ((options & PCRE2_LITERAL) == 0 &&
(ccontext->extra_options & PCRE2_EXTRA_VANILLA_SYNTAX) == 0)
{
while (patlen - skipatstart >= 2 &&
ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
Expand Down
1 change: 1 addition & 0 deletions src/pcre2test.c
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ static modstruct modlist[] = {
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
{ "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
{ "vanilla_syntax", MOD_CTC, MOD_OPT, PCRE2_EXTRA_VANILLA_SYNTAX, CO(extra_options) },
{ "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
};

Expand Down
113 changes: 113 additions & 0 deletions testdata/testinput2
Original file line number Diff line number Diff line change
Expand Up @@ -6392,4 +6392,117 @@ a)"xI

# --------------

# "Vanilla" syntax option

# -- start of pattern

/(*NOTEMPTY)a/
a

/(*NtOMeTpY)a/

/(*NOTEMPTY)a/vanilla_syntax

# -- inline pattern options

/Z(?a)Z(?a:Z)Z(?^a)Z(?x-a)Z(?r)Z/
ZZZZZZZ

/Z(?F)Z/

/Z(?a)Z/vanilla_syntax

/Z(?a:Z)Z/vanilla_syntax

/Z(?^a)Z/vanilla_syntax

/Z(?x-a)Z/vanilla_syntax

/Z(?r)Z/vanilla_syntax

/Z(?i:z)Z/vanilla_syntax
ZZZ

# -- version detection

/(?(VERSION>=10.4)yes|no)/
yes

/(?(vReSoIn>=10.4)yes|no)/

/(?(VERSION>=10.4)yes|no)/vanilla_syntax

# -- callouts

/A(?C0)B/
AB

/A(?F0)B/

/A(?C0)B/vanilla_syntax

# -- scan_substring

/\b(\w++)(*scs:(1).+rh)/
>myrrh<

/\b(\w++)(*szz:(1).+rh)/

/\b(\w++)(*scs:(1).+rh)/vanilla_syntax

/\b(\w++)(*scan_substring:(1).+rh)/
>myrrh<

/\b(\w++)(*szzz_zzzzzzzzz:(1).+rh)/

/\b(\w++)(*scan_substring:(1).+rh)/vanilla_syntax

# -- napla

/(*napla:(a{1,2}))\1\1/
aa

/(*nzzzz:(a{1,2}))\1\1/

/(*napla:(a{1,2}))\1\1/vanilla_syntax

/(*non_atomic_positive_lookahead:(a{1,2}))\1\1/
aa

/(*nzz_zzzzzz_positive_lookahead:(a{1,2}))\1\1/

/(*non_atomic_positive_lookahead:(a{1,2}))\1\1/vanilla_syntax

/(?*(a{1,2}))\1\1/
aa

/(?;(a{1,2}))\1\1/

/(?*(a{1,2}))\1\1/vanilla_syntax

# -- naplb

/^..(*naplb:a(b)|(a)b)\2\2/
abaa

/^..(*nzzzz:a(b)|(a)b)\2\2/

/^..(*naplb:a(b)|(a)b)\2\2/vanilla_syntax

/^..(*non_atomic_positive_lookbehind:a(b)|(a)b)\2\2/
abaa

/^..(*nzz_zzzzzz_positive_lookbehind:a(b)|(a)b)\2\2/

/^..(*non_atomic_positive_lookbehind:a(b)|(a)b)\2\2/vanilla_syntax

/^..(?<*a(b)|(a)b)\2\2/
abaa

/^..(?<;a(b)|(a)b)\2\2/

/^..(?<*a(b)|(a)b)\2\2/vanilla_syntax

# --------------

# End of testinput2
Loading

0 comments on commit f3d23c0

Please sign in to comment.