Skip to content

Commit

Permalink
Copy regex grammar from hoa/regex into the project
Browse files Browse the repository at this point in the history
  • Loading branch information
Seldaek authored and ondrejmirtes committed Jul 19, 2024
1 parent 9bd84cf commit bacb3c6
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 3 deletions.
198 changes: 198 additions & 0 deletions resources/RegexGrammar.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
//
// Hoa
//
//
// @license
//
// New BSD License
//
// Copyright © 2007-2017, Hoa community. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the Hoa nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Grammar \Hoa\Regex\Grammar.
//
// Provide grammar of PCRE (Perl Compatible Regular Expression)for the LL(k)
// parser. More informations at http://pcre.org/pcre.txt, sections pcrepattern &
// pcresyntax.
//
// @copyright Copyright © 2007-2017 Hoa community.
// @license New BSD License
//


// Skip.
%skip nl \n

// Character classes.
%token negative_class_ \[\^
%token class_ \[
%token _class \]
%token range \-

// Internal options.
%token internal_option \(\?[\-+]?[imsx]\)

// Lookahead and lookbehind assertions.
%token lookahead_ \(\?=
%token negative_lookahead_ \(\?!
%token lookbehind_ \(\?<=
%token negative_lookbehind_ \(\?<!

// Conditions.
%token named_reference_ \(\?\(< -> nc
%token absolute_reference_ \(\?\((?=\d) -> c
%token relative_reference_ \(\?\((?=[\+\-]) -> c
%token c:index [\+\-]?\d+ -> default
%token assertion_reference_ \(\?\(

// Comments.
%token comment_ \(\?# -> co
%token co:_comment \) -> default
%token co:comment .*?(?=(?<!\\)\))

// Capturing group.
%token named_capturing_ \(\?< -> nc
%token nc:_named_capturing > -> default
%token nc:capturing_name .+?(?=(?<!\\)>)
%token non_capturing_ \(\?:
%token non_capturing_reset_ \(\?\|
%token atomic_group_ \(\?>
%token capturing_ \(
%token _capturing \)

// Quantifiers (by default, greedy).
%token zero_or_one_possessive \?\+
%token zero_or_one_lazy \?\?
%token zero_or_one \?
%token zero_or_more_possessive \*\+
%token zero_or_more_lazy \*\?
%token zero_or_more \*
%token one_or_more_possessive \+\+
%token one_or_more_lazy \+\?
%token one_or_more \+
%token exactly_n \{[0-9]+\}
%token n_to_m_possessive \{[0-9]+,[0-9]+\}\+
%token n_to_m_lazy \{[0-9]+,[0-9]+\}\?
%token n_to_m \{[0-9]+,[0-9]+\}
%token n_or_more_possessive \{[0-9]+,\}\+
%token n_or_more_lazy \{[0-9]+,\}\?
%token n_or_more \{[0-9]+,\}

// Alternation.
%token alternation \|

// Literal.
%token character \\([aefnrt]|c[\x00-\x7f])
%token dynamic_character \\([0-7]{3}|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+})
// Please, see PCRESYNTAX(3), General Category properties, PCRE special category
// properties and script names for \p{} and \P{}.
%token character_type \\([CdDhHNRsSvVwWX]|[pP]{[^}]+})
%token anchor \\(bBAZzG)|\^|\$
%token match_point_reset \\K
%token literal \\.|.


// Rules.

#expression:
alternation()

alternation:
concatenation() ( ::alternation:: concatenation() #alternation )*

concatenation:
( internal_options() | assertion() | quantification() | condition() )
( ( internal_options() | assertion() | quantification() | condition() ) #concatenation )*

#internal_options:
<internal_option>

#condition:
(
::named_reference_:: <capturing_name> ::_named_capturing:: #namedcondition
| (
::relative_reference_:: #relativecondition
| ::absolute_reference_:: #absolutecondition
)
<index>
| ::assertion_reference_:: alternation() #assertioncondition
)
::_capturing:: concatenation()?
( ::alternation:: concatenation()? )?
::_capturing::

assertion:
(
::lookahead_:: #lookahead
| ::negative_lookahead_:: #negativelookahead
| ::lookbehind_:: #lookbehind
| ::negative_lookbehind_:: #negativelookbehind
)
alternation() ::_capturing::

quantification:
( class() | simple() ) ( quantifier() #quantification )?

quantifier:
<zero_or_one_possessive> | <zero_or_one_lazy> | <zero_or_one>
| <zero_or_more_possessive> | <zero_or_more_lazy> | <zero_or_more>
| <one_or_more_possessive> | <one_or_more_lazy> | <one_or_more>
| <exactly_n>
| <n_to_m_possessive> | <n_to_m_lazy> | <n_to_m>
| <n_or_more_possessive> | <n_or_more_lazy> | <n_or_more>

#class:
(
::negative_class_:: #negativeclass
| ::class_::
)
( range() | literal() )+
::_class::

#range:
literal() ::range:: literal()

simple:
capturing()
| literal()

capturing:
::comment_:: <comment>? ::_comment:: #comment
| (
::named_capturing_:: <capturing_name> ::_named_capturing:: #namedcapturing
| ::non_capturing_:: #noncapturing
| ::non_capturing_reset_:: #noncapturingreset
| ::atomic_group_:: #atomicgroup
| ::capturing_::
)
alternation() ::_capturing::

literal:
<character>
| <dynamic_character>
| <character_type>
| <anchor>
| <match_point_reset>
| <literal>
2 changes: 1 addition & 1 deletion src/DependencyInjection/ValidateIgnoredErrorsExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public function loadConfiguration(): void
}

/** @throws void */
$parser = Llk::load(new Read('hoa://Library/Regex/Grammar.pp'));
$parser = Llk::load(new Read(__DIR__ . '/../../resources/RegexGrammar.pp'));
$reflectionProvider = new DummyReflectionProvider();
$reflectionProviderProvider = new DirectReflectionProviderProvider($reflectionProvider);
ReflectionProviderStaticAccessor::registerInstance($reflectionProvider);
Expand Down
2 changes: 1 addition & 1 deletion src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ private function parseGroups(string $regex): ?array
{
if (self::$parser === null) {
/** @throws void */
self::$parser = Llk::load(new Read('hoa://Library/Regex/Grammar.pp'));
self::$parser = Llk::load(new Read(__DIR__ . '/../../resources/RegexGrammar.pp'));
}

try {
Expand Down
2 changes: 1 addition & 1 deletion tests/PHPStan/Command/IgnoredRegexValidatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public function testValidate(
bool $expectAllErrorsIgnored,
): void
{
$grammar = new Read('hoa://Library/Regex/Grammar.pp');
$grammar = new Read(__DIR__ . '/../../../resources/RegexGrammar.pp');
$parser = Llk::load($grammar);
$validator = new IgnoredRegexValidator($parser, self::getContainer()->getByType(TypeStringResolver::class));

Expand Down

0 comments on commit bacb3c6

Please sign in to comment.