From b1f42c53cce3845ab6f68ba851e278c48fc1c066 Mon Sep 17 00:00:00 2001
From: Haozhun Jin
Date: Thu, 19 Mar 2015 15:15:14 -0700
Subject: [PATCH] Support \p 1-char char classes without curly braces

Add the OP2_ESC_P_CHAR_CHAR_PROPERTY syntax option (\pX, \PX) and set it
next to OP2_ESC_P_BRACE_CHAR_PROPERTY in the syntax definition changed
in Syntax.java.

Lexer.fetchTokenFor_charProperty: when the brace form is not taken, at
least one more character is available, and the new option is set, emit
a CHAR_PROPERTY token flagged as single-char. The brace form clears the
flag explicitly.

Lexer.fetchCharPropertyToCType: for a single-char token, fetch exactly
one character and look it up as the property name; otherwise scan up to
the closing '}' as before.

Token: keep the single-char flag in INT3 (getPropSingleChar /
setPropSingleChar).

Syntax.op2EscPCharCharProperty tests OP2_ESC_P_CHAR_CHAR_PROPERTY, so
the brace-less form can be enabled independently of the brace form.
---
Review notes (not part of the commit message):
 * op2EscPCharCharProperty() previously tested
   OP2_ESC_P_BRACE_CHAR_PROPERTY, leaving the new flag unused.
 * The single-char branch is now guarded by left() so that a pattern
   ending in \p or \P does not fetch past the end of input.
 * Indentation and blank context lines were reconstructed from a
   whitespace-collapsed copy of this patch; if context fails to match,
   try applying with --ignore-whitespace. The post-image blob ids on the
   index lines for Lexer.java and Syntax.java predate the two fixes.

 src/org/joni/Lexer.java                      | 22 ++++++++++++++------
 src/org/joni/Syntax.java                     |  7 ++++++-
 src/org/joni/Token.java                      |  7 +++++++
 src/org/joni/constants/SyntaxProperties.java |  1 +
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 695e8332..a441e7fd 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -956,6 +956,11 @@ private void fetchTokenFor_charProperty() {
                     unfetch();
                 }
             }
+            token.setPropSingleChar(false);
+        } else if (left() && syntax.op2EscPCharCharProperty()) {
+            token.type = TokenType.CHAR_PROPERTY;
+            token.setPropNot(c == 'P');
+            token.setPropSingleChar(true);
         } else {
             syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c);
         }
@@ -1252,13 +1257,18 @@ private void possessiveCheck() {
 
     protected final int fetchCharPropertyToCType() {
         mark();
-        while (left()) {
-            int last = p;
+        if (token.getPropSingleChar()) {
             fetch();
-            if (c == '}') {
-                return enc.propertyNameToCType(bytes, _p, last);
-            } else if (c == '(' || c == ')' || c == '{' || c == '|') {
-                throw new CharacterPropertyException(ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last);
+            return enc.propertyNameToCType(bytes, _p, p);
+        } else {
+            while (left()) {
+                int last = p;
+                fetch();
+                if (c == '}') {
+                    return enc.propertyNameToCType(bytes, _p, last);
+                } else if (c == '(' || c == ')' || c == '{' || c == '|') {
+                    throw new CharacterPropertyException(ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last);
+                }
             }
         }
         newInternalException(ERR_PARSER_BUG);
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index f5f9480b..f4aa87d0 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -286,6 +286,10 @@ public boolean op2QMarkLParenCondition() {
         return isOp2(OP2_QMARK_LPAREN_CONDITION);
     }
 
+    public boolean op2EscPCharCharProperty() {
+        return isOp2(OP2_ESC_P_CHAR_CHAR_PROPERTY);
+    }
+
     /**
      * BEHAVIOR
      *
@@ -540,7 +544,8 @@ public boolean warnReduntantNestedRepeat() {
         OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT |
         OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP |
         OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 |
-        OP2_ESC_P_BRACE_CHAR_PROPERTY ),
+        OP2_ESC_P_BRACE_CHAR_PROPERTY |
+        OP2_ESC_P_CHAR_CHAR_PROPERTY),
 
         ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),
 
diff --git a/src/org/joni/Token.java b/src/org/joni/Token.java
index 321ad91d..06d08f54 100644
--- a/src/org/joni/Token.java
+++ b/src/org/joni/Token.java
@@ -170,4 +170,11 @@ boolean getPropNot() {
     void setPropNot(boolean not) {
         INT2 = not ? 1 : 0;
     }
+
+    boolean getPropSingleChar() {
+        return INT3 != 0;
+    }
+    void setPropSingleChar(boolean singleChar) {
+        INT3 = singleChar ? 1 : 0;
+    }
 }
diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java
index d1e2cd39..44a0fea3 100644
--- a/src/org/joni/constants/SyntaxProperties.java
+++ b/src/org/joni/constants/SyntaxProperties.java
@@ -75,6 +75,7 @@ public interface SyntaxProperties {
     final int OP2_ESC_H_XDIGIT               = (1<<19); /* \h, \H */
     final int OP2_INEFFECTIVE_ESCAPE         = (1<<20); /* \ */
     final int OP2_OPTION_ECMASCRIPT          = (1<<21); /* EcmaScript quirks */
+    final int OP2_ESC_P_CHAR_CHAR_PROPERTY   = (1<<22); /* \pX, \PX */
 
     final int OP2_QMARK_LPAREN_CONDITION     = (1<<29); /* (?(cond)yes...|no...) */
 