From 24a3e5e974a6aa034ecf9b9e137e50b7c26c6992 Mon Sep 17 00:00:00 2001 From: Alex Kenion Date: Fri, 26 Jul 2024 14:32:20 -0400 Subject: [PATCH 1/2] Fixed encoding handling when parsing PHP files --- wordfence/cli/vulnscan/reporting.py | 5 +- wordfence/cli/vulnscan/vulnscan.py | 5 +- wordfence/intel/vulnerabilities.py | 2 +- wordfence/php/lexing.py | 354 ++++++++++++++-------------- wordfence/php/parsing.py | 148 ++++++------ wordfence/util/versioning.py | 51 ++-- wordfence/wordpress/site.py | 6 +- 7 files changed, 292 insertions(+), 279 deletions(-) diff --git a/wordfence/cli/vulnscan/reporting.py b/wordfence/cli/vulnscan/reporting.py index 7c0f04b2..962cc515 100644 --- a/wordfence/cli/vulnscan/reporting.py +++ b/wordfence/cli/vulnscan/reporting.py @@ -22,7 +22,7 @@ class VulnScanReportColumn(ReportColumnEnum): SOFTWARE_TYPE = 'software_type', lambda record: record.software.type.value SLUG = 'slug', lambda record: record.software.slug - VERSION = 'version', lambda record: record.software.version + VERSION = 'version', lambda record: record.software.version.decode('ascii') ID = 'id', \ lambda record: record.vulnerability.identifier TITLE = 'title', lambda record: record.vulnerability.title @@ -92,6 +92,7 @@ def get_severity_color(self, severity: str) -> str: def format_record(self, record) -> str: vuln = record.vulnerability sw = record.software + sw_version = sw.version.decode('ascii') yellow = escape(color=Color.YELLOW) link = vuln.get_wordfence_link() blue = escape(color=Color.BLUE) @@ -115,7 +116,7 @@ def format_record(self, record) -> str: info_message = '' return ( f'{yellow}Found {severity_message}{info_message}vulnerability ' - f'{vuln.title} in {sw.slug}({sw.version})\n' + f'{vuln.title} in {sw.slug}({sw_version})\n' f'{white}Details: {blue}{link}{RESET}' ) diff --git a/wordfence/cli/vulnscan/vulnscan.py b/wordfence/cli/vulnscan/vulnscan.py index a0f88551..a749ef3a 100644 --- a/wordfence/cli/vulnscan/vulnscan.py +++ b/wordfence/cli/vulnscan/vulnscan.py @@ -111,7 +111,10 @@ def _scan( raise log.debug('Located WordPress files at ' + os.fsdecode(site.core_path)) version = site.get_version() - log.debug(f'WordPress Core Version: {version}') + log.debug( + 'WordPress Core Version: ' + + version.decode('ascii', 'replace') + ) if scan_path is None: scan_path = path scanner.scan_core(version, scan_path) diff --git a/wordfence/intel/vulnerabilities.py b/wordfence/intel/vulnerabilities.py index eb3df553..bdd29c3a 100644 --- a/wordfence/intel/vulnerabilities.py +++ b/wordfence/intel/vulnerabilities.py @@ -46,7 +46,7 @@ class SoftwareType(str, Enum): class ScannableSoftware: type: SoftwareType slug: str - version: str + version: bytes scan_path: Optional[str] def get_key(self) -> str: diff --git a/wordfence/php/lexing.py b/wordfence/php/lexing.py index 41b973c9..ce316a70 100644 --- a/wordfence/php/lexing.py +++ b/wordfence/php/lexing.py @@ -2,7 +2,7 @@ from collections import deque from enum import Enum, auto -from typing import Generator, IO, Optional, Union, Set +from typing import Generator, BinaryIO, Optional, Union, Set class LexingException(Exception): @@ -18,11 +18,11 @@ class MatchType(Enum): class TokenMatcher: - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: return MatchType.NONE # TODO: Find a more efficient algorithm for string end matching - def match_at_end(self, value: str) -> MatchType: + def match_at_end(self, value: bytes) -> MatchType: match_length = 1 total_length = len(value) while match_length <= total_length: @@ -34,7 +34,7 @@ def match_at_end(self, value: str) -> MatchType: return MatchType.NONE -def match_literal(literal: str, value: str) -> MatchType: +def match_literal(literal: bytes, value: bytes) -> MatchType: if value == literal: return MatchType.FINAL_MATCH elif len(value) < len(literal) and literal.find(value) == 0: @@ -45,16 +45,16 @@ def match_literal(literal: str, value: str) -> MatchType: class LiteralTokenMatcher(TokenMatcher): - def __init__(self, value: str): + def __init__(self, value: bytes): self.value = value - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: return match_literal(self.value, value) class WhitespaceTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: # TODO: Does this match PHP's definition of whitespace? if value.isspace(): return MatchType.MATCH @@ -63,17 +63,17 @@ def match(self, value: str) -> MatchType: class OpenTagTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: # TODO: Handle tag variations - return match_literal('' +DOC_COMMENT_START = b'/*' +DOC_COMMENT_END = b'*/' +COMMENT_START = b'//' +ALTERNATE_COMMENT_START = b'#' +COMMENT_END = b'\n' +CLOSING_TAG = b'?>' POSSIBLE_COMMENT_STARTS = { COMMENT_START, ALTERNATE_COMMENT_START @@ -86,12 +86,12 @@ def match(self, value: str) -> MatchType: class EnclosedTokenMatcher(TokenMatcher): - def __init__(self, start: str, end: str): + def __init__(self, start: bytes, end: bytes): self.start = start self.end = end self.end_length = len(end) - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: if value.find(self.start) == 0: if value.find(self.end) == \ len(value) - self.end_length: @@ -114,7 +114,7 @@ def __init__(self): class CommentTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: for start in POSSIBLE_COMMENT_STARTS: if value.find(start) == 0: for end in POSSIBLE_COMMENT_ENDS: @@ -126,14 +126,14 @@ def match(self, value: str) -> MatchType: return MatchType.NONE -VARIABLE_PREFIX = '$' -IDENTIFIER_PATTERN = re.compile(r'^[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*$') +VARIABLE_PREFIX = b'$' +IDENTIFIER_PATTERN = re.compile(br'^[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*$') class VariableTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: - if value[0] == VARIABLE_PREFIX: + def match(self, value: bytes) -> MatchType: + if value[0:1] == VARIABLE_PREFIX: if IDENTIFIER_PATTERN.fullmatch(value[1:]) is not None: return MatchType.MATCH if len(value) == 1: @@ -143,37 +143,29 @@ def match(self, value: str) -> MatchType: class IdentifierTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: if IDENTIFIER_PATTERN.fullmatch(value): return MatchType.MATCH return MatchType.NONE -class SingleCharacterTokenMatcher(TokenMatcher): - - def match(self, value: str) -> MatchType: - if len(value) == 1: - return MatchType.MATCH - return MatchType.NONE - - STRING_QUOTES = { - '"', - "'" + b'"', + b"'" } -STRING_ESCAPE = "\\" +STRING_ESCAPE = b"\\" class StringLiteralTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: - quote = value[0] + def match(self, value: bytes) -> MatchType: + quote = value[0:1] if quote in STRING_QUOTES: escaped = None length = len(value) end = length - 1 for index in range(1, length): - character = value[index] + character = value[index:index+1] if escaped is None: escaped = False if character == quote and not escaped: @@ -186,12 +178,12 @@ def match(self, value: str) -> MatchType: return MatchType.NONE -INTEGER_PATTERN = re.compile('^[0-9]+$') +INTEGER_PATTERN = re.compile(b'^[0-9]+$') class IntegerLiteralTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: # TODO: Support alternate integer syntaxes if INTEGER_PATTERN.match(value) is not None: return MatchType.MATCH @@ -200,13 +192,13 @@ def match(self, value: str) -> MatchType: class UnmatchingTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: return MatchType.NONE class CharacterTokenMatcher(TokenMatcher): - def match(self, value: str) -> MatchType: + def match(self, value: bytes) -> MatchType: if len(value) == 1: return MatchType.FINAL_MATCH return MatchType.NONE @@ -239,130 +231,130 @@ class TokenType(Enum): LNUMBER = IntegerLiteralTokenMatcher(), # Literal token types - INCLUDE_ONCE = _literal('include_once'), - INCLUDE = _literal('include'), - EVAL = _literal('eval'), - REQUIRE_ONCE = _literal('require_once'), - REQUIRE = _literal('require'), - LOGICAL_OR = _literal('or'), - LOGICAL_XOR = _literal('xor'), - LOGICAL_AND = _literal('and'), - PRINT = _literal('print'), - YIELD = _literal('yield'), - YIELD_FROM = _literal('yield from'), - INSTANCEOF = _literal('instanceof'), - NEW = _literal('new'), - CLONE = _literal('clone'), - EXIT = _literal('exit'), - IF = _literal('if'), - ELSEIF = _literal('elseif'), - ELSE = _literal('else'), - ENDIF = _literal('endif'), - ECHO = _literal('echo'), - DO = _literal('do'), - WHILE = _literal('while'), - ENDWHILE = _literal('endwhile'), - FOREACH = _literal('foreach'), - FOR = _literal('for'), - ENDFOR = _literal('endfor'), - ENDFOREACH = _literal('endforeach'), - DECLARE = _literal('declare'), - ENDDECLARE = _literal('enddeclare'), - AS = _literal('as'), - SWITCH = _literal('switch'), - ENDSWITCH = _literal('endswitch'), - CASE = _literal('case'), - DEFAULT = _literal('default'), - MATCH = _literal('match'), - BREAK = _literal('break'), - CONTINUE = _literal('continue'), - GOTO = _literal('goto'), - FUNCTION = _literal('function'), - FN = _literal('fn'), - CONST = _literal('const'), - RETURN = _literal('return'), - TRY = _literal('try'), - CATCH = _literal('catch'), - FINALLY = _literal('finally'), - THROW = _literal('throw'), - USE = _literal('use'), - INSTEADOF = _literal('insteadof'), - GLOBAL = _literal('global'), - STATIC = _literal('static'), - ABSTRACT = _literal('abstract'), - FINAL = _literal('final'), - PRIVATE = _literal('private'), - PROTECTED = _literal('protected'), - PUBLIC = _literal('public'), - READONLY = _literal('readonly'), - VAR = _literal('var'), - UNSET = _literal('unset'), - ISSET = _literal('isset'), - EMPTY = _literal('empty'), - HALT_COMPILER = _literal('__halt_compiler'), - CLASS = _literal('class'), - TRAIT = _literal('trait'), - INTERFACE = _literal('interface'), - ENUM = _literal('enum'), - EXTENDS = _literal('extends'), - IMPLEMENTS = _literal('implements'), - NAMESPACE = _literal('namespace'), - LIST = _literal('list'), - ARRAY = _literal('array'), - CALLABLE = _literal('callable'), - LINE = _literal('__LINE__'), - FILE = _literal('__FILE__'), - DIR = _literal('__DIR__'), - CLASS_C = _literal('__CLASS__'), - TRAIT_C = _literal('__TRAIT__'), - METHOD_C = _literal('__METHOD__'), - FUNC_C = _literal('__FUNCTION__'), - NS_C = _literal('__NAMESPACE__'), - PLUS_EQUAL = _literal('+='), - MINUS_EQUAL = _literal('-='), - MUL_EQUAL = _literal('*='), - DIV_EQUAL = _literal('/='), - CONCAT_EQUAL = _literal('.='), - MOD_EQUAL = _literal('%='), - AND_EQUAL = _literal('&='), - OR_EQUAL = _literal('|='), - XOR_EQUAL = _literal('^='), - SL_EQUAL = _literal('<<='), - SR_EQUAL = _literal('>>='), - COALESCE_EQUAL = _literal('??='), - BOOLEAN_OR = _literal('||'), - BOOLEAN_AND = _literal('&&'), - IS_IDENTICAL = _literal('==='), - IS_NOT_IDENTICAL = _literal('!=='), - IS_SMALLER_OR_EQUAL = _literal('<='), - IS_GREATER_OR_EQUAL = _literal('>='), - SPACESHIP = _literal('<=>'), - IS_EQUAL = _literal('=='), - IS_NOT_EQUAL = _literal('!='), - SL = _literal('<<'), - SR = _literal('>>'), - INC = _literal('++'), - DEC = _literal('--'), - INT_CAST = _literal('(int)'), - DOUBLE_CAST = _literal('(double)'), - STRING_CAST = _literal('(string)'), - ARRAY_CAST = _literal('(array)'), - OBJECT_CAST = _literal('(object)'), - BOOL_CAST = _literal('(bool)'), - UNSET_CAST = _literal('(unset)'), - OBJECT_OPERATOR = _literal('->'), - NULLSAFE_OBJECT_OPERATOR = _literal('?->'), - DOUBLE_ARROW = _literal('=>'), - DOLLAR_OPEN_CURLY_BRACES = _literal('${'), - CURLY_OPEN = _literal('{$'), - PAAMAYIM_NEKUDOTAYIM = _literal('::'), - NS_SEPARATOR = _literal('\\'), - ELLIPSIS = _literal('...'), - COALESCE = _literal('??'), - POW = _literal('**'), - POW_EQUAL = _literal('**='), - ATTRIBUTE = _literal('#['), - OPEN_TAG_WITH_ECHO = _literal('>='), + COALESCE_EQUAL = _literal(b'??='), + BOOLEAN_OR = _literal(b'||'), + BOOLEAN_AND = _literal(b'&&'), + IS_IDENTICAL = _literal(b'==='), + IS_NOT_IDENTICAL = _literal(b'!=='), + IS_SMALLER_OR_EQUAL = _literal(b'<='), + IS_GREATER_OR_EQUAL = _literal(b'>='), + SPACESHIP = _literal(b'<=>'), + IS_EQUAL = _literal(b'=='), + IS_NOT_EQUAL = _literal(b'!='), + SL = _literal(b'<<'), + SR = _literal(b'>>'), + INC = _literal(b'++'), + DEC = _literal(b'--'), + INT_CAST = _literal(b'(int)'), + DOUBLE_CAST = _literal(b'(double)'), + STRING_CAST = _literal(b'(string)'), + ARRAY_CAST = _literal(b'(array)'), + OBJECT_CAST = _literal(b'(object)'), + BOOL_CAST = _literal(b'(bool)'), + UNSET_CAST = _literal(b'(unset)'), + OBJECT_OPERATOR = _literal(b'->'), + NULLSAFE_OBJECT_OPERATOR = _literal(b'?->'), + DOUBLE_ARROW = _literal(b'=>'), + DOLLAR_OPEN_CURLY_BRACES = _literal(b'${'), + CURLY_OPEN = _literal(b'{$'), + PAAMAYIM_NEKUDOTAYIM = _literal(b'::'), + NS_SEPARATOR = _literal(b'\\'), + ELLIPSIS = _literal(b'...'), + COALESCE = _literal(b'??'), + POW = _literal(b'**'), + POW_EQUAL = _literal(b'**='), + ATTRIBUTE = _literal(b'#['), + OPEN_TAG_WITH_ECHO = _literal(b' MatchType: + def match(self, value: bytes) -> MatchType: return self.matcher.match(value) - def match_at_end(self, value: str) -> MatchType: + def match_at_end(self, value: bytes) -> MatchType: return self.matcher.match_at_end(value) -class CharacterType(str, Enum): - EQUALS = '=', - SEMICOLON = ';', - OPEN_PARENTHESIS = '(', - CLOSE_PARENTHESIS = ')', - COMMA = ',', - OPEN_BRACE = '{', - CLOSE_BRACE = '}' +class CharacterType(bytes, Enum): + EQUALS = b'=', + SEMICOLON = b';', + OPEN_PARENTHESIS = b'(', + CLOSE_PARENTHESIS = b')', + COMMA = b',', + OPEN_BRACE = b'{', + CLOSE_BRACE = b'}' class Token: - def __init__(self, type: TokenType, value: str): + def __init__(self, type: TokenType, value: bytes): self.type = type self.value = value @@ -418,13 +410,13 @@ def is_closing_parenthesis(self) -> bool: def is_comma(self) -> bool: return self.is_character(CharacterType.COMMA) - def __repr__(self) -> str: + def __repr__(self) -> bytes: return f'{self.type.name} ({self.value})' class Lexer: - def __init__(self, stream: IO, chunk_size: int = 4096): + def __init__(self, stream: BinaryIO, chunk_size: int = 4096): self.chunks = deque() self.chunk_size = chunk_size self.chunk_offset = 0 @@ -450,7 +442,7 @@ def step(self) -> bool: return False return True - def get_current(self) -> str: + def get_current(self) -> bytes: components = [] remaining = self.position - self.offset remaining_offset = self.offset @@ -465,7 +457,7 @@ def get_current(self) -> str: if chunk_length >= remaining: break remaining -= chunk_length - return ''.join(components) + return b''.join(components) def step_backwards(self) -> None: self.position -= 1 @@ -547,7 +539,7 @@ def get_next_token(self) -> Optional[Token]: return token -def lex(stream: IO) -> Generator[Token, None, None]: +def lex(stream: BinaryIO) -> Generator[Token, None, None]: lexer = Lexer(stream) while (token := lexer.get_next_token()) is not None: yield token diff --git a/wordfence/php/parsing.py b/wordfence/php/parsing.py index d129ba9a..332f19eb 100644 --- a/wordfence/php/parsing.py +++ b/wordfence/php/parsing.py @@ -43,11 +43,17 @@ class PhpStateType: pass +def make_strings_binary(value: Any) -> Any: + if isinstance(value, str): + return value.encode('ascii', 'ignore') + return value + + class PhpType(Enum): - STRING = str, + STRING = bytes, INTEGER = int, ARRAY = List, - NULL = None + NULL = type(None) def is_valid_value(self, value: Any) -> bool: return value is self.value or isinstance(value, self.value) @@ -55,16 +61,19 @@ def is_valid_value(self, value: Any) -> bool: def validate(self, value: Any) -> None: if not self.is_valid_value(value): raise EvaluationException( - f'Value {value} is not valid for type {self.name}' + 'Value ' + repr(value) + + f' is not valid for type {self.name} ({self.value})' ) @classmethod def for_python_value(cls, value: Any): + value = make_strings_binary(value) for type in cls: if type.is_valid_value(value): return type raise ImplementationException( - 'Python type does not have a corresponding PHP type' + 'Python value does not have a corresponding PHP type: ' + + repr(value) ) @@ -81,6 +90,7 @@ def __init__( @classmethod def for_python_value(cls, value: Any): + value = make_strings_binary(value) type = PhpType.for_python_value(value) return cls( type, @@ -102,7 +112,7 @@ class PhpName: def __init__( self, - components: List[str], + components: List[bytes], base=None ): self.components = components @@ -114,16 +124,16 @@ class PhpEntity: def __init__(self): self.comments = [] - def attach_comment(self, comment: str) -> None: + def attach_comment(self, comment: bytes) -> None: self.comments.append(comment) - def attach_comments(self, comments: List[str]) -> None: + def attach_comments(self, comments: List[bytes]) -> None: self.comments.extend(comments) class PhpIdentifiedEntity(PhpEntity): - def __init__(self, name: str): + def __init__(self, name: bytes): super().__init__() self.name = name @@ -135,7 +145,7 @@ class PhpVariable(PhpIdentifiedEntity): def __init__( self, - name: str, + name: bytes, value: PhpValue, ): super().__init__(name) @@ -192,10 +202,10 @@ def evaluate(self, state: PhpStateType): return self.instruction_group.evaluate(state) -class PhpVisibility(str, Enum): - PRIVATE = 'private' - PROTECTED = 'protected' - PUBLIC = 'public' +class PhpVisibility(bytes, Enum): + PRIVATE = b'private' + PROTECTED = b'protected' + PUBLIC = b'public' @classmethod def for_token_type(cls, token_type: TokenType): @@ -209,11 +219,11 @@ def for_token_type(cls, token_type: TokenType): return None -class PhpModifier(str, Enum): - ABSTRACT = 'abstract' - STATIC = 'static' - FINAL = 'final' - READONLY = 'readonly' +class PhpModifier(bytes, Enum): + ABSTRACT = b'abstract' + STATIC = b'static' + FINAL = b'final' + READONLY = b'readonly' @classmethod def for_token_type(cls, token_type: TokenType): @@ -243,7 +253,7 @@ class PhpClassMember(PhpEntity, Evaluable): def __init__( self, - name: str, + name: bytes, modifier_group: Optional[PhpModifierGroup] = None ): self.name = name @@ -262,7 +272,7 @@ class PhpMethod(PhpClassMember): def __init__( self, - name: str, + name: bytes, function: PhpFunction, modifier_group: Optional[PhpModifierGroup] = None ): @@ -278,8 +288,8 @@ class PhpClassConstant(PhpEntity, Evaluable): def __init__( self, - class_name: str, - constant_name: str + class_name: bytes, + constant_name: bytes ): self.class_name = class_name self.constant_name = constant_name @@ -298,7 +308,7 @@ class PhpClass(PhpIdentifiedEntity): def __init__( self, - name: str, + name: bytes, modifier_group: PhpModifierGroup ): super().__init__(name) @@ -313,7 +323,7 @@ def add_property(self, property: PhpProperty) -> None: def add_method(self, method: PhpMethod) -> None: self.methods[method.name] = method - def get_method(self, name: str) -> Optional[PhpMethod]: + def get_method(self, name: bytes) -> Optional[PhpMethod]: try: return self.methods[name] except KeyError: @@ -322,7 +332,7 @@ def get_method(self, name: str) -> Optional[PhpMethod]: def add_constant(self, constant: PhpClassConstant) -> None: self.constants[constant.constant_name] = constant - def get_constant(self, name: str) -> Optional[PhpClassConstant]: + def get_constant(self, name: bytes) -> Optional[PhpClassConstant]: try: return self.constants[name] except KeyError: @@ -333,16 +343,16 @@ class PhpDefinitions: def __init__( self, - base_functions: Dict[str, Callable] = None, - base_classes: Dict[str, PhpClass] = None + base_functions: Dict[bytes, Callable] = None, + base_classes: Dict[bytes, PhpClass] = None ): self.functions = base_functions.copy() self.classes = base_classes.copy() - def define_function(self, name: str, callable: Callable) -> None: + def define_function(self, name: bytes, callable: Callable) -> None: self.functions[name] = callable - def get_function(self, name: str) -> Optional[Callable]: + def get_function(self, name: bytes) -> Optional[Callable]: try: return self.functions[name] except KeyError: @@ -351,7 +361,7 @@ def get_function(self, name: str) -> Optional[Callable]: def define_class(self, definition: PhpClass) -> None: self.classes[definition.name] = definition - def get_class(self, name: str) -> Optional[PhpClass]: + def get_class(self, name: bytes) -> Optional[PhpClass]: try: return self.classes[name] except KeyError: @@ -363,7 +373,7 @@ class PhpScope: def __init__(self): self.variables = {} - def get_variable(self, name: str) -> PhpVariable: + def get_variable(self, name: bytes) -> PhpVariable: try: return self.variables[name] except KeyError: @@ -400,14 +410,14 @@ def __init__( self.options = options if options is not None \ else PhpEvaluationOptions() - def define_constant(self, name: str, value: Any) -> None: + def define_constant(self, name: bytes, value: Any) -> None: if name in self.constants: raise EvaluationException(f'Constant {name} is already defined') self.constants[name] = value def get_constant( self, - name: str, + name: bytes, default_to_name: bool = True ) -> PhpValue: try: @@ -419,18 +429,18 @@ def get_constant( def get_constant_value( self, - name: str, + name: bytes, default_to_name: bool = True ) -> PhpValue: return self.get_constant(name, default_to_name).value - def get_variable(self, name: str) -> PhpVariable: + def get_variable(self, name: bytes) -> PhpVariable: return self.scope.get_variable(name) - def get_variable_value(self, name: str) -> Any: + def get_variable_value(self, name: bytes) -> Any: return self.get_variable(name).value.value - def write_output(self, output: str) -> None: + def write_output(self, output: bytes) -> None: self.output.append(output) @@ -443,13 +453,15 @@ def php_defined(state: PhpState, constant: PhpValue) -> PhpValue: def php_dirname(state: PhpState, path: PhpValue) -> PhpValue: - return PhpValue.for_python_value(os.path.dirname(path.value)) + return PhpValue.for_python_value( + os.fsencode(os.path.dirname(path.value)) + ) BASE_FUNCTIONS = { - 'define': php_define, - 'defined': php_defined, - 'dirname': php_dirname + b'define': php_define, + b'defined': php_defined, + b'dirname': php_dirname } BASE_CLASSES = { } @@ -479,7 +491,7 @@ class PhpUnaryOperator(PhpOperator): def __init__( self, - operator: str, + operator: bytes, callable: Callable[[Any], Any] ): self.operator = operator @@ -490,7 +502,7 @@ def apply(self, value: PhpValue) -> Any: def _register_unary_operator( - operator: str, + operator: bytes, callable: Callable[[Any], Any] ) -> None: instance = PhpUnaryOperator(operator, callable) @@ -498,7 +510,7 @@ def _register_unary_operator( _register_unary_operator( - '!', + b'!', lambda value: PhpValue(value.type, not value.value) ) @@ -507,7 +519,7 @@ class PhpBinaryOperator(PhpOperator): def __init__( self, - operator: str, + operator: bytes, callable: Callable[[Any, Any], Any] ): super().__init__() @@ -519,7 +531,7 @@ def apply(self, left: Any, right: Any) -> Any: def _register_binary_operator( - operator: str, + operator: bytes, callable: Callable[[Any, Any], Any] ): operator_instance = PhpBinaryOperator(operator, callable) @@ -527,48 +539,48 @@ def _register_binary_operator( _register_binary_operator( - '.', + b'.', lambda left, right: PhpValue(left.type, left.value + right.value) ) _register_binary_operator( - '===', + b'===', lambda left, right: left.type is right.type and left.value == right.value ) _register_binary_operator( - '!==', + b'!==', lambda left, right: left.type is not right.type or left.value != right.value ) _register_binary_operator( - '==', + b'==', lambda left, right: left.value == right.value ) _register_binary_operator( - '!=', + b'!=', lambda left, right: left.value != right.value ) _register_binary_operator( - '=', + b'=', lambda left, right: left.assign(right) ) _register_binary_operator( - '>=', + b'>=', lambda left, right: left.value >= right.value ) _register_binary_operator( - '&&', + b'&&', lambda left, right: left.value and right.value ) _register_binary_operator( - '||', + b'||', lambda left, right: left.value or right.value ) class PhpOutput(PhpInstruction): - def __init__(self, content: str): + def __init__(self, content: bytes): super().__init__() self.content = content @@ -624,7 +636,7 @@ def __init__(self): class PhpIdentifier(PhpEntity): - def __init__(self, name: str, parent_name: Optional[str] = None): + def __init__(self, name: bytes, parent_name: Optional[bytes] = None): super().__init__() self.name = name self.parent_name = parent_name @@ -683,7 +695,7 @@ def __init__( def evaluate_path(self, state: PhpState) -> bytes: path = self.path.evaluate(state) - if isinstance(path, str): + if isinstance(path, bytes): return os.fsencode(path) raise EvaluationException( 'Included path is not a string, received: {repr(path)}' @@ -714,7 +726,7 @@ def get_callable(self, state: PhpState) -> Callable: class PhpStaticMethodReference(PhpCallable): - def __init__(self, class_name: str, method_name: str): + def __init__(self, class_name: bytes, method_name: bytes): self.class_name = class_name self.method_name = method_name @@ -733,7 +745,7 @@ class PhpMethodReference(PhpCallable): def __init__( self, - method_name: str + method_name: bytes ): self.method_name = method_name @@ -761,7 +773,7 @@ class PhpPropertyReference(PhpEntity, Evaluable): def __init__( self, - property_name: str + property_name: bytes ): self.property_name = property_name @@ -836,8 +848,8 @@ def __init__( self, expression: PhpExpression, instruction_group: PhpInstructionGroup, - value_name: str, - key_name: Optional[str] + value_name: bytes, + key_name: Optional[bytes] ): self.expression = expression self.instruction_group = instruction_group @@ -983,7 +995,7 @@ def require_character(self, type: CharacterType) -> None: def require_equals(self) -> None: self.require_character(CharacterType.EQUALS) - def take_comments(self) -> List[str]: + def take_comments(self) -> List[bytes]: comments = self.pending_comments self.pending_comments = [] return comments @@ -1029,7 +1041,7 @@ def parse_instruction( def parse_string(self, token: Token) -> PhpLiteral: if token.type != TokenType.CONSTANT_ENCAPSED_STRING: raise ParsingException('Token is not a valid string') - value = token.value[1:-1].replace(STRING_ESCAPE, '') + value = token.value[1:-1].replace(STRING_ESCAPE, b'') return PhpLiteral(PhpType.STRING, value) def parse_integer(self, token: Token) -> PhpLiteral: @@ -1234,7 +1246,7 @@ def parse_expression( expression_level -= 1 return expression - def parse_variable_name(self, token: Token) -> str: + def parse_variable_name(self, token: Token) -> bytes: return token.value[1:] def parse_include( @@ -1575,6 +1587,8 @@ def parse_output( raise TagStateChanged(True) else: break + elif token.type is TokenType.CLOSE_TAG: + raise TagStateChanged(False) else: raise ParsingException(f'Unexpected token: {token.type}') if content is not None: @@ -1632,7 +1646,7 @@ def parse(self, context: PhpContext = None) -> PhpContext: def parse_php_file(path: bytes) -> PhpContext: try: - with open(path, 'r') as stream: + with open(path, 'rb') as stream: metadata = SourceMetadata(path) source = Source(stream, metadata) parser = Parser(source) diff --git a/wordfence/util/versioning.py b/wordfence/util/versioning.py index db9a3401..9085a60a 100644 --- a/wordfence/util/versioning.py +++ b/wordfence/util/versioning.py @@ -2,41 +2,42 @@ from typing import List, Dict, Union, Optional -PHP_VERSION_DELIMITER = '.' -PHP_VERSION_ALTERNATE_DELIMITERS = ['_', '-', '+'] +PHP_VERSION_DELIMITER = b'.' +PHP_VERSION_ALTERNATE_DELIMITERS = [b'_', b'-', b'+'] -NON_NUMBER_PATTERN = re.compile('[^0-9.]+') -NUMBER_PATTERN = re.compile('^[0-9]+$') -REPEATED_DOT_PATTERN = re.compile('\\.{2,}') +NON_NUMBER_PATTERN = re.compile(b'[^0-9.]+') +NUMBER_PATTERN = re.compile(b'^[0-9]+$') +REPEATED_DOT_PATTERN = re.compile(b'\\.{2,}') +# TODO: Convert str to bytes def delimit_non_numbers(version: str) -> str: - return NON_NUMBER_PATTERN.sub(".\\g<0>.", version).strip('.') + return NON_NUMBER_PATTERN.sub(b".\\g<0>.", version).strip(b'.') -def is_number(string: str) -> bool: +def is_number(string: bytes) -> bool: return NUMBER_PATTERN.match(string) is not None -def strip_repeated_delimiters(version: str) -> str: +def strip_repeated_delimiters(version: bytes) -> bytes: return REPEATED_DOT_PATTERN.sub('.', version) LOWER_ALPHA_VERSIONS = [ - ['dev'], - ['alpha', 'a'], - ['beta', 'b'], - ['RC', 'rc'], + [b'dev'], + [b'alpha', b'a'], + [b'beta', b'b'], + [b'RC', b'rc'], ] HIGHER_ALPHA_VERSIONS = [ - ['pl', 'p'] + [b'pl', b'p'] ] TIER_OFFSET = 2 TIER_NUMBER = len(LOWER_ALPHA_VERSIONS) + TIER_OFFSET -def create_alpha_version_map(versions: List[List[str]]) -> Dict[str, int]: +def create_alpha_version_map(versions: List[List[bytes]]) -> Dict[bytes, int]: map = {} for index, tier in enumerate(versions): for version in tier: @@ -48,24 +49,24 @@ def create_alpha_version_map(versions: List[List[str]]) -> Dict[str, int]: HIGHER_ALPHA_VERSIONS_MAP = create_alpha_version_map(HIGHER_ALPHA_VERSIONS) -def get_alpha_tier(string: str, map: Dict[str, int]) -> Optional[int]: +def get_alpha_tier(string: bytes, map: Dict[bytes, int]) -> Optional[int]: try: return map[string] except KeyError: return None -def get_lower_alpha_tier(string: str) -> Optional[int]: +def get_lower_alpha_tier(string: bytes) -> Optional[int]: return get_alpha_tier(string, LOWER_ALPHA_VERSION_MAP) -def get_higher_alpha_tier(string: str) -> Optional[int]: +def get_higher_alpha_tier(string: bytes) -> Optional[int]: return get_alpha_tier(string, HIGHER_ALPHA_VERSIONS_MAP) class PhpVersionComponent: - def __init__(self, value: str): + def __init__(self, value: bytes): self.is_number = is_number(value) if self.is_number: self.value = int(value) @@ -93,23 +94,25 @@ def __str__(self) -> str: return str(self.value) -DefaultComponent = PhpVersionComponent('0') +DefaultComponent = PhpVersionComponent(b'0') class PhpVersion: - def __init__(self, version: str): + def __init__(self, version: Union[str, bytes]): + if isinstance(version, str): + version = version.encode('ascii') self.version = version self._components = self.extract_components(version) - def extract_components(self, version: str) -> List[str]: + def extract_components(self, version: bytes) -> List[bytes]: for character in PHP_VERSION_ALTERNATE_DELIMITERS: version = version.replace(character, PHP_VERSION_DELIMITER) # Note that this also strips leading/trailing delimiters version = strip_repeated_delimiters( delimit_non_numbers(version) ) - return list(map(PhpVersionComponent, version.split('.'))) + return list(map(PhpVersionComponent, version.split(b'.'))) def _get_component(self, index: int) -> PhpVersionComponent: try: @@ -132,8 +135,8 @@ def compare_version_components( def compare_php_versions( - a: Union[PhpVersion, str], - b: Union[PhpVersion, str] + a: Union[PhpVersion, str, bytes], + b: Union[PhpVersion, str, bytes] ) -> int: """ This is intended to mirror PHP's version_compare function """ """ https://www.php.net/manual/en/function.version-compare.php """ diff --git a/wordfence/wordpress/site.py b/wordfence/wordpress/site.py index e1b9df19..e2e82909 100644 --- a/wordfence/wordpress/site.py +++ b/wordfence/wordpress/site.py @@ -251,15 +251,15 @@ def resolve_core_path(self, path: bytes) -> bytes: def resolve_content_path(self, path: bytes) -> bytes: return self._resolve_path(path, self.get_content_directory()) - def _determine_version(self) -> str: + def _determine_version(self) -> bytes: version_path = self.resolve_core_path(b'wp-includes/version.php') context = parse_php_file(version_path) try: state = context.evaluate( options=EVALUATION_OPTIONS ) - version = state.get_variable_value('wp_version') - if isinstance(version, str): + version = state.get_variable_value(b'wp_version') + if isinstance(version, bytes): return version except PhpException as exception: raise WordpressException( From 08ff952c19b36687adeffba6d97de590df88ebdd Mon Sep 17 00:00:00 2001 From: Alex Kenion Date: Mon, 29 Jul 2024 15:19:53 -0400 Subject: [PATCH 2/2] Fixed issue that was breaking unit tests --- wordfence/util/versioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wordfence/util/versioning.py b/wordfence/util/versioning.py index 9085a60a..59287f23 100644 --- a/wordfence/util/versioning.py +++ b/wordfence/util/versioning.py @@ -21,7 +21,7 @@ def is_number(string: bytes) -> bool: def strip_repeated_delimiters(version: bytes) -> bytes: - return REPEATED_DOT_PATTERN.sub('.', version) + return REPEATED_DOT_PATTERN.sub(b'.', version) LOWER_ALPHA_VERSIONS = [