diff --git a/wordfence/cli/scan/config/config_definitions.py b/wordfence/cli/scan/config/config_definitions.py index 5d4b2336..52439cbc 100644 --- a/wordfence/cli/scan/config/config_definitions.py +++ b/wordfence/cli/scan/config/config_definitions.py @@ -3,6 +3,7 @@ from ..reporting import ReportFormat, ReportColumn from wordfence.cli.config.defaults import INI_DEFAULT_PATH +from wordfence.util.pcre import PCRE_DEFAULT_MATCH_LIMIT, PCRE_DEFAULT_MATCH_LIMIT_RECURSION KIBIBYTE = 1024 MEBIBYTE = 1024 * 1024 @@ -175,12 +176,13 @@ def byte_length(conversion_value: str) -> int: "value_type": byte_length } }, - "max-file-size": { + "scanned-content-limit": { "short_name": "M", - "description": "Files above this limit will not be scanned. Defaults" - " to 50 mebibytes. Use a whole number followed by one" - " config the following suffixes: b (byte), k (kibibyte)" - ", m (mebibyte).", + "description": "The maximum amount of data to scan in each file." + " Content beyond this limit will not be scanned." + " Defaults to 50 mebibytes. Use a whole number followed" + " by one of the following suffixes: b (byte)," + " k (kibibyte), m (mebibyte).", "context": "ALL", "argument_type": "OPTION", "default": byte_length('50m'), @@ -196,23 +198,32 @@ def byte_length(conversion_value: str) -> int: "argument_type": "FLAG", "default": False }, - "workers": { - "short_name": "w", - "description": "Number of worker processes used to perform scanning. " - "Defaults to 1 worker process.", + "pcre-backtrack-limit": { + "description": "The regex backtracking limit for signature evaluation", "context": "ALL", "argument_type": "OPTION", - "default": 1 + "default": PCRE_DEFAULT_MATCH_LIMIT, + "meta": { + "value_type": int + } }, - "backtrack-limit": { - "description": "The backtracking limit for signature evaluation", + "pcre-recursion-limit": { + "description": "The regex recursion limit for signature evaluation", "context": "ALL", "argument_type": "OPTION", - "default": 100000, + "default": PCRE_DEFAULT_MATCH_LIMIT_RECURSION, "meta": { "value_type": int } }, + "workers": { + "short_name": "w", + "description": "Number of worker processes used to perform scanning. " + "Defaults to 1 worker process.", + "context": "ALL", + "argument_type": "OPTION", + "default": 1 + }, "configuration": { "short_name": "c", "description": "Path to a configuration INI file to use (defaults to" diff --git a/wordfence/cli/scan/scan.py b/wordfence/cli/scan/scan.py index f87e9904..00976414 100644 --- a/wordfence/cli/scan/scan.py +++ b/wordfence/cli/scan/scan.py @@ -9,8 +9,7 @@ from wordfence import scanning, api from wordfence.api.licensing import LicenseSpecific from wordfence.scanning import filtering -from wordfence.util import caching -from wordfence.util import updater +from wordfence.util import caching, updater, pcre from wordfence.util.io import StreamReader from wordfence.intel.signatures import SignatureSet from wordfence.logging import log @@ -146,6 +145,13 @@ def _initialize_file_filter(self) -> filtering.FileFilter: filter.add(filtering.filter_images) return filter + def _get_pcre_options(self) -> pcre.PcreOptions: + return pcre.PcreOptions( + caseless=True, + match_limit=self.config.pcre_backtrack_limit, + match_limit_recursion=self.config.pcre_recursion_limit + ) + def execute(self) -> int: if self.config.purge_cache: self.cache.purge() @@ -159,9 +165,10 @@ def execute(self) -> int: workers=int(self.config.workers), signatures=self._get_signatures(), chunk_size=self.config.chunk_size, - max_file_size=int(self.config.max_file_size), + max_file_size=int(self.config.scanned_content_limit), file_filter=self._initialize_file_filter(), - match_all=self.config.match_all + match_all=self.config.match_all, + pcre_options=self._get_pcre_options() ) if self._should_read_stdin(): options.path_source = StreamReader( @@ -213,6 +220,8 @@ def handle_interrupt(signal_number: int, stack) -> None: def display_version() -> None: print(f"Wordfence CLI {__version__}") + jit_support_text = 'Yes' if pcre.HAS_JIT_SUPPORT else 'No' + print(f"PCRE Version: {pcre.VERSION} - JIT Supported: {jit_support_text}") def main(config) -> int: diff --git a/wordfence/scanning/scanner.py b/wordfence/scanning/scanner.py index 74e3d479..3eb33bbe 100644 --- a/wordfence/scanning/scanner.py +++ b/wordfence/scanning/scanner.py @@ -12,7 +12,7 @@ from .filtering import FileFilter, filter_any from ..util import timing from ..util.io import StreamReader -from ..util.pcre import PcreJitStack +from ..util.pcre import PcreOptions, PCRE_DEFAULT_OPTIONS, PcreJitStack from ..intel.signatures import SignatureSet from ..logging import log @@ -36,6 +36,7 @@ class Options: max_file_size: Optional[int] = None file_filter: Optional[FileFilter] = None match_all: bool = False + pcre_options: PcreOptions = PCRE_DEFAULT_OPTIONS class Status(IntEnum): diff --git a/wordfence/util/pcre.py b/wordfence/util/pcre.py index 64a8522d..f8f94eaa 100644 --- a/wordfence/util/pcre.py +++ b/wordfence/util/pcre.py @@ -1,13 +1,30 @@ from ctypes import cdll, c_char_p, c_void_p, c_int, c_ulong, c_ubyte, byref, \ - create_string_buffer, Structure, POINTER + create_string_buffer, Structure, POINTER, CFUNCTYPE +from ctypes.util import find_library from typing import Optional -pcre = cdll.LoadLibrary('libpcre.so') + +class PcreException(Exception): + pass + + +class PcreLibraryNotAvailableException(PcreException): + pass + + +library_name = find_library('pcre') +if library_name is None: + raise PcreLibraryNotAvailableException('Failed to locate libpcre') +try: + pcre = cdll.LoadLibrary(library_name) +except OSError as e: + raise PcreLibraryNotAvailableException('Failed to load libpcre') from e _pcre_version = pcre.pcre_version +_pcre_version.argtypes = [] _pcre_version.restype = c_char_p -VERSION = _pcre_version() +VERSION = _pcre_version().decode('ascii') PCRE_ERROR_NOMATCH = -1 @@ -15,6 +32,7 @@ _pcre_config = pcre.pcre_config +_pcre_config.argtypes = [c_int, c_void_p] _pcre_config.restype = c_int PCRE_CONFIG_JIT = 9 @@ -30,39 +48,24 @@ def _check_jit_support() -> bool: HAS_JIT_SUPPORT = _check_jit_support() -_pcre_compile = pcre.pcre_compile -_pcre_compile.restype = c_void_p - -if HAS_JIT_SUPPORT: - _pcre_jit_stack_alloc = pcre.pcre_jit_stack_alloc - _pcre_jit_stack_alloc.restype = c_void_p - _pcre_jit_stack_free = pcre.pcre_jit_stack_free - - -_pcre_exec = pcre.pcre_exec -_pcre_exec.restype = c_int - -_pcre_jit_exec = pcre.pcre_jit_exec -_pcre_jit_exec.restype = c_int - - -PCRE_EXTRA_MATCH_LIMIT = 0x0012 -PCRE_EXTRA_MATCH_LIMIT_RECURSION = 0x0010 -PCRE_STUDY_JIT_COMPILE = 0x0001 -PCRE_STUDY_EXTRA_NEEDED = 0x0008 -PCRE_CASELESS = 0x00000001 - - -PCRE_JIT_STACK_MIN_SIZE = 32 * 1024 -PCRE_JIT_STACK_MAX_SIZE = 64 * 1024 +class _StructPcre(Structure): + pass -class PcreException(Exception): - pass +_pcre_p = POINTER(_StructPcre) +_pcre_compile = pcre.pcre_compile +_pcre_compile.argtypes = [ + c_char_p, + c_int, + POINTER(c_char_p), + POINTER(c_int), + POINTER(c_ubyte) + ] +_pcre_compile.restype = _pcre_p -class PcreExtra(Structure): +class _StructPcreExtra(Structure): _fields_ = [ ('flags', c_ulong), ('study_data', c_void_p), @@ -75,11 +78,76 @@ class PcreExtra(Structure): ] +_pcre_extra_p = POINTER(_StructPcreExtra) _pcre_study = pcre.pcre_study -_pcre_study.restype = POINTER(PcreExtra) +_pcre_study.argtypes = [_pcre_p, c_int, POINTER(c_char_p)] +_pcre_study.restype = _pcre_extra_p _pcre_free_study = pcre.pcre_free_study +_pcre_free_study.argtypes = [_pcre_extra_p] +_pcre_free_study.restype = None + + +_pcre_exec = pcre.pcre_exec +_pcre_exec.argtypes = [ + _pcre_p, + _pcre_extra_p, + c_char_p, + c_int, + c_int, + c_int, + POINTER(c_int), + c_int + ] +_pcre_exec.restype = c_int + + +_pcre_free_address = c_void_p.in_dll(pcre, 'pcre_free').value +_pcre_free_prototype = CFUNCTYPE(None, c_void_p) +_pcre_free = _pcre_free_prototype(_pcre_free_address) + + +if HAS_JIT_SUPPORT: + class _StructPcreJitStack(Structure): + pass + + _pcre_jit_stack_p = POINTER(_StructPcreJitStack) + + _pcre_jit_stack_alloc = pcre.pcre_jit_stack_alloc + _pcre_jit_stack_alloc.argtypes = [c_int, c_int] + _pcre_jit_stack_alloc.restype = _pcre_jit_stack_p + + _pcre_jit_stack_free = pcre.pcre_jit_stack_free + _pcre_jit_stack_free.argtypes = [_pcre_jit_stack_p] + _pcre_jit_stack_free.restype = None + + _pcre_jit_exec = pcre.pcre_jit_exec + _pcre_jit_exec.argtypes = [ + _pcre_p, + _pcre_extra_p, + c_char_p, + c_int, + c_int, + c_int, + c_void_p, + c_int, + _pcre_jit_stack_p + ] + _pcre_jit_exec.restype = c_int + + +PCRE_EXTRA_MATCH_LIMIT = 0x0012 +PCRE_EXTRA_MATCH_LIMIT_RECURSION = 0x0010 +PCRE_STUDY_JIT_COMPILE = 0x0001 +PCRE_STUDY_EXTRA_NEEDED = 0x0008 +PCRE_CASELESS = 0x00000001 + + +PCRE_JIT_STACK_MIN_SIZE = 32 * 1024 +PCRE_JIT_STACK_MAX_SIZE = 64 * 1024 +PCRE_DEFAULT_MATCH_LIMIT = 1000000 +PCRE_DEFAULT_MATCH_LIMIT_RECURSION = 100000 class PcreJitStack: @@ -126,26 +194,54 @@ def __init__(self, matched_string: bytes): self.matched_string = matched_string +class PcreOptions: + + def __init__( + self, + caseless: bool = False, + match_limit: int = PCRE_DEFAULT_MATCH_LIMIT, + match_limit_recursion: int = PCRE_DEFAULT_MATCH_LIMIT_RECURSION + ): + self.caseless = caseless + self.match_limit = match_limit + self.match_limit_recursion = match_limit_recursion + self._compilation_options = None + + def _get_compilation_options(self) -> c_int: + if self._compilation_options is None: + options = 0 + if self.caseless: + options |= PCRE_CASELESS + self._compilation_options = c_int(options) + return self._compilation_options + + +PCRE_DEFAULT_OPTIONS = PcreOptions() + + class PcrePattern: - def __init__(self, pattern: str): - self._compile(pattern) + def __init__( + self, + pattern: str, + options: PcreOptions = PCRE_DEFAULT_OPTIONS + ): + self._compile(pattern, options) - def _compile(self, pattern: str) -> c_void_p: + def _compile(self, pattern: str, options: PcreOptions) -> c_void_p: pattern_cstr = c_char_p(pattern.encode('utf8')) - error_buffer = create_string_buffer(100) + error_message = c_char_p(None) error_offset = c_int(-1) - options = c_int(PCRE_CASELESS) self.compiled = _pcre_compile( pattern_cstr, - options, - byref(error_buffer), + options._get_compilation_options(), + byref(error_message), byref(error_offset), None ) if not self.compiled: offset = error_offset.value - message = error_buffer.value + message = error_message.value.decode('utf8') raise PcreException( f'Pattern compilation failed at offset {offset}: {message}' ) @@ -153,13 +249,14 @@ def _compile(self, pattern: str) -> c_void_p: self.extra = _pcre_study( self.compiled, study_options, - byref(error_buffer) + byref(error_message) ) self.extra.flags = c_ulong( PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION ) - self.extra.match_limit = c_ulong(100000) - self.extra.match_limit_recursion = c_ulong(100000) + self.extra.match_limit = c_ulong(options.match_limit) + self.extra.match_limit_recursion = \ + c_ulong(options.match_limit_recursion) def match( self, @@ -215,4 +312,12 @@ def match( return PcreMatch(matched_string) def _free(self) -> None: - _pcre_free_study(self.extra) + if self.extra is not None: + _pcre_free_study(self.extra) + self.extra = None + if self.compiled is not None: + _pcre_free(self.compiled) + self.compiled = None + + def __del__(self) -> None: + self._free()