Skip to content

Commit

Permalink
Fixed seg faults in PCRE module, made ctypes usage safer, further adjusted config options
Browse files Browse the repository at this point in the history
  • Loading branch information
akenion committed Aug 22, 2023
1 parent c693e45 commit c00e5a9
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 62 deletions.
37 changes: 24 additions & 13 deletions wordfence/cli/scan/config/config_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from ..reporting import ReportFormat, ReportColumn
from wordfence.cli.config.defaults import INI_DEFAULT_PATH
from wordfence.util.pcre import PCRE_DEFAULT_MATCH_LIMIT, PCRE_DEFAULT_MATCH_LIMIT_RECURSION

KIBIBYTE = 1024
MEBIBYTE = 1024 * 1024
Expand Down Expand Up @@ -175,12 +176,13 @@ def byte_length(conversion_value: str) -> int:
"value_type": byte_length
}
},
"max-file-size": {
"scanned-content-limit": {
"short_name": "M",
"description": "Files above this limit will not be scanned. Defaults"
" to 50 mebibytes. Use a whole number followed by one"
" config the following suffixes: b (byte), k (kibibyte)"
", m (mebibyte).",
"description": "The maximum amount of data to scan in each file."
" Content beyond this limit will not be scanned."
" Defaults to 50 mebibytes. Use a whole number followed"
" by one of the following suffixes: b (byte),"
" k (kibibyte), m (mebibyte).",
"context": "ALL",
"argument_type": "OPTION",
"default": byte_length('50m'),
Expand All @@ -196,23 +198,32 @@ def byte_length(conversion_value: str) -> int:
"argument_type": "FLAG",
"default": False
},
"workers": {
"short_name": "w",
"description": "Number of worker processes used to perform scanning. "
"Defaults to 1 worker process.",
"pcre-backtrack-limit": {
"description": "The regex backtracking limit for signature evaluation",
"context": "ALL",
"argument_type": "OPTION",
"default": 1
"default": PCRE_DEFAULT_MATCH_LIMIT,
"meta": {
"value_type": int
}
},
"backtrack-limit": {
"description": "The backtracking limit for signature evaluation",
"pcre-recursion-limit": {
"description": "The regex recursion limit for signature evaluation",
"context": "ALL",
"argument_type": "OPTION",
"default": 100000,
"default": PCRE_DEFAULT_MATCH_LIMIT_RECURSION,
"meta": {
"value_type": int
}
},
"workers": {
"short_name": "w",
"description": "Number of worker processes used to perform scanning. "
"Defaults to 1 worker process.",
"context": "ALL",
"argument_type": "OPTION",
"default": 1
},
"configuration": {
"short_name": "c",
"description": "Path to a configuration INI file to use (defaults to"
Expand Down
17 changes: 13 additions & 4 deletions wordfence/cli/scan/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
from wordfence import scanning, api
from wordfence.api.licensing import LicenseSpecific
from wordfence.scanning import filtering
from wordfence.util import caching
from wordfence.util import updater
from wordfence.util import caching, updater, pcre
from wordfence.util.io import StreamReader
from wordfence.intel.signatures import SignatureSet
from wordfence.logging import log
Expand Down Expand Up @@ -146,6 +145,13 @@ def _initialize_file_filter(self) -> filtering.FileFilter:
filter.add(filtering.filter_images)
return filter

def _get_pcre_options(self) -> pcre.PcreOptions:
    """Build the PcreOptions for this scan from the loaded configuration.

    Matching is always caseless; the match limit comes from the
    ``pcre_backtrack_limit`` config value and the recursion limit from
    ``pcre_recursion_limit``.
    """
    backtrack_limit = self.config.pcre_backtrack_limit
    recursion_limit = self.config.pcre_recursion_limit
    return pcre.PcreOptions(
        caseless=True,
        match_limit=backtrack_limit,
        match_limit_recursion=recursion_limit
    )

def execute(self) -> int:
if self.config.purge_cache:
self.cache.purge()
Expand All @@ -159,9 +165,10 @@ def execute(self) -> int:
workers=int(self.config.workers),
signatures=self._get_signatures(),
chunk_size=self.config.chunk_size,
max_file_size=int(self.config.max_file_size),
max_file_size=int(self.config.scanned_content_limit),
file_filter=self._initialize_file_filter(),
match_all=self.config.match_all
match_all=self.config.match_all,
pcre_options=self._get_pcre_options()
)
if self._should_read_stdin():
options.path_source = StreamReader(
Expand Down Expand Up @@ -213,6 +220,8 @@ def handle_interrupt(signal_number: int, stack) -> None:

def display_version() -> None:
    """Print the CLI version, then the PCRE version and its JIT support."""
    print(f"Wordfence CLI {__version__}")
    # Spell the boolean out as Yes/No for the human-readable line below.
    if pcre.HAS_JIT_SUPPORT:
        jit_support_text = 'Yes'
    else:
        jit_support_text = 'No'
    print(f"PCRE Version: {pcre.VERSION} - JIT Supported: {jit_support_text}")


def main(config) -> int:
Expand Down
3 changes: 2 additions & 1 deletion wordfence/scanning/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from .filtering import FileFilter, filter_any
from ..util import timing
from ..util.io import StreamReader
from ..util.pcre import PcreJitStack
from ..util.pcre import PcreOptions, PCRE_DEFAULT_OPTIONS, PcreJitStack
from ..intel.signatures import SignatureSet
from ..logging import log

Expand All @@ -36,6 +36,7 @@ class Options:
max_file_size: Optional[int] = None
file_filter: Optional[FileFilter] = None
match_all: bool = False
pcre_options: PcreOptions = PCRE_DEFAULT_OPTIONS


class Status(IntEnum):
Expand Down
193 changes: 149 additions & 44 deletions wordfence/util/pcre.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,38 @@
from ctypes import cdll, c_char_p, c_void_p, c_int, c_ulong, c_ubyte, byref, \
create_string_buffer, Structure, POINTER
create_string_buffer, Structure, POINTER, CFUNCTYPE
from ctypes.util import find_library
from typing import Optional

pcre = cdll.LoadLibrary('libpcre.so')

class PcreException(Exception):
    """Base class for the errors raised by this PCRE wrapper module."""


class PcreLibraryNotAvailableException(PcreException):
    """Raised when libpcre cannot be located or cannot be loaded."""


def _load_libpcre():
    """Locate and load libpcre, returning ``(library_name, handle)``.

    Raises:
        PcreLibraryNotAvailableException: if ``find_library`` cannot find
            the library, or if loading it fails with an ``OSError``.
    """
    name = find_library('pcre')
    if name is None:
        raise PcreLibraryNotAvailableException('Failed to locate libpcre')
    try:
        handle = cdll.LoadLibrary(name)
    except OSError as e:
        raise PcreLibraryNotAvailableException('Failed to load libpcre') from e
    return name, handle


# Importing this module fails with PcreLibraryNotAvailableException when
# libpcre is missing, since every binding below needs the handle.
library_name, pcre = _load_libpcre()


_pcre_version = pcre.pcre_version
_pcre_version.argtypes = []
_pcre_version.restype = c_char_p
VERSION = _pcre_version()
VERSION = _pcre_version().decode('ascii')


# Return codes mirrored from pcre.h. PCRE reports every error as a negative
# integer, so PCRE_ERROR_BADOPTION must be -3; a positive 3 would never
# compare equal to a value returned by the library.
PCRE_ERROR_NOMATCH = -1
PCRE_ERROR_BADOPTION = -3


_pcre_config = pcre.pcre_config
_pcre_config.argtypes = [c_int, c_void_p]
_pcre_config.restype = c_int

PCRE_CONFIG_JIT = 9
Expand All @@ -30,39 +48,24 @@ def _check_jit_support() -> bool:

HAS_JIT_SUPPORT = _check_jit_support()

_pcre_compile = pcre.pcre_compile
_pcre_compile.restype = c_void_p

if HAS_JIT_SUPPORT:
_pcre_jit_stack_alloc = pcre.pcre_jit_stack_alloc
_pcre_jit_stack_alloc.restype = c_void_p
_pcre_jit_stack_free = pcre.pcre_jit_stack_free


_pcre_exec = pcre.pcre_exec
_pcre_exec.restype = c_int


_pcre_jit_exec = pcre.pcre_jit_exec
_pcre_jit_exec.restype = c_int


PCRE_EXTRA_MATCH_LIMIT = 0x0012
PCRE_EXTRA_MATCH_LIMIT_RECURSION = 0x0010
PCRE_STUDY_JIT_COMPILE = 0x0001
PCRE_STUDY_EXTRA_NEEDED = 0x0008
PCRE_CASELESS = 0x00000001


PCRE_JIT_STACK_MIN_SIZE = 32 * 1024
PCRE_JIT_STACK_MAX_SIZE = 64 * 1024
class _StructPcre(Structure):
    """Field-less ctypes structure used only as a pointer target type."""


class PcreException(Exception):
pass
_pcre_p = POINTER(_StructPcre)
_pcre_compile = pcre.pcre_compile
_pcre_compile.argtypes = [
c_char_p,
c_int,
POINTER(c_char_p),
POINTER(c_int),
POINTER(c_ubyte)
]
_pcre_compile.restype = _pcre_p


class PcreExtra(Structure):
class _StructPcreExtra(Structure):
_fields_ = [
('flags', c_ulong),
('study_data', c_void_p),
Expand All @@ -75,11 +78,76 @@ class PcreExtra(Structure):
]


_pcre_extra_p = POINTER(_StructPcreExtra)
_pcre_study = pcre.pcre_study
_pcre_study.restype = POINTER(PcreExtra)
_pcre_study.argtypes = [_pcre_p, c_int, POINTER(c_char_p)]
_pcre_study.restype = _pcre_extra_p


_pcre_free_study = pcre.pcre_free_study
_pcre_free_study.argtypes = [_pcre_extra_p]
_pcre_free_study.restype = None


# Bind pcre_exec with an explicit prototype so ctypes converts and checks
# each argument and interprets the result as a C int.
# NOTE(review): the eight slots presumably follow the C signature from
# pcreapi (code, extra, subject, length, startoffset, options, ovector,
# ovecsize) -- confirm against pcre.h.
_pcre_exec = pcre.pcre_exec
_pcre_exec.argtypes = [
_pcre_p,
_pcre_extra_p,
c_char_p,
c_int,
c_int,
c_int,
POINTER(c_int),
c_int
]
_pcre_exec.restype = c_int


# The 'pcre_free' symbol is read as a pointer-sized value stored in the
# library rather than called directly; that address is then wrapped in a
# void(void *) prototype to obtain a Python callable.
# NOTE(review): presumably this is needed because libpcre exports pcre_free
# as a function-pointer variable -- confirm against pcreapi.
_pcre_free_address = c_void_p.in_dll(pcre, 'pcre_free').value
_pcre_free_prototype = CFUNCTYPE(None, c_void_p)
_pcre_free = _pcre_free_prototype(_pcre_free_address)


if HAS_JIT_SUPPORT:
    # The JIT entry points are only bound when HAS_JIT_SUPPORT is true.

    class _StructPcreJitStack(Structure):
        """Field-less structure used only as the JIT stack pointer target."""

    _pcre_jit_stack_p = POINTER(_StructPcreJitStack)

    # JIT stack allocation: two ints in, a stack pointer out.
    _pcre_jit_stack_alloc = pcre.pcre_jit_stack_alloc
    _pcre_jit_stack_alloc.restype = _pcre_jit_stack_p
    _pcre_jit_stack_alloc.argtypes = [c_int, c_int]

    # JIT stack release: takes the stack pointer, returns nothing.
    _pcre_jit_stack_free = pcre.pcre_jit_stack_free
    _pcre_jit_stack_free.restype = None
    _pcre_jit_stack_free.argtypes = [_pcre_jit_stack_p]

    # Same leading arguments as _pcre_exec plus a trailing JIT stack pointer.
    _pcre_jit_exec = pcre.pcre_jit_exec
    _pcre_jit_exec.restype = c_int
    _pcre_jit_exec.argtypes = [
        _pcre_p,
        _pcre_extra_p,
        c_char_p,
        c_int,
        c_int,
        c_int,
        c_void_p,
        c_int,
        _pcre_jit_stack_p
    ]


# Bit flags mirrored from pcre.h.
# Flags for the pcre_extra ``flags`` field. Each must be a distinct single
# bit: the match-limit flag is 0x0002 (0x0012 would also contain the
# recursion-limit bit 0x0010).
PCRE_EXTRA_MATCH_LIMIT = 0x0002
PCRE_EXTRA_MATCH_LIMIT_RECURSION = 0x0010
# Options accepted by pcre_study.
PCRE_STUDY_JIT_COMPILE = 0x0001
PCRE_STUDY_EXTRA_NEEDED = 0x0008
# Option accepted by pcre_compile.
PCRE_CASELESS = 0x00000001


# JIT stack size bounds.
# NOTE(review): presumably the start/maximum sizes passed to
# pcre_jit_stack_alloc -- confirm in PcreJitStack.
PCRE_JIT_STACK_MIN_SIZE = 1024 * 32
PCRE_JIT_STACK_MAX_SIZE = 1024 * 64
# Limits used when the caller does not provide its own values.
PCRE_DEFAULT_MATCH_LIMIT = 1_000_000
PCRE_DEFAULT_MATCH_LIMIT_RECURSION = 100_000


class PcreJitStack:
Expand Down Expand Up @@ -126,40 +194,69 @@ def __init__(self, matched_string: bytes):
self.matched_string = matched_string


class PcreOptions:
    """Settings used when compiling a pattern and limiting its matching.

    ``caseless`` selects the PCRE_CASELESS compile option, while
    ``match_limit`` and ``match_limit_recursion`` are stored as given for
    the pattern code to read.
    """

    def __init__(
                self,
                caseless: bool = False,
                match_limit: int = PCRE_DEFAULT_MATCH_LIMIT,
                match_limit_recursion: int = PCRE_DEFAULT_MATCH_LIMIT_RECURSION
            ):
        self.caseless = caseless
        self.match_limit = match_limit
        self.match_limit_recursion = match_limit_recursion
        # Filled in on the first _get_compilation_options() call.
        self._compilation_options = None

    def _get_compilation_options(self) -> c_int:
        """Return the compile option bits as a c_int, built once and cached."""
        cached = self._compilation_options
        if cached is not None:
            return cached
        flags = 0
        if self.caseless:
            flags |= PCRE_CASELESS
        cached = c_int(flags)
        self._compilation_options = cached
        return cached


# Module-wide default options (not caseless, default limits). This single
# instance is used as a default argument value (e.g. by
# PcrePattern.__init__), so every caller that omits options shares it.
PCRE_DEFAULT_OPTIONS = PcreOptions()


class PcrePattern:

def __init__(self, pattern: str):
self._compile(pattern)
def __init__(
self,
pattern: str,
options: PcreOptions = PCRE_DEFAULT_OPTIONS
):
self._compile(pattern, options)

def _compile(self, pattern: str, options: PcreOptions) -> None:
    """Compile and study ``pattern``, then apply the limits in ``options``.

    On success ``self.compiled`` holds the compiled pattern pointer and
    ``self.extra`` holds the study data pointer, with the match and
    recursion limits written into the pointed-to structure.

    Args:
        pattern: The regular expression source; encoded as UTF-8.
        options: Supplies the compile option bits and both limits.

    Raises:
        PcreException: If compiling or studying the pattern fails.
    """
    # Define both attributes first so _free()/__del__ can run safely even
    # when this method raises part-way through.
    self.compiled = None
    self.extra = None
    pattern_cstr = c_char_p(pattern.encode('utf8'))
    # The library reports failures by pointing this at a message string.
    error_message = c_char_p(None)
    error_offset = c_int(-1)
    compiled = _pcre_compile(
        pattern_cstr,
        options._get_compilation_options(),
        byref(error_message),
        byref(error_offset),
        None
    )
    if not compiled:
        offset = error_offset.value
        message = error_message.value.decode('utf8')
        raise PcreException(
            f'Pattern compilation failed at offset {offset}: {message}'
        )
    self.compiled = compiled
    study_options = PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_EXTRA_NEEDED
    studied = _pcre_study(
        self.compiled,
        study_options,
        byref(error_message)
    )
    if not studied:
        # A NULL result cannot carry the limits, so treat it as a failure
        # instead of dereferencing a NULL pointer below.
        raw_message = error_message.value
        detail = 'unknown error' if raw_message is None \
            else raw_message.decode('utf8')
        raise PcreException(f'Pattern study failed: {detail}')
    self.extra = studied
    # Fields must be set on the pointed-to struct (.contents); assigning
    # attributes on the ctypes pointer object itself never reaches C.
    extra_data = self.extra.contents
    # OR the flags in so the bits already set by pcre_study are preserved.
    extra_data.flags |= (
        PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION
    )
    extra_data.match_limit = options.match_limit
    extra_data.match_limit_recursion = options.match_limit_recursion

def match(
self,
Expand Down Expand Up @@ -215,4 +312,12 @@ def match(
return PcreMatch(matched_string)

def _free(self) -> None:
    """Release the study data and the compiled pattern, at most once each.

    Safe to call repeatedly and on a partially initialised object: a
    missing attribute, ``None`` or a NULL pointer is skipped, and each
    attribute is reset to ``None`` before its resource is released.
    """
    # getattr guards against __del__ running after a failed __init__.
    extra = getattr(self, 'extra', None)
    self.extra = None
    if extra:
        _pcre_free_study(extra)
    compiled = getattr(self, 'compiled', None)
    self.compiled = None
    if compiled:
        _pcre_free(compiled)

def __del__(self) -> None:
    """Release the native resources by delegating to ``_free``."""
    self._free()

0 comments on commit c00e5a9

Please sign in to comment.