Review notes (patch tools skip any text before the first "diff --git" line):

  * wordfence/api/noc1.py: deserialize the base64-decoded `data` instead of
    `response.data`; the response is indexed as a dict just above.
  * wordfence/cli/malwarescan/malwarescan.py: add the missing f-string prefix
    so the cache key interpolates `match_engine.module`.
  * wordfence/intel/signatures.py: store the `signature_set` argument (not
    the SignatureSet class) and use the new `signature_hash` argument.
  * wordfence/util/platform.py: 'x86_64' was listed under ARM64; map machine
    names to the matching platform and compare them in lower case.

NOTE(review): Platform.detect() may return None while
get_precompiled_malware_signatures() reads `platform.key` unconditionally,
and _get_signatures() is still annotated as returning SignatureSet although
it now returns a 2-tuple. Both are left as-is; confirm the intended handling.
NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch, and the "index" hashes predate the
fixes above; check the hunks against the tree before applying.

diff --git a/wordfence/api/noc1.py b/wordfence/api/noc1.py
index 330793ce..c0740915 100644
--- a/wordfence/api/noc1.py
+++ b/wordfence/api/noc1.py
@@ -1,13 +1,18 @@
 import json
 import re
+import base64
 
 from typing import Callable, Optional
 
 from .noc_client import NocClient
 from .exceptions import ApiException
 from .licensing import License
-from ..intel.signatures import CommonString, Signature, SignatureSet
-from ..util.validation import DictionaryValidator, ListValidator, Validator
+from ..intel.signatures import CommonString, Signature, SignatureSet, \
+    PrecompiledSignatureSet
+from ..util.validation import DictionaryValidator, ListValidator, Validator, \
+    OptionalValueValidator
+from ..util.platform import Platform
+from ..util.serialization import limited_deserialize
 
 NOC1_BASE_URL = 'https://noc1.wordfence.com/v2.27/'
 
@@ -137,6 +142,60 @@ def get_malware_signatures(self) -> SignatureSet:
                 ) from index_error
         return SignatureSet(common_strings, signatures, self.license)
 
+    def get_precompiled_patterns(
+        self,
+        platform: str,
+        library_version: str,
+        library_type: Optional[str] = None,
+        database_version: int = PrecompiledSignatureSet.VERSION
+    ) -> dict:
+        """Request precompiled patterns and return the validated response."""
+        parameters = {
+            'platform': platform,
+            'library_version': library_version,
+            'database_version': database_version
+        }
+        if library_type is not None:
+            parameters['library_type'] = library_type
+        response = self.request('get_precompiled_patterns', parameters)
+        validator = DictionaryValidator({
+            'data': OptionalValueValidator(str)
+        })
+        self.validate_response(response, validator)
+        return response
+
+    def get_precompiled_malware_signatures(
+        self,
+        platform: Platform,
+        library_version: str,
+        library_type: Optional[str] = None,
+        database_version: int = PrecompiledSignatureSet.VERSION
+    ) -> Optional[PrecompiledSignatureSet]:
+        """Fetch a precompiled signature set, or None if no data is sent."""
+        response = self.get_precompiled_patterns(
+            platform.key,
+            library_version,
+            library_type,
+            database_version
+        )
+        data = response['data']
+        if data is None:
+            return None
+        data = base64.b64decode(data)
+        signature_set = limited_deserialize(
+            data,
+            {
+                'wordfence.intel.signatures.PrecompiledSignatureSet',
+                'wordfence.intel.signatures.SignatureSet',
+                'wordfence.intel.signatures.Signature'
+            }
+        )
+        if isinstance(signature_set, PrecompiledSignatureSet):
+            return signature_set
+        raise ApiException(
+            'Malformed signature set data received from Wordfence API'
+        )
+
     def ping_api_key(self) -> bool:
         return self.process_simple_request('ping_api_key')
 
diff --git a/wordfence/api/noc_client.py b/wordfence/api/noc_client.py
index fa430a34..32db9138 100644
--- a/wordfence/api/noc_client.py
+++ b/wordfence/api/noc_client.py
@@ -21,6 +21,8 @@ def __init__(
         self.base_url = base_url \
             if base_url is not None \
             else self.get_default_base_url()
+        from wordfence.logging import log
+        log.debug(f'NOC1 Base URL: {self.base_url}')
         self.timeout = timeout
 
     def get_default_base_url(self) -> str:
diff --git a/wordfence/cli/context.py b/wordfence/cli/context.py
index 5c0cd09f..e1897461 100644
--- a/wordfence/cli/context.py
+++ b/wordfence/cli/context.py
@@ -181,8 +181,10 @@ def display_version(self) -> None:
         has_vectorscan = self.has_vectorscan()
         vectorscan_support_text = yes_no(has_vectorscan)
         if has_vectorscan:
-            vectorscan_support_text += \
-                f' - Version: {vectorscan.VERSION}'
+            vectorscan_support_text += (
+                f' - Version: {vectorscan.VERSION} (API Version: '
+                f'{vectorscan.API_VERSION})'
+            )
         print(f'Vectorscan Supported: {vectorscan_support_text}')
 
     def has_terminal_output(self) -> bool:
diff --git a/wordfence/cli/malwarescan/definition.py b/wordfence/cli/malwarescan/definition.py
index 518f2422..f3b842ba 100644
--- a/wordfence/cli/malwarescan/definition.py
+++ b/wordfence/cli/malwarescan/definition.py
@@ -204,6 +204,14 @@
         "argument_type": "FLAG",
         "default": False
     },
+    "pre-compile-generic": {
+        "description": "Pre-compile and cache the signature set without "
+                       "any CPU-specific optimizations and without running "
+                       "a scan",
+        "context": "CLI",
+        "argument_type": "FLAG",
+        "default": False
+    },
     "pattern-database-path": {
         "description": "Use an alternate path for storage of the pattern "
                        "database",
@@ -211,7 +219,8 @@
         "argument_type": "OPTION",
         "meta": {
             "accepts_file": True
-        }
+        },
+        "default": None
     },
     "profile": {
         "description": "Profile scan performance",
diff --git a/wordfence/cli/malwarescan/malwarescan.py b/wordfence/cli/malwarescan/malwarescan.py
index 610df298..aedaa09e 100644
--- a/wordfence/cli/malwarescan/malwarescan.py
+++ b/wordfence/cli/malwarescan/malwarescan.py
@@ -7,8 +7,10 @@
 
 from wordfence import scanning
 from wordfence.scanning import filtering
-from wordfence.scanning.matching import MatchEngine, MatchEngineOptions
-from wordfence.util import caching, pcre, serialization
+from wordfence.scanning.matching import MatchEngine, MatchEngineOptions, \
+    MatchEngineCompilerOptions
+from wordfence.util import caching, pcre, serialization, vectorscan
+from wordfence.util.platform import Platform
 from wordfence.intel.signatures import SignatureSet, PrecompiledSignatureSet
 from wordfence.logging import (log, remove_initial_handler,
                                restore_initial_handler)
@@ -58,10 +60,36 @@ def _filter_signatures(
         signature_count = len(signatures.signatures)
         log.debug(f'Filtered signature count: {signature_count}')
 
-    def _get_signatures(self) -> SignatureSet:
+    def _get_pre_compiled_signatures(
+        self,
+        match_engine: MatchEngine
+    ) -> Optional[PrecompiledSignatureSet]:
+
+        def fetch_pre_compiled() -> Optional[PrecompiledSignatureSet]:
+            client = self.context.get_noc1_client()
+            return client.get_precompiled_malware_signatures(
+                Platform.detect(),
+                vectorscan.API_VERSION
+            )
+
+        cacheable = caching.Cacheable(
+            f'pre-compiled-signatures-{match_engine.module}',
+            fetch_pre_compiled,
+            caching.DURATION_ONE_DAY
+        )
+        return cacheable.get(self.cache)
+
+    def _get_signatures(self, match_engine: MatchEngine) -> SignatureSet:
+        supports_pre_compilation = match_engine.supports_pre_compilation()
+        if supports_pre_compilation:
+            precompiled = self._get_pre_compiled_signatures(match_engine)
+            if precompiled is not None:
+                return precompiled.signature_set, precompiled.data
+
         def fetch_signatures() -> SignatureSet:
             noc1_client = self.context.get_noc1_client()
             return noc1_client.get_malware_signatures()
+
         self.cacheable_signatures = caching.Cacheable(
             'signatures',
             fetch_signatures,
@@ -69,7 +97,7 @@ def fetch_signatures() -> SignatureSet:
         )
         signatures = self.cacheable_signatures.get(self.cache)
         self._filter_signatures(signatures)
-        return signatures
+        return signatures, None
 
     def _get_file_list_separator(self) -> str:
         if isinstance(self.config.file_list_separator, bytes):
@@ -131,13 +159,17 @@ def _get_database_source(
         current_hash = match_engine_options.signature_set.get_hash()
 
         def compile_database():
-            compiler = match_engine.get_compiler(match_engine_options)
+            compiler_options = MatchEngineCompilerOptions(
+                generic=self.config.pre_compile_generic
+            )
+            compiler = match_engine.get_compiler(compiler_options)
             if compiler is None:
                 return None
             compiled = compiler.compile_serializable(
                 match_engine_options.signature_set
             )
             return PrecompiledSignatureSet(
+                match_engine_options.signature_set,
                 current_hash,
                 compiled,
                 match_engine_options.signature_set.license
@@ -146,7 +178,8 @@ def compile_database():
         def is_precompiled_compatible(precompiled):
             return (
                 precompiled is not None and
-                precompiled.signature_hash == current_hash
+                precompiled.signature_hash == current_hash and
+                precompiled.is_supported_version()
             )
 
         if self.config.pattern_database_path is None:
@@ -158,7 +191,7 @@ def filter_precompiled(precompiled):
                     )
                 return precompiled
             cacheable = caching.Cacheable(
-                f'precompiled-signatures-{match_engine.module}',
+                f'compiled-signatures-{match_engine.module}',
                 compile_database,
                 filters=[
                     filter_precompiled
@@ -209,20 +242,27 @@ def handle_interrupt(signal_number: int, stack) -> None:
 
     def invoke(self) -> int:
         self._initialize_interrupt_handling()
-        signatures = self._get_signatures()
         match_engine = MatchEngine.for_option(self.config.match_engine)
+        signatures, database_source = self._get_signatures(match_engine)
         match_engine_options = MatchEngineOptions(
             signature_set=signatures,
             pcre_options=self._get_pcre_options(),
             match_all=self.config.match_all
         )
-        match_engine_options.database_source = self._get_database_source(
-            match_engine,
-            match_engine_options,
-            force=self.config.pre_compile
+        pre_compile = (
+            self.config.pre_compile
+            or self.config.pre_compile_generic
+        )
+        match_engine_options.database_source = (
+            database_source if database_source is not None else
+            self._get_database_source(
+                match_engine,
+                match_engine_options,
+                force=pre_compile
+            )
         )
 
-        if self.config.pre_compile:
+        if pre_compile:
             if match_engine_options.database_source is None:
                 log.error(
                     'Signature set pre-compilation is not supported '
diff --git a/wordfence/intel/signatures.py b/wordfence/intel/signatures.py
index 903f1e69..33cd96ee 100644
--- a/wordfence/intel/signatures.py
+++ b/wordfence/intel/signatures.py
@@ -1,5 +1,5 @@
 from hashlib import sha256
-from typing import Union
+from typing import Optional
 
 from ..api.licensing import License, LicenseSpecific
 
@@ -81,15 +81,23 @@ def get_hash(self) -> str:
 
 class PrecompiledSignatureSet(LicenseSpecific):
 
+    VERSION = 1
+
     def __init__(
         self,
-        signature_set: Union[bytes, SignatureSet],
+        signature_set: SignatureSet,
+        signature_hash: Optional[bytes],
         data: bytes,
         license: License = None
     ):
         super().__init__(license)
+        self.signature_set = signature_set
         self.signature_hash = (
-            signature_set if isinstance(signature_set, bytes)
+            signature_hash if signature_hash is not None
             else signature_set.get_hash()
         )
         self.data = data
+        self.version = self.VERSION
+
+    def is_supported_version(self) -> bool:
+        return hasattr(self, 'version') and self.version == self.VERSION
diff --git a/wordfence/scanning/matching/matching.py b/wordfence/scanning/matching/matching.py
index f5b1c7df..559383d7 100644
--- a/wordfence/scanning/matching/matching.py
+++ b/wordfence/scanning/matching/matching.py
@@ -110,6 +110,11 @@ def __init__(self, matcher: Matcher):
         super().__init__()
 
 
+@dataclass
+class MatchEngineCompilerOptions:
+    generic: bool = False
+
+
 @dataclass
 class MatchEngineOptions:
     signature_set: SignatureSet
@@ -164,10 +169,17 @@ def _get_loaded_module(self):
             self._loaded_module = self._load_module()
         return self._loaded_module
 
-    def get_compiler(self, options: MatchEngineOptions) -> Optional[Compiler]:
+    def get_compiler(
+        self,
+        options: MatchEngineCompilerOptions
+    ) -> Optional[Compiler]:
         module = self._get_loaded_module()
         return module.create_compiler(options)
 
+    def supports_pre_compilation(self) -> bool:
+        compiler = self.get_compiler(MatchEngineCompilerOptions())
+        return compiler is not None
+
     def create_matcher(self, options: MatchEngineOptions) -> Matcher:
         module = self._get_loaded_module()
         return module.create_matcher(options)
diff --git a/wordfence/scanning/matching/pcre.py b/wordfence/scanning/matching/pcre.py
index 9e5be114..a9a116ad 100644
--- a/wordfence/scanning/matching/pcre.py
+++ b/wordfence/scanning/matching/pcre.py
@@ -8,7 +8,8 @@
     PcreOptions, PCRE_DEFAULT_OPTIONS
 
 from .matching import Matcher, BaseMatcherContext, TimeoutException, \
-    MatchWorkspace, MatchEngineOptions, DEFAULT_TIMEOUT
+    MatchWorkspace, MatchEngineCompilerOptions, MatchEngineOptions, \
+    DEFAULT_TIMEOUT
 
 
 if not pcre.AVAILABLE:
@@ -234,7 +235,7 @@ def create_workspace(self) -> PcreMatchWorkspace:
         return PcreMatchWorkspace()
 
 
-def create_compiler(options: MatchEngineOptions) -> None:
+def create_compiler(options: MatchEngineCompilerOptions) -> None:
     return None
 
 
diff --git a/wordfence/scanning/matching/vectorscan.py b/wordfence/scanning/matching/vectorscan.py
index b6e6101b..6fd42053 100644
--- a/wordfence/scanning/matching/vectorscan.py
+++ b/wordfence/scanning/matching/vectorscan.py
@@ -5,7 +5,7 @@
 from ...util import vectorscan
 
 from .matching import MatchEngineOptions, Matcher, BaseMatcherContext, \
-    MatchWorkspace, Compiler
+    MatchWorkspace, MatchEngineCompilerOptions, Compiler
 
 
 if not vectorscan.AVAILABLE:
@@ -14,7 +14,8 @@
 
 from ...util.vectorscan import VectorscanStreamScanner, VectorscanMatch, \
     VectorscanFlags, VectorscanDatabase, VectorscanScanTerminated, \
-    VectorscanMode, vectorscan_compile, vectorscan_deserialize
+    VectorscanMode, vectorscan_compile, vectorscan_deserialize, \
+    VectorscanPlatformInfo, VectorscanCpuFeatures, VectorscanTuneFamily
 
 
 class VectorscanMatcherContext(BaseMatcherContext):
@@ -54,6 +55,9 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
 
 class VectorscanCompiler(Compiler):
 
+    def __init__(self, generic: bool = False):
+        self.generic = generic
+
     def compile(self, signature_set: SignatureSet) -> bytes:
         patterns = {
             signature.identifier: signature.rule
@@ -69,10 +73,15 @@ def compile(self, signature_set: SignatureSet) -> bytes:
             VectorscanFlags.SINGLEMATCH |
             VectorscanFlags.ALLOWEMPTY
         )
+        platform_info = VectorscanPlatformInfo(
+            VectorscanCpuFeatures.NONE,
+            VectorscanTuneFamily.GENERIC
+        ) if self.generic else None
         database = vectorscan_compile(
             patterns,
             mode=VectorscanMode.STREAM,
-            flags=flags
+            flags=flags,
+            platform_info=platform_info
         )
         log.debug('Successfully compiled vectorscan database')
         return database
@@ -134,8 +143,8 @@ def create_context(self) -> VectorscanMatcherContext:
         )
 
 
-def create_compiler(options: MatchEngineOptions):
-    return VectorscanCompiler()
+def create_compiler(options: MatchEngineCompilerOptions):
+    return VectorscanCompiler(generic=options.generic)
 
 
 def create_matcher(options: MatchEngineOptions) -> VectorscanMatcher:
diff --git a/wordfence/util/platform.py b/wordfence/util/platform.py
new file mode 100644
index 00000000..ec195954
--- /dev/null
+++ b/wordfence/util/platform.py
@@ -0,0 +1,23 @@
+from enum import Enum
+from platform import machine
+from typing import Optional, Set
+
+
+class Platform(Enum):
+
+    # Values reported by platform.machine() vary by OS (e.g. 'x86_64' or
+    # 'AMD64' for 64-bit x86, 'aarch64' or 'arm64' for 64-bit ARM).
+    AMD64 = ('amd64', {'amd64', 'x86_64'})
+    ARM64 = ('arm64', {'arm64', 'aarch64'})
+
+    def __init__(self, key: str, machine_names: Set[str]):
+        self.key = key
+        self.machine_names = machine_names
+
+    @classmethod
+    def detect(cls) -> Optional['Platform']:
+        machine_name = machine().lower()
+        for platform in cls:
+            if machine_name in platform.machine_names:
+                return platform
+        return None
diff --git a/wordfence/util/vectorscan/bindings.py b/wordfence/util/vectorscan/bindings.py
index c930476a..23ae4647 100644
--- a/wordfence/util/vectorscan/bindings.py
+++ b/wordfence/util/vectorscan/bindings.py
@@ -20,6 +20,7 @@
 _hs_version.argtypes = []
 _hs_version.restype = c_char_p
 VERSION = _hs_version().decode('ascii')
+API_VERSION = ''.join(VERSION.split()[:1])
 
 
 class _StructHsDatabase(Structure):
@@ -43,7 +44,12 @@ class _StructHsCompileError(Structure):
 
 
 class _StructHsPlatformInfo(Structure):
-    pass
+    _fields_ = [
+        ('tune', c_uint),
+        ('cpu_features', c_ulonglong),
+        ('reserved1', c_ulonglong),
+        ('reserved2', c_ulonglong)
+    ]
 
 
 _hs_platform_info_p = POINTER(_StructHsPlatformInfo)
@@ -471,10 +477,45 @@ def __del__(self) -> None:
         self._close_stream()
 
 
+class VectorscanCpuFeatures(IntFlag):
+    NONE = 0
+    AVX2 = 4
+    AVX512 = 8
+    AVX512VBMI = 16
+
+
+class VectorscanTuneFamily(IntEnum):
+    GENERIC = 0
+    SNB = 1
+    IVB = 2
+    HSW = 3
+    SLM = 4
+    BDW = 5
+    SKL = 6
+    SKX = 7
+    GLM = 8
+    ICL = 9
+    ICX = 10
+
+
+class VectorscanPlatformInfo:
+
+    def __init__(
+        self,
+        cpu_features: VectorscanCpuFeatures,
+        tune_family: VectorscanTuneFamily
+    ):
+        self._platform_info = _StructHsPlatformInfo(
+            c_uint(tune_family.value),
+            c_ulonglong(cpu_features.value)
+        )
+
+
 def vectorscan_compile(
     patterns: Dict[int, str],
     mode: VectorscanMode = VectorscanMode.BLOCK,
     flags: VectorscanFlags = VectorscanFlags.NONE,
+    platform_info: Optional[VectorscanPlatformInfo] = None
 ) -> VectorscanDatabase:
     database = _hs_database_p()
     compiler_error = _hs_compile_error_p()
@@ -489,13 +530,15 @@ def vectorscan_compile(
     for i in range(0, len(ids)):
         c_flags[i] = c_uint(flags)
     signals.reset()
+    platform_info_p = _hs_platform_info_p() if platform_info is None \
+        else byref(platform_info._platform_info)
     error = _hs_compile_multi(
         expressions,
         c_flags,
         ids,
         c_int(len(patterns)),
         c_int(mode),
-        _hs_platform_info_p(),
+        platform_info_p,
         byref(database),
         byref(compiler_error)
     )