diff --git a/README.md b/README.md index b906faf4ef..048741e343 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ - [Docker image not working on MacOS](#docker-image-not-working-on-macos) - [ValueError: filedescriptor out of range in select](#valueerror-filedescriptor-out-of-range-in-select) - [None:None in access logs](#nonenone-in-access-logs) + - [OSError when wrapping client for TLS Interception](#oserror-when-wrapping-client-for-tls-interception) - [Plugin Developer and Contributor Guide](#plugin-developer-and-contributor-guide) - [High level architecture](#high-level-architecture) - [Everything is a plugin](#everything-is-a-plugin) @@ -1721,6 +1722,52 @@ few obvious ones include: 1. Client established a connection but never completed the request. 2. A plugin returned a response prematurely, avoiding connection to upstream server. +## OSError when wrapping client for TLS Interception + +With `TLS Interception` on, you might occasionally see following exceptions: + +```console +2021-11-06 23:33:34,540 - pid:91032 [E] server.intercept:678 - OSError when wrapping client +Traceback (most recent call last): + ...[redacted]... + ...[redacted]... + ...[redacted]... +ssl.SSLError: [SSL: TLSV1_ALERT_UNKNOWN_CA] tlsv1 alert unknown ca (_ssl.c:997) +...[redacted]... - CONNECT oauth2.googleapis.com:443 - 0 bytes - 272.08 ms +``` + +Some clients can throw `TLSV1_ALERT_UNKNOWN_CA` if they cannot verify the certificate of the server +because it is signed by an unknown issuer CA. Which is the case when we are doing TLS interception. +This can be for a variety of reasons e.g. certificate pinning etc. + +Another exception you might see is `CERTIFICATE_VERIFY_FAILED`: + +```console +2021-11-06 23:36:02,002 - pid:91033 [E] handler.handle_readables:293 - Exception while receiving from client connection with reason SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:997)') +Traceback (most recent call last): + ...[redacted]... + ...[redacted]... + ...[redacted]... +ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:997) +...[redacted]... - CONNECT init.push.apple.com:443 - 0 bytes - 892.99 ms +``` + +In future, we might support serving original HTTPS content for such clients while still +performing TLS interception in the background. This will keep the clients happy without +impacting our ability to TLS intercept. Unfortunately, this feature is currently not available. + +Another example with `SSLEOFError` exception: + +```console +2021-11-06 23:46:40,446 - pid:91034 [E] server.intercept:678 - OSError when wrapping client +Traceback (most recent call last): + ...[redacted]... + ...[redacted]... + ...[redacted]... +ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:997) +...[redacted]... - CONNECT stock.adobe.io:443 - 0 bytes - 685.32 ms +``` + # Plugin Developer and Contributor Guide ## High level architecture diff --git a/proxy/common/constants.py b/proxy/common/constants.py index 25c6ee54c9..43c449be0f 100644 --- a/proxy/common/constants.py +++ b/proxy/common/constants.py @@ -12,6 +12,7 @@ import time import secrets import pathlib +import sysconfig import ipaddress from typing import List @@ -23,6 +24,10 @@ # /path/to/proxy.py/proxy folder PROXY_PY_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +# Path to virtualenv/lib/python3.X/site-packages +PROXY_PY_SITE_PACKAGES = sysconfig.get_path('purelib') +assert PROXY_PY_SITE_PACKAGES + CRLF = b'\r\n' COLON = b':' WHITESPACE = b' ' @@ -46,7 +51,9 @@ DEFAULT_CA_KEY_FILE = None DEFAULT_CA_SIGNING_KEY_FILE = None DEFAULT_CERT_FILE = None -DEFAULT_CA_FILE = None +DEFAULT_CA_FILE = pathlib.Path( + PROXY_PY_SITE_PACKAGES, +) / 'certifi' / 'cacert.pem' DEFAULT_CLIENT_RECVBUF_SIZE = DEFAULT_BUFFER_SIZE DEFAULT_DEVTOOLS_WS_PATH = b'/devtools' DEFAULT_DISABLE_HEADERS: List[bytes] = [] diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 2e25673ac3..d50d311d38 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -74,12 +74,22 @@ def selected_events(self) -> Generator[ Tuple[Readables, Writables], None, None, ]: + assert self.selector is not None events: Dict[socket.socket, int] = {} for work in self.works.values(): - events.update(work.get_events()) - assert self.selector is not None - for fd in events: - self.selector.register(fd, events[fd]) + worker_events = work.get_events() + events.update(worker_events) + for fd in worker_events: + # Can throw ValueError: Invalid file descriptor: -1 + # + # Work classes must handle the exception and shutdown + # gracefully otherwise this will result in bringing down the + # entire threadless process + # + # This is only possible when work.get_events pass + # an invalid file descriptor. Example, because of bad + # exception handling within the work implementation class. + self.selector.register(fd, worker_events[fd]) ev = self.selector.select(timeout=1) readables = [] writables = [] diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index 67f7989846..113fe6599d 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -66,9 +66,9 @@ flags.add_argument( '--ca-file', type=str, - default=DEFAULT_CA_FILE, - help='Default: None. Provide path to custom CA file for peer certificate validation. ' - 'Specially useful on MacOS.', + default=str(DEFAULT_CA_FILE), + help='Default: ' + str(DEFAULT_CA_FILE) + + '. Provide path to custom CA bundle for peer certificate verification', ) flags.add_argument( '--ca-signing-key-file', @@ -658,49 +658,122 @@ def generate_upstream_certificate( def intercept(self) -> Union[socket.socket, bool]: # Perform SSL/TLS handshake with upstream - self.wrap_server() + teardown = self.wrap_server() + if teardown: + raise HttpProtocolException( + 'Exception when wrapping server for interception', + ) # Generate certificate and perform handshake with client + # wrap_client also flushes client data before wrapping + # sending to client can raise, handle expected exceptions + teardown = self.wrap_client() + if teardown: + raise HttpProtocolException( + 'Exception when wrapping client for interception', + ) + # Update all plugin connection reference + # TODO(abhinavsingh): Is this required? + for plugin in self.plugins.values(): + plugin.client._conn = self.client.connection + return self.client.connection + + def wrap_server(self) -> bool: + assert self.upstream is not None + assert isinstance(self.upstream.connection, socket.socket) + try: + self.upstream.wrap(text_(self.request.host), self.flags.ca_file) + except ssl.SSLCertVerificationError: # Server raised certificate verification error + # When --disable-interception-on-ssl-cert-verification-error flag is on, + # we will cache such upstream hosts and avoid intercepting them for future + # requests. + logger.warning( + 'ssl.SSLCertVerificationError: ' + + 'Server raised cert verification error for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + return True + except ssl.SSLError as e: + if e.reason == 'SSLV3_ALERT_HANDSHAKE_FAILURE': + logger.warning( + '{0}: '.format(e.reason) + + 'Server raised handshake alert failure for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + else: + logger.exception( + 'SSLError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), exc_info=e, + ) + return True + assert isinstance(self.upstream.connection, ssl.SSLSocket) + return False + + def wrap_client(self) -> bool: + assert self.upstream is not None and self.flags.ca_signing_key_file is not None + assert isinstance(self.upstream.connection, ssl.SSLSocket) try: - # wrap_client also flushes client data before wrapping - # sending to client can raise, handle expected exceptions - self.wrap_client() + # TODO: Perform async certificate generation + generated_cert = self.generate_upstream_certificate( + cast(Dict[str, Any], self.upstream.connection.getpeercert()), + ) + self.client.wrap(self.flags.ca_signing_key_file, generated_cert) except subprocess.TimeoutExpired as e: # Popen communicate timeout logger.exception( 'TimeoutExpired during certificate generation', exc_info=e, ) return True + except ssl.SSLCertVerificationError: # Client raised certificate verification error + # When --disable-interception-on-ssl-cert-verification-error flag is on, + # we will cache such upstream hosts and avoid intercepting them for future + # requests. + logger.warning( + 'ssl.SSLCertVerificationError: ' + + 'Client raised cert verification error for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + return True + except ssl.SSLEOFError as e: + logger.warning( + 'ssl.SSLEOFError {0} when wrapping client for upstream: {1}'.format( + str(e), self.upstream.addr[0], + ), + ) + return True + except ssl.SSLError as e: + if e.reason in ('TLSV1_ALERT_UNKNOWN_CA', 'UNSUPPORTED_PROTOCOL'): + logger.warning( + '{0}: '.format(e.reason) + + 'Client raised cert verification error for upstream: {0}'.format( + self.upstream.addr[0], + ), + ) + else: + logger.exception( + 'OSError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), exc_info=e, + ) + return True except BrokenPipeError: logger.error( - 'BrokenPipeError when wrapping client', + 'BrokenPipeError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), ) return True except OSError as e: logger.exception( - 'OSError when wrapping client', exc_info=e, + 'OSError when wrapping client for upstream: {0}'.format( + self.upstream.addr[0], + ), exc_info=e, ) return True - # Update all plugin connection reference - # TODO(abhinavsingh): Is this required? - for plugin in self.plugins.values(): - plugin.client._conn = self.client.connection - return self.client.connection - - def wrap_server(self) -> None: - assert self.upstream is not None - assert isinstance(self.upstream.connection, socket.socket) - self.upstream.wrap(text_(self.request.host), self.flags.ca_file) - assert isinstance(self.upstream.connection, ssl.SSLSocket) - - def wrap_client(self) -> None: - assert self.upstream is not None and self.flags.ca_signing_key_file is not None - assert isinstance(self.upstream.connection, ssl.SSLSocket) - generated_cert = self.generate_upstream_certificate( - cast(Dict[str, Any], self.upstream.connection.getpeercert()), - ) - self.client.wrap(self.flags.ca_signing_key_file, generated_cert) - logger.debug( - 'TLS interception using %s', generated_cert, - ) + logger.debug('TLS intercepting using %s', generated_cert) + return False # # Event emitter callbacks diff --git a/proxy/plugin/cache/store/disk.py b/proxy/plugin/cache/store/disk.py index f50565f866..41429809b4 100644 --- a/proxy/plugin/cache/store/disk.py +++ b/proxy/plugin/cache/store/disk.py @@ -27,7 +27,8 @@ '--cache-dir', type=str, default=tempfile.gettempdir(), - help='Default: A temporary directory. Flag only applicable when cache plugin is used with on-disk storage.', + help='Default: A temporary directory. ' + + 'Flag only applicable when cache plugin is used with on-disk storage.', ) diff --git a/proxy/plugin/reverse_proxy.py b/proxy/plugin/reverse_proxy.py index 19cd13e774..13f5b80c84 100644 --- a/proxy/plugin/reverse_proxy.py +++ b/proxy/plugin/reverse_proxy.py @@ -12,9 +12,7 @@ import random import socket import logging -import sysconfig -from pathlib import Path from typing import List, Optional, Tuple, Any from urllib import parse as urlparse @@ -29,11 +27,6 @@ logger = logging.getLogger(__name__) -# We need CA bundle to verify TLS connection to upstream servers -PURE_LIB = sysconfig.get_path('purelib') -assert PURE_LIB -CACERT_PEM_PATH = Path(PURE_LIB) / 'certifi' / 'cacert.pem' - # TODO: ReverseProxyPlugin and ProxyPoolPlugin are implementing # a similar behavior. Abstract that particular logic out into its @@ -135,7 +128,7 @@ def handle_request(self, request: HttpParser) -> None: self.upstream.wrap( text_( url.hostname, - ), ca_file=str(CACERT_PEM_PATH), + ), ca_file=str(self.flags.ca_file), ) self.upstream.queue(memoryview(request.build())) except ConnectionRefusedError: diff --git a/proxy/proxy.py b/proxy/proxy.py index 988dec4535..6f9d20785b 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -514,6 +514,10 @@ def main( # configuration etc. # # TODO: Python shell within running proxy.py environment? + # + # TODO: Pid watcher which watches for processes started + # by proxy.py core. May be alert or restart those processes + # on failure. while True: time.sleep(1) except KeyboardInterrupt: diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index a413a3fe63..1c03068568 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -16,6 +16,7 @@ from typing import Any from unittest import mock +from proxy.common.constants import DEFAULT_CA_FILE from proxy.core.connection import TcpClientConnection, TcpServerConnection from proxy.http.handler import HttpProtocolHandler @@ -168,7 +169,7 @@ def mock_connection() -> Any: ) self.mock_ssl_context.assert_called_with( - ssl.Purpose.SERVER_AUTH, cafile=None, + ssl.Purpose.SERVER_AUTH, cafile=str(DEFAULT_CA_FILE), ) # self.assertEqual(self.mock_ssl_context.return_value.options, # ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 |