From 1a8f862e329c3872a11d4ef8eb85cf353ca2f4d5 Mon Sep 17 00:00:00 2001 From: Michael Handler Date: Tue, 4 Apr 2023 21:55:24 -0700 Subject: [PATCH] gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 (#8305) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bpo-22708: Upgrade HTTP CONNECT to protocol HTTP/1.1 (GH-NNNN) Use protocol HTTP/1.1 when sending HTTP CONNECT tunnelling requests; generate Host: headers if one is not already provided (required by HTTP/1.1), convert IDN domains to punycode in HTTP CONNECT requests. * Refactor tests to pass under -bb (fix ByteWarnings); missed some lines >80. * Use consistent 'tunnelling' spelling in Lib/http/client.py * Lib/test/test_httplib: Remove remnant of obsoleted test. * Use dict.copy() not copy.copy() * fix version changed * Update Lib/http/client.py Co-authored-by: bgehman * Switch to for/else: syntax, as suggested * Don't use for: else: * Sure, fine, w/e * Oops * 1nm to the left --------- Co-authored-by: Éric Co-authored-by: bgehman Co-authored-by: Oleg Iarygin --- Doc/library/http.client.rst | 12 ++ Lib/http/client.py | 25 ++- Lib/test/test_httplib.py | 147 ++++++++++++++++-- Misc/ACKS | 1 + .../2018-07-16-14-10-29.bpo-22708.592iRR.rst | 3 + 5 files changed, 167 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index ad3416135e307b..38821b32c91cf1 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -353,6 +353,13 @@ HTTPConnection Objects The *headers* argument should be a mapping of extra HTTP headers to send with the CONNECT request. + As HTTP/1.1 is used for HTTP CONNECT tunnelling request, `as per the RFC + `_, a HTTP ``Host:`` + header must be provided, matching the authority-form of the request target + provided as the destination for the CONNECT request. If a HTTP ``Host:`` + header is not provided via the headers argument, one is generated and + transmitted automatically. + For example, to tunnel through a HTTPS proxy server running locally on port 8080, we would pass the address of the proxy to the :class:`HTTPSConnection` constructor, and the address of the host that we eventually want to reach to @@ -365,6 +372,11 @@ HTTPConnection Objects .. versionadded:: 3.2 + .. versionchanged:: 3.12 + HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from + protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so + one will be automatically generated and transmitted if not provided in + the headers argument. .. method:: HTTPConnection.connect() diff --git a/Lib/http/client.py b/Lib/http/client.py index bd55e7d239af0c..0f5cd35247ae82 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -870,9 +870,9 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, def set_tunnel(self, host, port=None, headers=None): """Set up host and port for HTTP CONNECT tunnelling. - In a connection that uses HTTP CONNECT tunneling, the host passed to the - constructor is used as a proxy server that relays all communication to - the endpoint passed to `set_tunnel`. This done by sending an HTTP + In a connection that uses HTTP CONNECT tunnelling, the host passed to + the constructor is used as a proxy server that relays all communication + to the endpoint passed to `set_tunnel`. This done by sending an HTTP CONNECT request to the proxy server when the connection is established. This method must be called before the HTTP connection has been @@ -880,6 +880,13 @@ def set_tunnel(self, host, port=None, headers=None): The headers argument should be a mapping of extra HTTP headers to send with the CONNECT request. + + As HTTP/1.1 is used for HTTP CONNECT tunnelling request, as per the RFC + (https://tools.ietf.org/html/rfc7231#section-4.3.6), a HTTP Host: + header must be provided, matching the authority-form of the request + target provided as the destination for the CONNECT request. If a + HTTP Host: header is not provided via the headers argument, one + is generated and transmitted automatically. """ if self.sock: @@ -887,10 +894,15 @@ def set_tunnel(self, host, port=None, headers=None): self._tunnel_host, self._tunnel_port = self._get_hostport(host, port) if headers: - self._tunnel_headers = headers + self._tunnel_headers = headers.copy() else: self._tunnel_headers.clear() + if not any(header.lower() == "host" for header in self._tunnel_headers): + encoded_host = self._tunnel_host.encode("idna").decode("ascii") + self._tunnel_headers["Host"] = "%s:%d" % ( + encoded_host, self._tunnel_port) + def _get_hostport(self, host, port): if port is None: i = host.rfind(':') @@ -915,8 +927,9 @@ def set_debuglevel(self, level): self.debuglevel = level def _tunnel(self): - connect = b"CONNECT %s:%d HTTP/1.0\r\n" % ( - self._tunnel_host.encode("ascii"), self._tunnel_port) + connect = b"CONNECT %s:%d %s\r\n" % ( + self._tunnel_host.encode("idna"), self._tunnel_port, + self._http_vsn_str.encode("ascii")) headers = [connect] for header, value in self._tunnel_headers.items(): headers.append(f"{header}: {value}\r\n".encode("latin-1")) diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 9ff6afcbadec54..b4f4e2b14351a6 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -2187,11 +2187,12 @@ def test_getting_header_defaultint(self): class TunnelTests(TestCase): def setUp(self): response_text = ( - 'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT + 'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT 'HTTP/1.1 200 OK\r\n' # Reply to HEAD 'Content-Length: 42\r\n\r\n' ) self.host = 'proxy.com' + self.port = client.HTTP_PORT self.conn = client.HTTPConnection(self.host) self.conn._create_connection = self._create_connection(response_text) @@ -2203,15 +2204,45 @@ def create_connection(address, timeout=None, source_address=None): return FakeSocket(response_text, host=address[0], port=address[1]) return create_connection - def test_set_tunnel_host_port_headers(self): + def test_set_tunnel_host_port_headers_add_host_missing(self): tunnel_host = 'destination.com' tunnel_port = 8888 tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'} + tunnel_headers_after = tunnel_headers.copy() + tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port) self.conn.set_tunnel(tunnel_host, port=tunnel_port, headers=tunnel_headers) self.conn.request('HEAD', '/', '') self.assertEqual(self.conn.sock.host, self.host) - self.assertEqual(self.conn.sock.port, client.HTTP_PORT) + self.assertEqual(self.conn.sock.port, self.port) + self.assertEqual(self.conn._tunnel_host, tunnel_host) + self.assertEqual(self.conn._tunnel_port, tunnel_port) + self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after) + + def test_set_tunnel_host_port_headers_set_host_identical(self): + tunnel_host = 'destination.com' + tunnel_port = 8888 + tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)', + 'Host': '%s:%d' % (tunnel_host, tunnel_port)} + self.conn.set_tunnel(tunnel_host, port=tunnel_port, + headers=tunnel_headers) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) + self.assertEqual(self.conn._tunnel_host, tunnel_host) + self.assertEqual(self.conn._tunnel_port, tunnel_port) + self.assertEqual(self.conn._tunnel_headers, tunnel_headers) + + def test_set_tunnel_host_port_headers_set_host_different(self): + tunnel_host = 'destination.com' + tunnel_port = 8888 + tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)', + 'Host': '%s:%d' % ('example.com', 4200)} + self.conn.set_tunnel(tunnel_host, port=tunnel_port, + headers=tunnel_headers) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) self.assertEqual(self.conn._tunnel_host, tunnel_host) self.assertEqual(self.conn._tunnel_port, tunnel_port) self.assertEqual(self.conn._tunnel_headers, tunnel_headers) @@ -2223,17 +2254,96 @@ def test_disallow_set_tunnel_after_connect(self): 'destination.com') def test_connect_with_tunnel(self): - self.conn.set_tunnel('destination.com') + d = { + b'host': b'destination.com', + b'port': client.HTTP_PORT, + } + self.conn.set_tunnel(d[b'host'].decode('ascii')) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) + self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n' + b'Host: %(host)s:%(port)d\r\n\r\n' % d, + self.conn.sock.data) + self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d, + self.conn.sock.data) + + def test_connect_with_tunnel_with_default_port(self): + d = { + b'host': b'destination.com', + b'port': client.HTTP_PORT, + } + self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port']) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) + self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n' + b'Host: %(host)s:%(port)d\r\n\r\n' % d, + self.conn.sock.data) + self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d, + self.conn.sock.data) + + def test_connect_with_tunnel_with_nonstandard_port(self): + d = { + b'host': b'destination.com', + b'port': 8888, + } + self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port']) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) + self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n' + b'Host: %(host)s:%(port)d\r\n\r\n' % d, + self.conn.sock.data) + self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d, + self.conn.sock.data) + + # This request is not RFC-valid, but it's been possible with the library + # for years, so don't break it unexpectedly... This also tests + # case-insensitivity when injecting Host: headers if they're missing. + def test_connect_with_tunnel_with_different_host_header(self): + d = { + b'host': b'destination.com', + b'tunnel_host_header': b'example.com:9876', + b'port': client.HTTP_PORT, + } + self.conn.set_tunnel( + d[b'host'].decode('ascii'), + headers={'HOST': d[b'tunnel_host_header'].decode('ascii')}) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) + self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n' + b'HOST: %(tunnel_host_header)s\r\n\r\n' % d, + self.conn.sock.data) + self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d, + self.conn.sock.data) + + def test_connect_with_tunnel_different_host(self): + d = { + b'host': b'destination.com', + b'port': client.HTTP_PORT, + } + self.conn.set_tunnel(d[b'host'].decode('ascii')) + self.conn.request('HEAD', '/', '') + self.assertEqual(self.conn.sock.host, self.host) + self.assertEqual(self.conn.sock.port, self.port) + self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n' + b'Host: %(host)s:%(port)d\r\n\r\n' % d, + self.conn.sock.data) + self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d, + self.conn.sock.data) + + def test_connect_with_tunnel_idna(self): + dest = '\u03b4\u03c0\u03b8.gr' + dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT) + expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % ( + dest_port, dest_port) + self.conn.set_tunnel(dest) self.conn.request('HEAD', '/', '') self.assertEqual(self.conn.sock.host, self.host) self.assertEqual(self.conn.sock.port, client.HTTP_PORT) - self.assertIn(b'CONNECT destination.com', self.conn.sock.data) - # issue22095 - self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data) - self.assertIn(b'Host: destination.com', self.conn.sock.data) - - # This test should be removed when CONNECT gets the HTTP/1.1 blessing - self.assertNotIn(b'Host: proxy.com', self.conn.sock.data) + self.assertIn(expected, self.conn.sock.data) def test_tunnel_connect_single_send_connection_setup(self): """Regresstion test for https://bugs.python.org/issue43332.""" @@ -2253,12 +2363,19 @@ def test_tunnel_connect_single_send_connection_setup(self): msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}') def test_connect_put_request(self): - self.conn.set_tunnel('destination.com') + d = { + b'host': b'destination.com', + b'port': client.HTTP_PORT, + } + self.conn.set_tunnel(d[b'host'].decode('ascii')) self.conn.request('PUT', '/', '') self.assertEqual(self.conn.sock.host, self.host) - self.assertEqual(self.conn.sock.port, client.HTTP_PORT) - self.assertIn(b'CONNECT destination.com', self.conn.sock.data) - self.assertIn(b'Host: destination.com', self.conn.sock.data) + self.assertEqual(self.conn.sock.port, self.port) + self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n' + b'Host: %(host)s:%(port)d\r\n\r\n' % d, + self.conn.sock.data) + self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d, + self.conn.sock.data) def test_tunnel_debuglog(self): expected_header = 'X-Dummy: 1' diff --git a/Misc/ACKS b/Misc/ACKS index 8cf5166a2bb1f4..929e06a87cb794 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -693,6 +693,7 @@ Anders Hammarquist Mark Hammond Harald Hanche-Olsen Manus Hand +Michael Handler Andreas Hangauer Milton L. Hankins Carl Bordum Hansen diff --git a/Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst b/Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst new file mode 100644 index 00000000000000..00bcf38bbcdf51 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst @@ -0,0 +1,3 @@ +http.client CONNECT method tunnel improvements: Use HTTP 1.1 protocol; send +a matching Host: header with CONNECT, if one is not provided; convert IDN +domain names to Punycode. Patch by Michael Handler.