Skip to content

Commit

Permalink
Fix #398 HTTP/1.0 related issue
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinavsingh committed Nov 12, 2021
1 parent 5a5a1bb commit 14af159
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 32 deletions.
1 change: 1 addition & 0 deletions proxy/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def _env_threadless_compliant() -> bool:
COMMA = b','
DOT = b'.'
SLASH = b'/'
HTTP_1_0 = b'HTTP/1.0'
HTTP_1_1 = b'HTTP/1.1'

PROXY_AGENT_HEADER_KEY = b'Proxy-agent'
Expand Down
70 changes: 46 additions & 24 deletions proxy/http/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"""
from typing import TypeVar, NamedTuple, Optional, Dict, Type, Tuple, List

from ...common.constants import DEFAULT_DISABLE_HEADERS, COLON, SLASH, CRLF
from ...common.constants import DEFAULT_DISABLE_HEADERS, COLON, HTTP_1_0, SLASH, CRLF
from ...common.constants import WHITESPACE, HTTP_1_1, DEFAULT_HTTP_PORT
from ...common.utils import build_http_request, build_http_response, find_http_line, text_

Expand Down Expand Up @@ -162,20 +162,26 @@ def body_expected(self) -> bool:
"""Returns true if content or chunked response is expected."""
return self.content_expected() or self.is_chunked_encoded()

def parse(self, raw: bytes) -> None:
def parse(self, oraw: bytes) -> None:
"""Parses Http request out of raw bytes.
Check for `HttpParser.state` after `parse` has successfully returned.
"""
self.total_size += len(raw)
raw = self.buffer + raw
self.buffer, more = b'', len(raw) > 0
while more and self.state != httpParserStates.COMPLETE:
# gte with HEADERS_COMPLETE also encapsulated RCVING_BODY state
more, raw = self._process_body(raw) \
if self.state >= httpParserStates.HEADERS_COMPLETE else \
self._process_line_and_headers(raw)
self.buffer = raw
try:
self.total_size += len(oraw)
raw = self.buffer + oraw
self.buffer, more = b'', len(raw) > 0
while more and self.state != httpParserStates.COMPLETE:
# gte with HEADERS_COMPLETE also encapsulated RCVING_BODY state
more, raw = self._process_body(raw) \
if self.state >= httpParserStates.HEADERS_COMPLETE else \
self._process_line_and_headers(raw)
self.buffer = raw
except Exception as e:
print(self.buffer)
print(oraw)
print(self.headers)
raise e

def build(self, disable_headers: Optional[List[bytes]] = None, for_proxy: bool = False) -> bytes:
"""Rebuild the request object."""
Expand Down Expand Up @@ -217,7 +223,25 @@ def build_response(self) -> bytes:
)

def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
if b'content-length' in self.headers:
# Ref: http://www.ietf.org/rfc/rfc2616.txt
# 3.If a Content-Length header field (section 14.13) is present, its
# decimal value in OCTETs represents both the entity-length and the
# transfer-length. The Content-Length header field MUST NOT be sent
# if these two lengths are different (i.e., if a Transfer-Encoding
# header field is present). If a message is received with both a
# Transfer-Encoding header field and a Content-Length header field,
# the latter MUST be ignored.
#
# TL;DR -- Give transfer-encoding header preference over content-length.
if self.is_chunked_encoded():
if not self.chunk:
self.chunk = ChunkParser()
raw = self.chunk.parse(raw)
if self.chunk.state == chunkParserStates.COMPLETE:
self.body = self.chunk.body
self.state = httpParserStates.COMPLETE
more = False
elif b'content-length' in self.headers:
self.state = httpParserStates.RCVING_BODY
if self.body is None:
self.body = b''
Expand All @@ -228,19 +252,17 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
len(self.body) == int(self.header(b'content-length')):
self.state = httpParserStates.COMPLETE
more, raw = len(raw) > 0, raw[total_size - received_size:]
elif self.is_chunked_encoded():
if not self.chunk:
self.chunk = ChunkParser()
raw = self.chunk.parse(raw)
if self.chunk.state == chunkParserStates.COMPLETE:
self.body = self.chunk.body
self.state = httpParserStates.COMPLETE
more = False
else:
raise NotImplementedError(
'Parser shouldn\'t have reached here. ' +
'This can happen when content length header is missing but their is a body in the payload',
)
# HTTP/1.0 scenario only
assert self.version == HTTP_1_0
self.state = httpParserStates.RCVING_BODY
# Received a packet without content-length header
# and no transfer-encoding specified.
#
# Ref https://github.com/abhinavsingh/proxy.py/issues/398
# See TestHttpParser.test_issue_398 scenario
self.body = raw
more, raw = False, b''
return more, raw

def _process_line_and_headers(self, raw: bytes) -> Tuple[bool, bytes]:
Expand Down
31 changes: 23 additions & 8 deletions tests/http/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"""
import unittest

from proxy.common.constants import CRLF
from proxy.common.constants import CRLF, HTTP_1_0
from proxy.common.utils import build_http_request, build_http_response, build_http_header
from proxy.common.utils import find_http_line, bytes_
from proxy.http.parser import HttpParser, httpParserTypes, httpParserStates, httpStatusCodes, httpMethods
Expand All @@ -21,6 +21,28 @@ class TestHttpParser(unittest.TestCase):
def setUp(self) -> None:
self.parser = HttpParser(httpParserTypes.REQUEST_PARSER)

def test_issue_398(self) -> None:
p = HttpParser(httpParserTypes.RESPONSE_PARSER)
p.parse(HTTP_1_0 + b' 200 OK' + CRLF)
self.assertEqual(p.version, HTTP_1_0)
self.assertEqual(p.code, b'200')
self.assertEqual(p.reason, b'OK')
self.assertEqual(p.state, httpParserStates.LINE_RCVD)
p.parse(
b'CP=CAO PSA OUR' + CRLF +
b'Cache-Control:private,max-age=0;' + CRLF +
b'X-Frame-Options:SAMEORIGIN' + CRLF +
b'X-Content-Type-Options:nosniff' + CRLF +
b'X-XSS-Protection:1; mode=block' + CRLF +
b'Content-Security-Policy:default-src \'self\' \'unsafe-inline\' \'unsafe-eval\'' + CRLF +
b'Strict-Transport-Security:max-age=2592000; includeSubdomains' + CRLF +
b'Set-Cookie: lang=eng; path=/;HttpOnly;' + CRLF +
b'Content-type:text/html;charset=UTF-8;' + CRLF + CRLF +
b'<!-- HTML RESPONSE HERE -->',
)
self.assertEqual(p.body, b'<!-- HTML RESPONSE HERE -->')
self.assertEqual(p.state, httpParserStates.RCVING_BODY)

def test_urlparse(self) -> None:
self.parser.parse(b'CONNECT httpbin.org:443 HTTP/1.1\r\n')
self.assertTrue(self.parser.is_https_tunnel())
Expand Down Expand Up @@ -695,10 +717,3 @@ def test_response_factory(self) -> None:
self.assertEqual(r.code, b'200')
self.assertEqual(r.reason, b'OK')
self.assertEqual(r.header(b'key'), b'value')

def test_parser_shouldnt_have_reached_here(self) -> None:
with self.assertRaises(NotImplementedError):
HttpParser.request(
b'POST http://localhost:12345 HTTP/1.1' + CRLF +
b'key: value' + CRLF + CRLF + b'Hello from py',
)

0 comments on commit 14af159

Please sign in to comment.