Skip to content

Commit

Permalink
Replace quadratic algo in LineDecoder
Browse files Browse the repository at this point in the history
This leads to enormous speedups when doing things such as
Response(...).iter_lines(), as described in issue encode#2422.
  • Loading branch information
giannitedesco committed Nov 4, 2022
1 parent 9e97d7d commit 7b928cb
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 54 deletions.
57 changes: 17 additions & 40 deletions httpx/_decoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,53 +266,30 @@ def __init__(self) -> None:
self.buffer = ""

def decode(self, text: str) -> typing.List[str]:
    """
    Split incrementally received text into complete lines.

    Any text left over from the previous call (held in `self.buffer`) is
    prepended to `text`, and the result is split with `str.splitlines()`.
    The returned lines do not include their line terminators.

    Text after the last line boundary is kept in `self.buffer` until a
    later call completes it. A trailing "\r" is also kept in the buffer,
    together with the line it terminates, because the next chunk may
    start with "\n" and turn it into a single "\r\n" boundary.
    """
    # Every boundary `str.splitlines()` recognises, except "\r", which
    # needs the special handling below.
    complete_boundaries = (
        "\n",
        "\x0b",
        "\x0c",
        "\x1c",
        "\x1d",
        "\x1e",
        "\x85",
        "\u2028",
        "\u2029",
    )

    if self.buffer:
        text = self.buffer + text
        self.buffer = ""

    if not text:
        return []

    # Non-empty text always produces at least one entry, so the `pop()`
    # calls below cannot fail.
    lines = text.splitlines()

    if text.endswith("\r"):
        # Possibly the first half of a "\r\n" pair: hold the last line
        # back, keeping the "\r" so the next call can re-evaluate it.
        self.buffer = lines.pop() + "\r"
    elif not text.endswith(complete_boundaries):
        # The final entry is an unterminated partial line.
        self.buffer = lines.pop()

    return lines

def flush(self) -> typing.List[str]:
if self.buffer.endswith("\r"):
# Handle the case where we had a trailing '\r', which could have
# been a '\r\n' pair.
lines = [self.buffer[:-1] + "\n"]
lines = [self.buffer[:-1]]
elif self.buffer:
lines = [self.buffer]
else:
Expand Down
28 changes: 14 additions & 14 deletions tests/test_decoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,69 +236,69 @@ def test_text_decoder_empty_cases():
def test_line_decoder_nl():
    """Lines separated by "\\n" are returned without their terminator."""
    # Unterminated final line is only released by flush().
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\n\nb\nc") == ["a", "", "b"]
    assert decoder.flush() == ["c"]

    # Terminated final line leaves nothing to flush.
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\n\nb\nc\n") == ["a", "", "b", "c"]
    assert decoder.flush() == []

    # Issue #1033
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("12345\n") == ["12345"]
    assert decoder.decode("foo ") == []
    assert decoder.decode("bar ") == []
    assert decoder.decode("baz\n") == ["foo bar baz"]
    assert decoder.flush() == []


def test_line_decoder_cr():
    """Lines separated by a bare "\\r" are returned without their terminator."""
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\r\rb\rc") == ["a", "", "b"]
    assert decoder.flush() == ["c"]

    # A trailing "\r" is held back (it could be half of "\r\n"), so the
    # last line only appears on flush().
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\r\rb\rc\r") == ["a", "", "b"]
    assert decoder.flush() == ["c"]

    # Issue #1033
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("12345\r") == []
    assert decoder.decode("foo ") == ["12345"]
    assert decoder.decode("bar ") == []
    assert decoder.decode("baz\r") == []
    assert decoder.flush() == ["foo bar baz"]


def test_line_decoder_crnl():
    """Lines separated by "\\r\\n" are returned without their terminator."""
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\r\n\r\nb\r\nc") == ["a", "", "b"]
    assert decoder.flush() == ["c"]

    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\r\n\r\nb\r\nc\r\n") == ["a", "", "b", "c"]
    assert decoder.flush() == []

    # A "\r\n" pair split across two chunks counts as one line boundary.
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("a\r") == []
    assert decoder.decode("\n\r\nb\r\nc") == ["a", "", "b"]
    assert decoder.flush() == ["c"]

    # Issue #1033
    decoder = LineDecoder()
    assert decoder.decode("") == []
    assert decoder.decode("12345\r\n") == ["12345"]
    assert decoder.decode("foo ") == []
    assert decoder.decode("bar ") == []
    assert decoder.decode("baz\r\n") == ["foo bar baz"]
    assert decoder.flush() == []


Expand Down

0 comments on commit 7b928cb

Please sign in to comment.