Skip to content

Commit

Permalink
Read chunked HTTP responses.
Browse files Browse the repository at this point in the history
Fix #1550.
  • Loading branch information
aaugustin committed Jan 4, 2025
1 parent 197b3ec commit df1e681
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 79 deletions.
2 changes: 1 addition & 1 deletion src/websockets/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def parse(self) -> Generator[None]:
self.logger.debug("< HTTP/1.1 %d %s", code, phrase)
for key, value in response.headers.raw_items():
self.logger.debug("< %s: %s", key, value)
if response.body is not None:
if response.body:
self.logger.debug("< [body] (%d bytes)", len(response.body))

try:
Expand Down
145 changes: 89 additions & 56 deletions src/websockets/http11.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,14 @@ class Response:
status_code: Response code.
reason_phrase: Response reason.
headers: Response headers.
body: Response body, if any.
body: Response body.
"""

status_code: int
reason_phrase: str
headers: Headers
body: bytes | None = None
body: bytes = b""

_exception: Exception | None = None

Expand Down Expand Up @@ -261,36 +261,9 @@ def parse(

headers = yield from parse_headers(read_line)

# https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

if "Transfer-Encoding" in headers:
raise NotImplementedError("transfer codings aren't supported")

# Since websockets only does GET requests (no HEAD, no CONNECT), all
# responses except 1xx, 204, and 304 include a message body.
if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
body = None
else:
content_length: int | None
try:
# MultipleValuesError is sufficiently unlikely that we don't
# attempt to handle it. Instead we document that its parent
# class, LookupError, may be raised.
raw_content_length = headers["Content-Length"]
except KeyError:
content_length = None
else:
content_length = int(raw_content_length)

if content_length is None:
try:
body = yield from read_to_eof(MAX_BODY_SIZE)
except RuntimeError:
raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")
elif content_length > MAX_BODY_SIZE:
raise SecurityError(f"body too large: {content_length} bytes")
else:
body = yield from read_exact(content_length)
body = yield from read_body(
status_code, headers, read_line, read_exact, read_to_eof
)

return cls(status_code, reason, headers, body)

Expand All @@ -303,11 +276,37 @@ def serialize(self) -> bytes:
# we can keep this simple.
response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
response += self.headers.serialize()
if self.body is not None:
response += self.body
response += self.body
return response


def parse_line(
read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
"""
Parse a single line.
CRLF is stripped from the return value.
Args:
read_line: Generator-based coroutine that reads a LF-terminated line
or raises an exception if there isn't enough data.
Raises:
EOFError: If the connection is closed without a CRLF.
SecurityError: If the response exceeds a security limit.
"""
try:
line = yield from read_line(MAX_LINE_LENGTH)
except RuntimeError:
raise SecurityError("line too long")
# Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
if not line.endswith(b"\r\n"):
raise EOFError("line without CRLF")
return line[:-2]


def parse_headers(
read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, Headers]:
Expand Down Expand Up @@ -359,28 +358,62 @@ def parse_headers(
return headers


def parse_line(
def read_body(
status_code: int,
headers: Headers,
read_line: Callable[[int], Generator[None, None, bytes]],
read_exact: Callable[[int], Generator[None, None, bytes]],
read_to_eof: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
"""
Parse a single line.
CRLF is stripped from the return value.
Args:
read_line: Generator-based coroutine that reads a LF-terminated line
or raises an exception if there isn't enough data.
Raises:
EOFError: If the connection is closed without a CRLF.
SecurityError: If the response exceeds a security limit.
# https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

# Since websockets only does GET requests (no HEAD, no CONNECT), all
# responses except 1xx, 204, and 304 include a message body.
if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
return b""

# MultipleValuesError is sufficiently unlikely that we don't attempt to
# handle it when accessing headers. Instead we document that its parent
# class, LookupError, may be raised.
# Conversions from str to int are protected by sys.set_int_max_str_digits..

elif (coding := headers.get("Transfer-Encoding")) is not None:
if coding != "chunked":
raise NotImplementedError(f"transfer coding {coding} isn't supported")

body = b""
while True:
chunk_size_line = yield from parse_line(read_line)
raw_chunk_size = chunk_size_line.split(b";", 1)[0]
# Set a lower limit than default_max_str_digits; 1 EB is plenty.
if len(raw_chunk_size) > 15:
str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
chunk_size = int(raw_chunk_size, 16)
if chunk_size == 0:
break
if len(body) + chunk_size > MAX_BODY_SIZE:
raise SecurityError(
f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
)
body += yield from read_exact(chunk_size)
if (yield from read_exact(2)) != b"\r\n":
raise ValueError("chunk without CRLF")
# Read the trailer.
yield from parse_headers(read_line)
return body

elif (raw_content_length := headers.get("Content-Length")) is not None:
# Set a lower limit than default_max_str_digits; 1 EiB is plenty.
if len(raw_content_length) > 18:
raise SecurityError(f"body too large: {raw_content_length} bytes")
content_length = int(raw_content_length)
if content_length > MAX_BODY_SIZE:
raise SecurityError(f"body too large: {content_length} bytes")
return (yield from read_exact(content_length))

"""
try:
line = yield from read_line(MAX_LINE_LENGTH)
except RuntimeError:
raise SecurityError("line too long")
# Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
if not line.endswith(b"\r\n"):
raise EOFError("line without CRLF")
return line[:-2]
else:
try:
return (yield from read_to_eof(MAX_BODY_SIZE))
except RuntimeError:
raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")
2 changes: 1 addition & 1 deletion src/websockets/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def send_response(self, response: Response) -> None:
self.logger.debug("> HTTP/1.1 %d %s", code, phrase)
for key, value in response.headers.raw_items():
self.logger.debug("> %s: %s", key, value)
if response.body is not None:
if response.body:
self.logger.debug("> [body] (%d bytes)", len(response.body))

self.writes.append(response.serialize())
Expand Down
2 changes: 1 addition & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def test_receive_successful_response(self, _generate_key):
}
),
)
self.assertIsNone(response.body)
self.assertEqual(response.body, b"")
self.assertIsNone(client.handshake_exc)

def test_receive_failed_response(self, _generate_key):
Expand Down
93 changes: 74 additions & 19 deletions tests/test_http11.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_parse_body(self):
)

def test_parse_body_with_transfer_encoding(self):
self.reader.feed_data(b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n")
self.reader.feed_data(b"GET / HTTP/1.1\r\nTransfer-Encoding: compress\r\n\r\n")
with self.assertRaises(NotImplementedError) as raised:
next(self.parse())
self.assertEqual(
Expand Down Expand Up @@ -151,7 +151,7 @@ def test_parse(self):
self.assertEqual(response.status_code, 101)
self.assertEqual(response.reason_phrase, "Switching Protocols")
self.assertEqual(response.headers["Upgrade"], "websocket")
self.assertIsNone(response.body)
self.assertEqual(response.body, b"")

def test_parse_empty(self):
self.reader.feed_eof()
Expand Down Expand Up @@ -215,22 +215,31 @@ def test_parse_invalid_header(self):
"invalid HTTP header line: Oops",
)

def test_parse_body_with_content_length(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello world!\n"
)
response = self.assertGeneratorReturns(self.parse())
self.assertEqual(response.body, b"Hello world!\n")

def test_parse_body_without_content_length(self):
def test_parse_body(self):
self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\nHello world!\n")
gen = self.parse()
self.assertGeneratorRunning(gen)
self.reader.feed_eof()
response = self.assertGeneratorReturns(gen)
self.assertEqual(response.body, b"Hello world!\n")

def test_parse_body_with_content_length_too_long(self):
def test_parse_body_too_large(self):
self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\n" + b"a" * 1048577)
with self.assertRaises(SecurityError) as raised:
next(self.parse())
self.assertEqual(
str(raised.exception),
"body too large: over 1048576 bytes",
)

def test_parse_body_with_content_length(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello world!\n"
)
response = self.assertGeneratorReturns(self.parse())
self.assertEqual(response.body, b"Hello world!\n")

def test_parse_body_with_content_length_and_body_too_large(self):
self.reader.feed_data(b"HTTP/1.1 200 OK\r\nContent-Length: 1048577\r\n\r\n")
with self.assertRaises(SecurityError) as raised:
next(self.parse())
Expand All @@ -239,33 +248,79 @@ def test_parse_body_with_content_length_too_long(self):
"body too large: 1048577 bytes",
)

def test_parse_body_without_content_length_too_long(self):
self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\n" + b"a" * 1048577)
def test_parse_body_with_content_length_and_body_way_too_large(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nContent-Length: 1234567890123456789\r\n\r\n"
)
with self.assertRaises(SecurityError) as raised:
next(self.parse())
self.assertEqual(
str(raised.exception),
"body too large: over 1048576 bytes",
"body too large: 1234567890123456789 bytes",
)

def test_parse_body_with_transfer_encoding(self):
self.reader.feed_data(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n")
def test_parse_body_with_chunked_transfer_encoding(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
b"6\r\nHello \r\n7\r\nworld!\n\r\n0\r\n\r\n"
)
response = self.assertGeneratorReturns(self.parse())
self.assertEqual(response.body, b"Hello world!\n")

def test_parse_body_with_chunked_transfer_encoding_and_chunk_without_crlf(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
b"6\r\nHello 7\r\nworld!\n0\r\n"
)
with self.assertRaises(ValueError) as raised:
next(self.parse())
self.assertEqual(
str(raised.exception),
"chunk without CRLF",
)

def test_parse_body_with_chunked_transfer_encoding_and_chunk_too_large(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
b"100000\r\n" + b"a" * 1048576 + b"\r\n1\r\na\r\n0\r\n\r\n"
)
with self.assertRaises(SecurityError) as raised:
next(self.parse())
self.assertEqual(
str(raised.exception),
"chunk too large: 1 bytes after 1048576 bytes",
)

def test_parse_body_with_chunked_transfer_encoding_and_chunk_way_too_large(self):
self.reader.feed_data(
b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
b"1234567890ABCDEF\r\n\r\n"
)
with self.assertRaises(SecurityError) as raised:
next(self.parse())
self.assertEqual(
str(raised.exception),
"chunk too large: 0x1234567890ABCDEF bytes",
)

def test_parse_body_with_unsupported_transfer_encoding(self):
self.reader.feed_data(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: compress\r\n\r\n")
with self.assertRaises(NotImplementedError) as raised:
next(self.parse())
self.assertEqual(
str(raised.exception),
"transfer codings aren't supported",
"transfer coding compress isn't supported",
)

def test_parse_body_no_content(self):
self.reader.feed_data(b"HTTP/1.1 204 No Content\r\n\r\n")
response = self.assertGeneratorReturns(self.parse())
self.assertIsNone(response.body)
self.assertEqual(response.body, b"")

def test_parse_body_not_modified(self):
self.reader.feed_data(b"HTTP/1.1 304 Not Modified\r\n\r\n")
response = self.assertGeneratorReturns(self.parse())
self.assertIsNone(response.body)
self.assertEqual(response.body, b"")

def test_serialize(self):
# Example from the protocol overview in RFC 6455
Expand Down
2 changes: 1 addition & 1 deletion tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_accept_response(self, _formatdate):
}
),
)
self.assertIsNone(response.body)
self.assertEqual(response.body, b"")

@patch("email.utils.formatdate", return_value=DATE)
def test_reject_response(self, _formatdate):
Expand Down

0 comments on commit df1e681

Please sign in to comment.