
Commit 12bb16d

FarhanAliRaza and huoyinghui authored and committed
Fixed #36293 -- Avoided buffering streaming responses in GZipMiddleware.
This avoids latency and/or blocking. The example of streaming a CSV file was rewritten to employ batching for greater efficiency in all layers (db, HTTP, etc.). The improved performance from batching should outweigh the drag introduced by an additional byte for each flush.

Co-authored-by: huoyinghui <huoyinghui@users.noreply.github.com>
1 parent 787166f commit 12bb16d

4 files changed: 56 additions & 10 deletions

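The core of the fix is the added zfile.flush() call: GzipFile buffers compressed output internally, so without an explicit flush a streamed response could sit in that buffer until the stream closed. A minimal standalone sketch of the effect (plain gzip/io, not Django code):

    import gzip
    import io

    buf = io.BytesIO()
    with gzip.GzipFile(mode="wb", fileobj=buf) as zfile:
        zfile.write(b"hello " * 100)
        # zlib usually holds a small payload back; typically only the
        # 10-byte gzip header has reached buf at this point.
        before = len(buf.getvalue())
        zfile.flush()  # Z_SYNC_FLUSH by default: pending bytes are emitted
        after = len(buf.getvalue())
    assert after > before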

django/utils/text.py

Lines changed: 2 additions & 0 deletions
@@ -382,6 +382,7 @@ def compress_sequence(sequence, *, max_random_bytes=None):
         yield buf.read()
         for item in sequence:
             zfile.write(item)
+            zfile.flush()
             data = buf.read()
             if data:
                 yield data
@@ -398,6 +399,7 @@ async def acompress_sequence(sequence, *, max_random_bytes=None):
         yield buf.read()
         async for item in sequence:
             zfile.write(item)
+            zfile.flush()
             data = buf.read()
             if data:
                 yield data
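With the flush in place, compress_sequence yields roughly one compressed chunk per input chunk. A simplified stand-in (using io.BytesIO plus a drain helper in place of Django's internal StreamingBuffer; not the actual implementation) shows the shape of the resulting stream:

    import gzip
    import io

    def compress_chunks(chunks):
        # Sketch of django.utils.text.compress_sequence after this commit.
        buf = io.BytesIO()

        def drain():
            data = buf.getvalue()
            buf.seek(0)
            buf.truncate()
            return data

        with gzip.GzipFile(mode="wb", fileobj=buf, mtime=0) as zfile:
            yield drain()  # the gzip header, written on GzipFile creation
            for item in chunks:
                zfile.write(item)
                zfile.flush()  # force zlib to emit this chunk's bytes now
                data = drain()
                if data:
                    yield data
        yield drain()  # final deflate block plus CRC32/length trailer

    parts = list(compress_chunks([b"x" * 1000 for _ in range(3)]))
    assert len(parts) >= 5  # header + one chunk per write + trailer
    assert gzip.decompress(b"".join(parts)) == b"x" * 3000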

docs/howto/outputting-csv.txt

Lines changed: 10 additions & 2 deletions
@@ -67,9 +67,12 @@ avoid a load balancer dropping a connection that might have otherwise timed out
 while the server was generating the response.
 
 In this example, we make full use of Python generators to efficiently handle
-the assembly and transmission of a large CSV file::
+the assembly and transmission of a large CSV file. Rows are batched together
+to reduce HTTP overhead and improve compression efficiency when used with
+:class:`~django.middleware.gzip.GZipMiddleware`::
 
     import csv
+    from itertools import batched
 
     from django.http import StreamingHttpResponse
 
@@ -92,8 +95,13 @@ the assembly and transmission of a large CSV file::
         rows = (["Row {}".format(idx), str(idx)] for idx in range(65536))
         pseudo_buffer = Echo()
         writer = csv.writer(pseudo_buffer)
+
+        def stream_batched_rows():
+            for batch in batched(rows, 100):
+                yield "".join(writer.writerow(row) for row in batch)
+
         return StreamingHttpResponse(
-            (writer.writerow(row) for row in rows),
+            stream_batched_rows(),
             content_type="text/csv",
             headers={"Content-Disposition": 'attachment; filename="somefilename.csv"'},
         )
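The docs change leans on itertools.batched (Python 3.12+) and the howto's Echo pseudo-buffer, whose write() simply returns the value so csv.writer hands each formatted row back as a string. A quick standalone illustration of the batching:

    import csv
    from itertools import batched  # Python 3.12+

    class Echo:
        # As defined earlier in the howto: write() returns the value
        # instead of storing it, so writer.writerow() returns the row.
        def write(self, value):
            return value

    writer = csv.writer(Echo())
    rows = (["Row {}".format(idx), str(idx)] for idx in range(250))
    payloads = [
        "".join(writer.writerow(row) for row in batch)
        for batch in batched(rows, 100)
    ]
    print([len(p.splitlines()) for p in payloads])  # [100, 100, 50]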

tests/decorators/test_gzip.py

Lines changed: 31 additions & 1 deletion
@@ -1,6 +1,6 @@
 from inspect import iscoroutinefunction
 
-from django.http import HttpRequest, HttpResponse
+from django.http import HttpRequest, HttpResponse, StreamingHttpResponse
 from django.test import SimpleTestCase
 from django.views.decorators.gzip import gzip_page
 
@@ -44,3 +44,33 @@ async def async_view(request):
         response = await async_view(request)
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.get("Content-Encoding"), "gzip")
+
+    def test_streaming_response_yields_chunks_incrementally(self):
+        @gzip_page
+        def stream_view(request):
+            return StreamingHttpResponse(self.content.encode() for _ in range(5))
+
+        request = HttpRequest()
+        request.META["HTTP_ACCEPT_ENCODING"] = "gzip"
+        response = stream_view(request)
+        compressed_chunks = list(response)
+        # Each input chunk should produce compressed output, not buffer
+        # everything into a single chunk.
+        self.assertGreater(len(compressed_chunks), 2)
+
+    async def test_async_streaming_response_yields_chunks_incrementally(self):
+        @gzip_page
+        async def stream_view(request):
+            async def content():
+                for _ in range(5):
+                    yield self.content.encode()
+
+            return StreamingHttpResponse(content())
+
+        request = HttpRequest()
+        request.META["HTTP_ACCEPT_ENCODING"] = "gzip"
+        response = await stream_view(request)
+        compressed_chunks = [chunk async for chunk in response]
+        # Each input chunk should produce compressed output, not buffer
+        # everything into a single chunk.
+        self.assertGreater(len(compressed_chunks), 2)
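The new tests only assert chunk counts, but the incremental chunks still concatenate into one valid gzip stream, since sync flushes keep the deflate stream intact. A standalone check of that property (plain zlib, independent of Django):

    import gzip
    import zlib

    # Mimic what GZipMiddleware now emits: gzip-framed output with a
    # sync flush per chunk, then the final block and trailer.
    co = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)  # 16+: gzip
    chunks = [
        co.compress(b"spam" * 256) + co.flush(zlib.Z_SYNC_FLUSH)
        for _ in range(5)
    ]
    chunks.append(co.flush())  # final block + CRC32/length trailer
    assert gzip.decompress(b"".join(chunks)) == b"spam" * 256 * 5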

tests/utils_tests/test_text.py

Lines changed: 13 additions & 7 deletions
@@ -1,3 +1,4 @@
+import gzip
 import json
 import sys
 
@@ -404,13 +405,18 @@ def test_get_valid_filename(self):
             text.get_valid_filename("$.$.$")
 
     def test_compress_sequence(self):
-        data = [{"key": i} for i in range(10)]
-        seq = list(json.JSONEncoder().iterencode(data))
-        seq = [s.encode() for s in seq]
-        actual_length = len(b"".join(seq))
-        out = text.compress_sequence(seq)
-        compressed_length = len(b"".join(out))
-        self.assertLess(compressed_length, actual_length)
+        data = [{"key": i} for i in range(100)]
+        seq = [s.encode() for s in json.JSONEncoder().iterencode(data)]
+        original = b"".join(seq)
+        batch_size = 256
+        batched_seq = (
+            original[i : i + batch_size] for i in range(0, len(original), batch_size)
+        )
+        compressed_chunks = list(text.compress_sequence(batched_seq))
+        out = b"".join(compressed_chunks)
+        self.assertEqual(gzip.decompress(out), original)
+        self.assertLess(len(out), len(original))
+        self.assertGreater(len(compressed_chunks), 2)
 
     def test_format_lazy(self):
         self.assertEqual("django/test", format_lazy("{}/{}", "django", lazystr("test")))
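As a sanity check on the numbers the rewritten test relies on (assuming CPython's default JSON separators), the encoded payload spans several 256-byte batches, so assertGreater(len(compressed_chunks), 2) has headroom:

    import json

    data = [{"key": i} for i in range(100)]
    original = json.dumps(data).encode()
    print(len(original))             # 1290 bytes with default separators
    print(-(-len(original) // 256))  # 6 batches -> well over 2 chunks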
