Skip to content

Commit 1f4ad7f

Browse files
martinrode and claude committed
stream PrintToPDF output instead of buffering it inline
Large PDF exports failed with chromium's DevTools error "Too large write data is pending: size=... max_buffer_size=268435456". Page.printToPDF returned the whole document in a single protocol message, which overflows chromium's 256 MB remote-debugging buffer once an export grows big enough (reported at ~250 objects, ~323 MB rendered).

Request the PDF with transferMode "ReturnAsStream" and copy it to stdout in chunks via IO.read, so neither chromium's buffer nor the plugin process ever holds the whole document at once. Fall back to the inline data if no stream handle is returned (older chromium ignoring transferMode).

While touching these returns, migrate the file off github.com/pkg/errors to stdlib fmt.Errorf("...: %w", err).

see #79887

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 516dc4c commit 1f4ad7f

3 files changed

Lines changed: 49 additions & 25 deletions

File tree

html2pdf/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ go 1.20
44

55
require (
66
github.com/mafredri/cdp v0.34.1
7-
github.com/pkg/errors v0.9.1
87
github.com/programmfabrik/golib v0.0.0-20230503055439-c28ab3558111
98
)
109

@@ -19,6 +18,7 @@ require (
1918
github.com/gorilla/websocket v1.5.0 // indirect
2019
github.com/klauspost/compress v1.15.11 // indirect
2120
github.com/logrusorgru/aurora v2.0.3+incompatible // indirect
21+
github.com/pkg/errors v0.9.1 // indirect
2222
github.com/stretchr/testify v1.9.0 // indirect
2323
github.com/yuin/goldmark v1.7.4 // indirect
2424
golang.org/x/crypto v0.26.0 // indirect

html2pdf/main.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,19 +95,17 @@ func main() {
9595

9696
log.Printf("wrote %d bytes into temp file %s", len(body.Document), f.Name())
9797

98-
data, err := createPdf(ctx, "file://"+f.Name(), port, body.Properties)
98+
// Stream the rendered PDF straight to stdout (see createPdf): the
99+
// document never has to be buffered whole, so large exports no longer
100+
// overflow chromium's DevTools buffer or this process's memory.
101+
n, err := createPdf(ctx, "file://"+f.Name(), port, body.Properties, os.Stdout)
99102
if err != nil {
100103
endWithError(err)
101104
return
102105
}
103106

104107
tookProduce := time.Since(timeProduce)
105108

106-
n, err := os.Stdout.Write(data)
107-
if err != nil {
108-
endWithError(err)
109-
return
110-
}
111109
sendEvent(event{Type: "SERVER_PDF_GENERATE", Info: map[string]any{
112110
"time startup": tookStartup.String(),
113111
"time pdf produce": tookProduce.String(),

html2pdf/pdf.go

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@ package main
33
import (
44
"context"
55
"fmt"
6+
"io"
67
"net/url"
78
"strconv"
89

9-
"github.com/pkg/errors"
10-
1110
"github.com/mafredri/cdp"
1211
"github.com/mafredri/cdp/devtool"
1312
"github.com/mafredri/cdp/protocol/network"
@@ -23,7 +22,7 @@ type pdfCreatorBody struct {
2322
Properties page.PrintToPDFArgs `json:"properties"`
2423
}
2524

26-
func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArgs page.PrintToPDFArgs) ([]byte, error) {
25+
func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArgs page.PrintToPDFArgs, w io.Writer) (int64, error) {
2726
ctx, cancel := context.WithCancel(ectx)
2827
defer func() {
2928
// Ensure to all executions on context close
@@ -38,21 +37,21 @@ func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArg
3837
// Use the DevTools API to manage targets
3938
pt, err := devtool.New(devtoolUrl.String()).Version(ctx)
4039
if err != nil {
41-
return nil, errors.Wrap(err, "Tried to connect to headless chrome. Could not create new devtool connection")
40+
return 0, fmt.Errorf("Tried to connect to headless chrome. Could not create new devtool connection: %w", err)
4241
}
4342

4443
// Open a new RPC connection to the Chrome Debugging Protocol target
4544
conn, err := rpcc.DialContext(ctx, pt.WebSocketDebuggerURL)
4645
if err != nil {
47-
return nil, errors.Wrap(err, "Tried to connect to headless chrome. Could not create DialContext")
46+
return 0, fmt.Errorf("Tried to connect to headless chrome. Could not create DialContext: %w", err)
4847
}
4948
defer conn.Close()
5049

5150
// Create new browser context
5251
baseBrowser := cdp.NewClient(conn)
5352
err = baseBrowser.Security.SetIgnoreCertificateErrors(ctx, &security.SetIgnoreCertificateErrorsArgs{Ignore: true})
5453
if err != nil {
55-
return nil, errors.Wrap(err, "Could not set ignore certificate error to true")
54+
return 0, fmt.Errorf("Could not set ignore certificate error to true: %w", err)
5655
}
5756
newContextTarget, err := baseBrowser.Target.CreateBrowserContext(ctx, &target.CreateBrowserContextArgs{})
5857
if err != nil {
@@ -64,7 +63,7 @@ func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArg
6463
newTargetArgs := target.NewCreateTargetArgs("about:blank").SetBrowserContextID(newContextTarget.BrowserContextID)
6564
newTarget, err := baseBrowser.Target.CreateTarget(ctx, newTargetArgs)
6665
if err != nil {
67-
return nil, errors.Wrap(err, "Could not open new blank target")
66+
return 0, fmt.Errorf("Could not open new blank target: %w", err)
6867
}
6968
defer baseBrowser.Target.CloseTarget(context.Background(), &target.CloseTargetArgs{TargetID: newTarget.TargetID})
7069

@@ -73,7 +72,7 @@ func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArg
7372
devtoolUrl.Path = fmt.Sprintf("/devtools/page/%s", newTarget.TargetID)
7473
newContextConn, err := rpcc.DialContext(ctx, devtoolUrl.String(), rpcc.WithWriteBufferSize(104857586), rpcc.WithCompression())
7574
if err != nil {
76-
return nil, errors.Wrap(err, "Could not create dial context to new target")
75+
return 0, fmt.Errorf("Could not create dial context to new target: %w", err)
7776
}
7877
defer newContextConn.Close()
7978
c := cdp.NewClient(newContextConn)
@@ -86,34 +85,34 @@ func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArg
8685
// Enable the runtime
8786
err = c.Runtime.Enable(ctx)
8887
if err != nil {
89-
return nil, errors.Wrap(err, "Could not enable runtime")
88+
return 0, fmt.Errorf("Could not enable runtime: %w", err)
9089
}
9190

9291
// Enable the network
9392
err = c.Network.Enable(ctx, network.NewEnableArgs())
9493
if err != nil {
95-
return nil, errors.Wrap(err, "Could not enable network")
94+
return 0, fmt.Errorf("Could not enable network: %w", err)
9695
}
9796

9897
// Enable events on the Page domain
9998
err = c.Page.Enable(ctx)
10099
if err != nil {
101-
return nil, errors.Wrap(err, "Could not enable events")
100+
return 0, fmt.Errorf("Could not enable events: %w", err)
102101
}
103102

104103
// CSP bypass
105104
err = c.Page.SetBypassCSP(ctx, &page.SetBypassCSPArgs{Enabled: true})
106105
if err != nil {
107-
return nil, errors.Wrap(err, "Could not set bypass CSP to enabled")
106+
return 0, fmt.Errorf("Could not set bypass CSP to enabled: %w", err)
108107
}
109108

110109
// Listen for lifecycle events, which include loading external resources (CSS, JS, etc)
111110
if err := c.Page.SetLifecycleEventsEnabled(ctx, page.NewSetLifecycleEventsEnabledArgs(true)); err != nil {
112-
return nil, errors.Wrap(err, "Could not enable lifecycle events")
111+
return 0, fmt.Errorf("Could not enable lifecycle events: %w", err)
113112
}
114113
lifecycleEvent, err := c.Page.LifecycleEvent(ctx)
115114
if err != nil {
116-
return nil, errors.Wrap(err, "Could not create lifecycle events listener")
115+
return 0, fmt.Errorf("Could not create lifecycle events listener: %w", err)
117116
}
118117
defer lifecycleEvent.Close()
119118

@@ -129,14 +128,14 @@ func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArg
129128
navArgs := page.NewNavigateArgs(urlToRender)
130129
_, err = c.Page.Navigate(ctx, navArgs)
131130
if err != nil {
132-
return nil, errors.Wrap(err, "Could not navigate to page")
131+
return 0, fmt.Errorf("Could not navigate to page: %w", err)
133132
}
134133

135134
// Wait for lifecycle events to finish
136135
for {
137136
ev, err := lifecycleEvent.Recv()
138137
if err != nil {
139-
return nil, errors.Wrap(err, "Could not wait for finish lifecycle events")
138+
return 0, fmt.Errorf("Could not wait for finish lifecycle events: %w", err)
140139
}
141140
if ev.Name == "networkIdle" {
142141
break
@@ -149,9 +148,36 @@ func createPdf(ectx context.Context, urlToRender string, port int, printToPDFArg
149148
// return nil, errors.Wrap(err, "Could not wait for finish loading event")
150149
// }
151150

151+
// Request the PDF as a stream instead of inline. PrintToPDF otherwise
152+
// returns the whole document in a single DevTools message, which
153+
// overflows chromium's remote-debugging buffer (max_buffer_size, 256 MB)
154+
// for large exports and fails the entire job (see ticket 79887). As a
155+
// stream the PDF is read back in small chunks via IO.read and copied
156+
// straight to w, so neither chromium's buffer nor this process ever holds
157+
// the full document at once.
158+
printToPDFArgs.SetTransferMode("ReturnAsStream")
159+
152160
printDa, err := c.Page.PrintToPDF(ctx, &printToPDFArgs)
153161
if err != nil {
154-
return nil, errors.Wrap(err, "Could not print to pdf")
162+
return 0, fmt.Errorf("Could not print to pdf: %w", err)
163+
}
164+
165+
if printDa.Stream == nil {
166+
// No stream handle (e.g. an older chromium ignoring transferMode):
167+
// fall back to the inline data.
168+
n, err := w.Write(printDa.Data)
169+
if err != nil {
170+
return int64(n), fmt.Errorf("Could not write inline pdf data: %w", err)
171+
}
172+
return int64(n), nil
173+
}
174+
175+
sr := c.NewIOStreamReader(ctx, *printDa.Stream)
176+
defer sr.Close()
177+
178+
n, err := io.Copy(w, sr)
179+
if err != nil {
180+
return n, fmt.Errorf("Could not read pdf stream: %w", err)
155181
}
156-
return printDa.Data, nil
182+
return n, nil
157183
}

0 commit comments

Comments
 (0)