Skip to content

Commit 4166c0e

Browse files
committed
web: address feedback
Signed-off-by: Peter Gadfort <gadfort@zeroasic.com>
1 parent 9340f4a commit 4166c0e

5 files changed

Lines changed: 164 additions & 68 deletions

File tree

src/web/include/web/web.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ ListenerHandle createAndRunListener(
6060
std::shared_ptr<TimingReport> timing_report,
6161
std::shared_ptr<ClockTreeReport> clock_report,
6262
utl::Logger* logger,
63-
WebViewerHook* viewer_hook);
63+
WebViewerHook* viewer_hook,
64+
int max_in_flight);
6465

6566
// A layout web server. serve() starts the server in background I/O
6667
// threads; waitForStop() blocks the calling thread until requestStop()

src/web/src/web.cpp

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -157,14 +157,18 @@ class WebSocketSession : public std::enable_shared_from_this<WebSocketSession>,
157157
WebViewerHook* viewer_hook_ = nullptr;
158158
std::size_t viewer_token_ = 0;
159159

160+
// In-flight request window announced to the client on connect.
161+
int max_in_flight_ = 16;
162+
160163
public:
161164
WebSocketSession(Tcp::socket&& socket,
162165
std::shared_ptr<TileGenerator> generator,
163166
std::shared_ptr<TclEvaluator> tcl_eval,
164167
std::shared_ptr<TimingReport> timing_report,
165168
std::shared_ptr<ClockTreeReport> clock_report,
166169
utl::Logger* logger,
167-
WebViewerHook* viewer_hook);
170+
WebViewerHook* viewer_hook,
171+
int max_in_flight);
168172
~WebSocketSession();
169173

170174
void run(http::request<http::string_body>&& req);
@@ -223,7 +227,8 @@ WebSocketSession::WebSocketSession(
223227
std::shared_ptr<TimingReport> timing_report,
224228
std::shared_ptr<ClockTreeReport> clock_report,
225229
utl::Logger* logger,
226-
WebViewerHook* viewer_hook)
230+
WebViewerHook* viewer_hook,
231+
int max_in_flight)
227232
: websocket_(std::move(socket)),
228233
logger_(logger),
229234
select_handler_(generator, tcl_eval),
@@ -234,7 +239,8 @@ WebSocketSession::WebSocketSession(
234239
drc_handler_(generator),
235240
strand_(net::make_strand(websocket_.get_executor())),
236241
generator_(std::move(generator)),
237-
viewer_hook_(viewer_hook)
242+
viewer_hook_(viewer_hook),
243+
max_in_flight_(max_in_flight)
238244
{
239245
if (generator_) {
240246
odb::dbChip* chip = generator_->getChip();
@@ -433,25 +439,15 @@ void WebSocketSession::on_accept(beast::error_code ec)
433439

434440
// Tell the client how many requests to keep in flight at once. This bounds
435441
// the client's send rate so a burst of tile requests (rapid pan/zoom) can't
436-
// flood the socket send buffer and wedge the connection. Scale with the
437-
// machine size — a larger box can absorb more concurrency — and clamp to a
438-
// sane range. Sent first so the client has it before requesting any tiles.
442+
// flood the socket send buffer and wedge the connection. The limit is scaled
443+
// to the server's I/O worker count (see WebServer::serve). Sent first so the
444+
// client has it before requesting any tiles.
439445
{
440-
unsigned int cores = std::thread::hardware_concurrency();
441-
if (cores == 0) {
442-
cores = 4; // hardware_concurrency may report 0; pick a modest default
443-
}
444-
int max_in_flight = static_cast<int>(cores) * 4;
445-
if (max_in_flight < 16) {
446-
max_in_flight = 16;
447-
} else if (max_in_flight > 256) {
448-
max_in_flight = 256;
449-
}
450446
WebSocketResponse cfg;
451447
cfg.id = 0;
452448
cfg.type = WebSocketResponse::kJson;
453449
const std::string cfg_json = R"({"type":"config","max_in_flight":)"
454-
+ std::to_string(max_in_flight) + "}";
450+
+ std::to_string(max_in_flight_) + "}";
455451
cfg.payload.assign(cfg_json.begin(), cfg_json.end());
456452
queue_response(cfg);
457453
}
@@ -749,6 +745,7 @@ class DetectSession : public std::enable_shared_from_this<DetectSession>
749745
http::request<http::string_body> req_;
750746
utl::Logger* logger_;
751747
WebViewerHook* viewer_hook_ = nullptr;
748+
int max_in_flight_ = 16;
752749

753750
public:
754751
DetectSession(Tcp::socket&& socket,
@@ -757,7 +754,8 @@ class DetectSession : public std::enable_shared_from_this<DetectSession>
757754
std::shared_ptr<TimingReport> timing_report,
758755
std::shared_ptr<ClockTreeReport> clock_report,
759756
utl::Logger* logger,
760-
WebViewerHook* viewer_hook);
757+
WebViewerHook* viewer_hook,
758+
int max_in_flight);
761759

762760
void run();
763761

@@ -771,14 +769,16 @@ DetectSession::DetectSession(Tcp::socket&& socket,
771769
std::shared_ptr<TimingReport> timing_report,
772770
std::shared_ptr<ClockTreeReport> clock_report,
773771
utl::Logger* logger,
774-
WebViewerHook* viewer_hook)
772+
WebViewerHook* viewer_hook,
773+
int max_in_flight)
775774
: stream_(std::move(socket)),
776775
generator_(std::move(generator)),
777776
tcl_eval_(std::move(tcl_eval)),
778777
timing_report_(std::move(timing_report)),
779778
clock_report_(std::move(clock_report)),
780779
logger_(logger),
781-
viewer_hook_(viewer_hook)
780+
viewer_hook_(viewer_hook),
781+
max_in_flight_(max_in_flight)
782782
{
783783
}
784784

@@ -810,7 +810,8 @@ void DetectSession::on_read(beast::error_code ec)
810810
timing_report_,
811811
clock_report_,
812812
logger_,
813-
viewer_hook_);
813+
viewer_hook_,
814+
max_in_flight_);
814815
websocket_session->run(std::move(req_));
815816
} else {
816817
// Regular HTTP - hand off to session with already-read request
@@ -833,6 +834,7 @@ class Listener : public std::enable_shared_from_this<Listener>
833834
std::shared_ptr<ClockTreeReport> clock_report_;
834835
utl::Logger* logger_;
835836
WebViewerHook* viewer_hook_ = nullptr;
837+
int max_in_flight_ = 16;
836838

837839
public:
838840
Listener(net::io_context& ioc,
@@ -842,7 +844,8 @@ class Listener : public std::enable_shared_from_this<Listener>
842844
std::shared_ptr<TimingReport> timing_report,
843845
std::shared_ptr<ClockTreeReport> clock_report,
844846
utl::Logger* logger,
845-
WebViewerHook* viewer_hook);
847+
WebViewerHook* viewer_hook,
848+
int max_in_flight);
846849

847850
void run() { do_accept(); }
848851

@@ -870,15 +873,17 @@ Listener::Listener(net::io_context& ioc,
870873
std::shared_ptr<TimingReport> timing_report,
871874
std::shared_ptr<ClockTreeReport> clock_report,
872875
utl::Logger* logger,
873-
WebViewerHook* viewer_hook)
876+
WebViewerHook* viewer_hook,
877+
int max_in_flight)
874878
: ioc_(ioc),
875879
acceptor_(ioc),
876880
generator_(std::move(generator)),
877881
tcl_eval_(std::move(tcl_eval)),
878882
timing_report_(std::move(timing_report)),
879883
clock_report_(std::move(clock_report)),
880884
logger_(logger),
881-
viewer_hook_(viewer_hook)
885+
viewer_hook_(viewer_hook),
886+
max_in_flight_(max_in_flight)
882887
{
883888
beast::error_code ec;
884889

@@ -929,7 +934,8 @@ void Listener::on_accept(beast::error_code ec, Tcp::socket socket)
929934
timing_report_,
930935
clock_report_,
931936
logger_,
932-
viewer_hook_)
937+
viewer_hook_,
938+
max_in_flight_)
933939
->run();
934940
}
935941
do_accept();
@@ -1317,7 +1323,8 @@ ListenerHandle createAndRunListener(
13171323
std::shared_ptr<TimingReport> timing_report,
13181324
std::shared_ptr<ClockTreeReport> clock_report,
13191325
utl::Logger* logger,
1320-
WebViewerHook* viewer_hook)
1326+
WebViewerHook* viewer_hook,
1327+
int max_in_flight)
13211328
{
13221329
auto listener = std::make_shared<Listener>(ioc,
13231330
endpoint,
@@ -1326,7 +1333,8 @@ ListenerHandle createAndRunListener(
13261333
std::move(timing_report),
13271334
std::move(clock_report),
13281335
logger,
1329-
viewer_hook);
1336+
viewer_hook,
1337+
max_in_flight);
13301338
listener->run();
13311339
return {.shutdown = [listener]() { listener->close(); },
13321340
.port = listener->port()};

src/web/src/web_serve.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// references (which would require the full gui library including Qt
77
// SWIG wrappers and ord::OpenRoad symbols).
88

9+
#include <algorithm>
910
#include <chrono>
1011
#include <cstdint>
1112
#include <cstdlib>
@@ -241,6 +242,13 @@ void WebServer::serve(int port)
241242
uint16_t const u_port = port;
242243
int const num_threads = num_threads_;
243244

245+
// Bound how many requests the client keeps in flight at once. Scale with
246+
// the server's I/O worker count (the threads that actually service
247+
// requests) so the window tracks the configured thread budget, with an
248+
// absolute cap so a many-core box doesn't hand out an unbounded window.
249+
// Announced to the client on connect; see WebSocketSession::on_accept.
250+
int const max_in_flight = std::clamp(num_threads * 4, 16, 256);
251+
244252
ioc_ = std::make_unique<net::io_context>(num_threads);
245253

246254
auto handle = createAndRunListener(*ioc_,
@@ -250,7 +258,8 @@ void WebServer::serve(int port)
250258
timing_report,
251259
clock_report,
252260
logger_,
253-
viewer_hook_.get());
261+
viewer_hook_.get(),
262+
max_in_flight);
254263
shutdown_listener_ = std::move(handle.shutdown);
255264

256265
const std::string url = "http://localhost:" + std::to_string(handle.port);

src/web/src/websocket-manager.js

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,16 @@ const MAX_BUFFERED_BYTES = 512 * 1024;
3333
// 2. We are saturated (in-flight at the cap) with work still queued, yet have
3434
// heard nothing back for DEAD_MS. A slow render is one slow reply among
3535
// fast ones, so replies keep flowing; total silence while saturated and
36-
// backed up means the server stopped servicing us entirely.
36+
// backed up means the server stopped servicing us entirely. This only ever
37+
// arms during a request flood (the window is full with more queued) — a
38+
// lone slow command (e.g. a timing query that triggers an STA update)
39+
// never saturates the window, so it cannot trip this. DEAD_MS is also set
40+
// well beyond any plausible single render so a saturated-but-slow burst
41+
// can't be mistaken for a dead server.
3742
// On either, we force-close the socket so onclose reconnects and recovers.
3843
const STUCK_BYTES = 256 * 1024;
3944
const STUCK_MS = 12000;
40-
const DEAD_MS = 15000;
45+
const DEAD_MS = 30000;
4146
const LIVENESS_INTERVAL_MS = 3000;
4247

4348
export class WebSocketManager {
@@ -128,36 +133,64 @@ export class WebSocketManager {
128133
this.handleMessage(event.data);
129134
};
130135

131-
this.socket.onclose = () => {
132-
this._isConnected = false;
133-
this._inFlight = 0;
134-
this._bufStuckSince = 0;
135-
this.onStatusChange();
136-
for (const [id, handler] of this.pending) {
137-
handler.reject(new Error('WebSocket closed'));
138-
}
139-
this.pending.clear();
140-
// Drop anything still queued — it never reached the wire. Reject so
141-
// callers settle; tile callers .catch() and re-request on reconnect.
142-
for (const [id, entry] of this._queue) {
143-
entry.reject(new Error('WebSocket closed'));
144-
}
145-
this._queue.clear();
146-
if (this._shutdown) {
147-
console.log('WebSocket closed (server stopped)');
148-
this._stopLivenessMonitor(); // no socket will reopen
149-
return; // don't reconnect after intentional shutdown
150-
}
151-
console.log('WebSocket closed, reconnecting...');
152-
setTimeout(() => this.connect(), this.reconnectDelay);
153-
this.reconnectDelay = Math.min(this.reconnectDelay * 2, 30000);
154-
};
136+
this.socket.onclose = () => this._handleClose();
155137

156138
this.socket.onerror = (err) => {
157139
console.error('WebSocket error:', err);
158140
};
159141
}
160142

143+
// Tear down after the socket is gone (rejecting outstanding work) and
144+
// schedule a reconnect unless the server told us to shut down. Invoked by
145+
// socket.onclose and, for a wedged socket, directly by _forceReconnect().
146+
_handleClose() {
147+
this._isConnected = false;
148+
this._inFlight = 0;
149+
this._bufStuckSince = 0;
150+
this.onStatusChange();
151+
for (const [id, handler] of this.pending) {
152+
handler.reject(new Error('WebSocket closed'));
153+
}
154+
this.pending.clear();
155+
// Drop anything still queued — it never reached the wire. Reject so
156+
// callers settle; tile callers .catch() and re-request on reconnect.
157+
for (const [id, entry] of this._queue) {
158+
entry.reject(new Error('WebSocket closed'));
159+
}
160+
this._queue.clear();
161+
if (this._shutdown) {
162+
console.log('WebSocket closed (server stopped)');
163+
this._stopLivenessMonitor(); // no socket will reopen
164+
return; // don't reconnect after intentional shutdown
165+
}
166+
console.log('WebSocket closed, reconnecting...');
167+
setTimeout(() => this.connect(), this.reconnectDelay);
168+
this.reconnectDelay = Math.min(this.reconnectDelay * 2, 30000);
169+
}
170+
171+
// Abandon a wedged socket and reconnect now, without waiting for close()
172+
// to fire onclose. When the send buffer is stuck, close() only starts its
173+
// handshake after the already-buffered bytes drain (which they aren't), so
174+
// onclose may not arrive until the TCP timeout — minutes away. Detach the
175+
// dead socket's handlers so it can no longer drive our state, run the same
176+
// teardown onclose would, then let _handleClose() schedule the reconnect.
177+
_forceReconnect() {
178+
const dead = this.socket;
179+
this.socket = null;
180+
if (dead) {
181+
dead.onopen = null;
182+
dead.onmessage = null;
183+
dead.onclose = null;
184+
dead.onerror = null;
185+
try {
186+
dead.close(); // best effort; we no longer depend on it
187+
} catch (e) {
188+
/* already gone */
189+
}
190+
}
191+
this._handleClose();
192+
}
193+
161194
handleMessage(data) {
162195
// Binary frame: [4B id][1B type][3B reserved][payload...]
163196
const view = new DataView(data);
@@ -359,13 +392,11 @@ export class WebSocketManager {
359392

360393
if (wedged) {
361394
console.warn(`WebSocket connection wedged — ${why}; `
362-
+ `closing to recover`);
395+
+ `reconnecting to recover`);
363396
this._bufStuckSince = 0;
364-
try {
365-
s.close(); // onclose clears state, rejects pending, reconnects
366-
} catch (e) {
367-
/* onclose still fires */
368-
}
397+
// Don't depend on close()/onclose: a stuck send buffer can defer
398+
// the close handshake indefinitely. Reconnect immediately.
399+
this._forceReconnect();
369400
}
370401
}
371402

0 commit comments

Comments
 (0)