Skip to content

Ratchet on 100% CPU load #939

@ollm

Description

@ollm

Hi.

During the last month, I am having a problem with running a Ratchet server in a production environment, suddenly the process goes from normally using 0-1% of a CPU core to 98-100%, the websocket server still working but it takes longer to respond, this happens in a variety of ways, sometimes it takes a few hours to happen and others it takes days / weeks.

When it enters this state it does not exit again until I kill the process and supervisor starts a new one.

I am also checking the logic of my application so that it is not a problem on my part.

Apache version

Server Version: Apache/2.4.51 (cPanel) OpenSSL/1.1.1g mod_bwlimited/1.4
Server MPM: event
Server Built: Nov 7 2021 17:32:31

PHP version

PHP 8.0.12 (cli) (built: Nov 10 2021 01:18:26) ( NTS )
Copyright (c) The PHP Group
Zend Engine v4.0.12, Copyright (c) Zend Technologies
    with Zend OPcache v8.0.12, Copyright (c), by Zend Technologies

Ratchet version being used

cboden/ratchet v0.4.4 PHP WebSocket library
├──guzzlehttp/psr7 v1.8.3
├──ratchet/rfc6455 v0.3.1
├──react/event-loop v1.2.0
├──react/socket v1.10.0
├──symfony/http-foundation v6.0.1
└──symfony/routing v4.4.34

Event loop

ExtEvLoop
ev (1.1.4)

supervisor.conf

[unix_http_server]
file = /tmp/supervisor.sock

[supervisord]
logfile          = ./logs/supervisord.log
logfile_maxbytes = 50MB
logfile_backups  = 10
loglevel         = info
pidfile          = /tmp/supervisord.pid
nodaemon         = false
minfds           = 4096
minprocs         = 200

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl = unix:///tmp/supervisor.sock

[program:ratchet]
directory				= /home/username/public_html
command                 = bash -c "ulimit -n 10000; exec php chat.php"
process_name            = Chat
numprocs                = 1
autostart               = true
autorestart             = true
user                    = root
stdout_logfile          = ./logs/chat_info.log
stdout_logfile_maxbytes = 1MB
stderr_logfile          = ./logs/chat_error.log
stderr_logfile_maxbytes = 1MB

I have this code in the app, but there is no record in chat_info.log or chat_error.log

public function onError(ConnectionInterface $conn, \Exception $e)
{
	echo "An error has occurred: {$e->getMessage()}\n";

	$conn->close();
}

This is what appears if I run strace in the pid

strace

--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)

Thank you for your replies.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions