From fd388b7ac36d11d4d5fc8ebbde74baa629c332af Mon Sep 17 00:00:00 2001 From: Riddhesh Sanghvi Date: Tue, 2 Jun 2026 20:54:37 +0530 Subject: [PATCH 1/5] fix(backup): cap rclone upload memory to prevent OOM kills rclone allocates one --buffer-size read-ahead buffer per --transfers and, for S3 backends, an additional --s3-chunk-size x --s3-upload-concurrency multipart buffer per transfer. The previous formula set buffer_size = available_ram / transfers, so the read-ahead buffers alone consumed ~100% of available memory (e.g. --buffer-size 1328M --transfers 2 on a 4 GB host), with the S3 multipart buffers unaccounted on top. This saturated RAM and triggered the OOM killer during backups. Introduce compute_rclone_resources() which budgets rclone's total in-memory footprint -- transfers * (buffer_size + s3_chunk * s3_concurrency) -- against a fraction of currently-available RAM (default 50%, clamped 10-90%). It scales parallelism and buffer size up on larger hosts and shrinks S3 concurrency then transfers on constrained hosts, with buffer-size clamped to [16M, 256M]. Add optional global config knobs rclone-mem-fraction and rclone-max-buffer-size, plus an EE::debug line logging the chosen values and estimated peak memory. 
--- src/helper/Site_Backup_Restore.php | 104 ++++++++++++++++++++++++++--- 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/src/helper/Site_Backup_Restore.php b/src/helper/Site_Backup_Restore.php index 46aa9593..84df4ca7 100644 --- a/src/helper/Site_Backup_Restore.php +++ b/src/helper/Site_Backup_Restore.php @@ -1284,23 +1284,33 @@ private function rclone_download( $path ) { private function rclone_upload( $path ) { - $cpu_cores = intval( EE::launch( 'nproc' )->stdout ); - $ram = intval( EE::launch( "free -m | grep Mem | awk '{print $7}'" )->stdout ); - $transfers = max( 2, min( intval( $cpu_cores / 2 ), 4 ) ); - $max_buffer_size = 4096; - + $cpu_cores = intval( EE::launch( 'nproc' )->stdout ); + $available_ram = intval( EE::launch( "free -m | grep Mem | awk '{print $7}'" )->stdout ); - $buffer_size = min( floor( $ram / $transfers ), $max_buffer_size ) . 'M'; + // Detect S3 backends, which require additional multipart-upload tuning. + $rclone_type = EE::launch( 'rclone config show easyengine | grep type' )->stdout; + $is_s3 = ( strpos( $rclone_type, 's3' ) !== false ); + $res = $this->compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ); + $transfers = $res['transfers']; + $buffer_size = $res['buffer_size'] . 'M'; - $command = 'rclone config show easyengine | grep type'; - $output = EE::launch( $command )->stdout; $s3_flag = ''; - - if ( strpos( $output, 's3' ) !== false ) { - $s3_flag = ' --s3-chunk-size=64M --s3-upload-concurrency ' . min( intval( $cpu_cores ) * 2, 32 ); + if ( $is_s3 ) { + $s3_flag = sprintf( ' --s3-chunk-size=%dM --s3-upload-concurrency %d', $res['s3_chunk_size'], $res['s3_concurrency'] ); } + EE::debug( sprintf( + 'rclone upload tuning: available_ram=%dMB transfers=%d buffer-size=%s s3=%s s3-chunk-size=%dM s3-upload-concurrency=%d (est. peak ~%dMB)', + $available_ram, + $transfers, + $buffer_size, + $is_s3 ? 
'yes' : 'no', + $res['s3_chunk_size'], + $res['s3_concurrency'], + $transfers * ( $res['buffer_size'] + ( $res['s3_chunk_size'] * $res['s3_concurrency'] ) ) + ) ); + $command = sprintf( "rclone copy -P %s --transfers %d --checkers %d --buffer-size %s %s %s", $s3_flag, $transfers, $transfers, $buffer_size, escapeshellarg( $path ), escapeshellarg( $this->get_remote_path() ) ); $output = EE::launch( $command ); @@ -1331,6 +1341,78 @@ private function rclone_upload( $path ) { } } + /** + * Compute memory-safe rclone transfer settings for an upload. + * + * rclone allocates one `--buffer-size` read-ahead buffer per concurrent + * `--transfers`, and, for S3 backends, an additional + * `--s3-chunk-size * --s3-upload-concurrency` multipart buffer per transfer. + * The total in-memory footprint is therefore: + * + * transfers * ( buffer_size + s3_chunk_size * s3_upload_concurrency ) + * + * The previous implementation set `buffer_size = available_ram / transfers`, + * which made the read-ahead buffers alone consume ~100% of available memory + * (e.g. `--buffer-size 1328M --transfers 2` on a 4 GB host) and routinely + * triggered the OOM killer during backups. This helper instead caps rclone's + * total footprint at a fraction of currently-available memory, while still + * scaling parallelism and buffer size up on larger hosts so spare RAM is used. + * + * Tunable via global config: `rclone-mem-fraction` (default 0.5) and + * `rclone-max-buffer-size` in MB (default 256). + * + * @param int $cpu_cores Number of CPU cores (nproc). + * @param int $available_ram Currently available memory in MB. + * @param bool $is_s3 Whether the remote is an S3 backend. 
+ * + * @return array{transfers:int,buffer_size:int,s3_concurrency:int,s3_chunk_size:int} + */ + private function compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ) { + $cpu_cores = max( 1, intval( $cpu_cores ) ); + $available_ram = max( 0, intval( $available_ram ) ); + + // Fraction of *available* RAM rclone may use, clamped to a safe range so + // a backup never starves the host (MariaDB, PHP-FPM, nginx) or itself. + $mem_fraction = floatval( get_config_value( 'rclone-mem-fraction', 0.5 ) ); + $mem_fraction = min( 0.9, max( 0.1, $mem_fraction ) ); + + $min_buffer = 16; // rclone's default; never go below it. + $max_buffer = max( $min_buffer, intval( get_config_value( 'rclone-max-buffer-size', 256 ) ) ); + $s3_chunk_size = $is_s3 ? 64 : 0; + + // Total memory budget for rclone transfer/multipart buffers. + $budget = (int) floor( $available_ram * $mem_fraction ); + + // Desired parallelism, scaled with cores but capped to sane bounds. + $transfers = max( 2, min( $cpu_cores, 8 ) ); + $s3_concurrency = $is_s3 ? max( 2, min( $cpu_cores * 2, 32 ) ) : 0; + + // If the budget can't fit the desired parallelism even at the minimum + // buffer size, first shrink S3 multipart concurrency (the biggest memory + // lever at 64M/chunk), then the number of parallel transfers. + while ( $is_s3 && $s3_concurrency > 2 && + $transfers * ( $min_buffer + $s3_chunk_size * $s3_concurrency ) > $budget ) { + $s3_concurrency = max( 2, intval( $s3_concurrency / 2 ) ); + } + while ( $transfers > 2 && + $transfers * ( $min_buffer + $s3_chunk_size * $s3_concurrency ) > $budget ) { + $transfers--; + } + + // Spend whatever budget remains after reserving S3 multipart buffers on + // the read-ahead buffer, clamped to [$min_buffer, $max_buffer]. 
+ $per_transfer_budget = intval( floor( $budget / $transfers ) ); + $buffer_size = $per_transfer_budget - ( $s3_chunk_size * $s3_concurrency ); + $buffer_size = max( $min_buffer, min( $buffer_size, $max_buffer ) ); + + return [ + 'transfers' => $transfers, + 'buffer_size' => $buffer_size, + 's3_concurrency' => $s3_concurrency, + 's3_chunk_size' => $s3_chunk_size, + ]; + } + /** * Delete old backups from remote storage after successful upload. * Keeps only the configured number of most recent backups. From 4bba36247b9abe178b3b614735dd5257dbcfeed8 Mon Sep 17 00:00:00 2001 From: Riddhesh Sanghvi Date: Tue, 2 Jun 2026 21:00:48 +0530 Subject: [PATCH 2/5] fix(restore): cap rclone download streams and transfers to available memory rclone_download() ran 'rclone copy -P --multi-thread-streams min(cpu*2,32)' with no memory awareness and relied on rclone's default --transfers 4. rclone fans out up to transfers * multi-thread-streams concurrent download streams for large files, each holding a --multi-thread-write-buffer-size buffer plus one in-flight --multi-thread-chunk-size range, so a many-core/low-RAM host (e.g. the same 4 GB box) could spawn 4 * 32 = 128 streams and be OOM-killed during restore or rollback. An empty nproc also yielded the degenerate --multi-thread-streams 0. Reuse compute_rclone_resources() (is_s3 = false, since S3 multipart-upload concurrency does not apply to downloads) to derive memory-safe --transfers, --checkers and --buffer-size, then cap --multi-thread-streams against the same available-RAM budget, flooring at 1 (which also fixes the nproc->0 case). Per-stream memory is modeled from rclone's real levers --multi-thread-write-buffer-size and --multi-thread-chunk-size, exposed as the rclone-mt-write-buffer-size and rclone-mt-chunk-size config knobs, and an EE::debug line logs the chosen values and estimated peak. 
--- src/helper/Site_Backup_Restore.php | 58 ++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/src/helper/Site_Backup_Restore.php b/src/helper/Site_Backup_Restore.php index 84df4ca7..e2a665e0 100644 --- a/src/helper/Site_Backup_Restore.php +++ b/src/helper/Site_Backup_Restore.php @@ -1271,9 +1271,61 @@ private function get_remote_path( $upload = true ) { private function rclone_download( $path ) { $cpu_cores = intval( EE::launch( 'nproc' )->stdout ); - $multi_threads = min( intval( $cpu_cores ) * 2, 32 ); - $command = sprintf( "rclone copy -P --multi-thread-streams %d %s %s", $multi_threads, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) ); - $output = EE::launch( $command ); + $available_ram = intval( EE::launch( "free -m | grep Mem | awk '{print $7}'" )->stdout ); + + // Size download parallelism and read-ahead buffers (one --buffer-size + // buffer per --transfers) against available RAM, reusing the upload budget + // helper. The helper's S3 term models --s3-upload-concurrency multipart + // buffers, which downloads never allocate, so pass is_s3 = false here; the + // multi-thread download streams are budgeted separately below. + $res = $this->compute_rclone_resources( $cpu_cores, $available_ram, false ); + $transfers = $res['transfers']; + $buffer_size = $res['buffer_size'] . 'M'; + + // On top of the read-ahead buffers, rclone splits large files into + // --multi-thread-streams concurrent download streams. 
Each stream holds a + // --multi-thread-write-buffer-size in-memory buffer (rclone default + // 128KiB) and pulls up to one --multi-thread-chunk-size range at a time, + // and every running --transfers can be multi-threaded at once, so the + // in-flight multi-thread footprint scales as: + // + // transfers * multi_thread_streams * per_stream_mem + // + // The previous code set streams = min( cpu_cores * 2, 32 ) with no memory + // awareness and relied on rclone's default --transfers 4, so a many-core + // VM with little RAM (e.g. 4 GB) could fan out 4 * 32 = 128 streams and be + // OOM-killed during restore/rollback. It also produced + // --multi-thread-streams 0 when nproc returned empty. Cap the stream count + // so the combined multi-thread buffers fit the same memory budget used for + // upload, and floor it at 1. + $mt_write_buffer = max( 1, intval( get_config_value( 'rclone-mt-write-buffer-size', 128 ) ) ); // KiB; rclone default. + $mt_chunk_size = max( 1, intval( get_config_value( 'rclone-mt-chunk-size', 64 ) ) ); // MB; rclone default. + $mem_fraction = floatval( get_config_value( 'rclone-mem-fraction', 0.5 ) ); + $mem_fraction = min( 0.9, max( 0.1, $mem_fraction ) ); + $budget = (int) floor( max( 0, intval( $available_ram ) ) * $mem_fraction ); // MB. + + // Per-stream in-memory cost (MB): the write buffer plus a single in-flight + // chunk range being read from the source. + $per_stream_mem = ( $mt_write_buffer / 1024 ) + $mt_chunk_size; + + $mt_streams = max( 1, min( max( 1, intval( $cpu_cores ) ) * 2, 32 ) ); + // Shrink streams until transfers * streams * per_stream_mem fits the + // budget, but never below 1 (a single stream still works, just slower). + while ( $mt_streams > 1 && ( $transfers * $mt_streams * $per_stream_mem ) > $budget ) { + $mt_streams--; + } + + EE::debug( sprintf( + 'rclone download tuning: available_ram=%dMB transfers=%d buffer-size=%s multi-thread-streams=%d (est. 
peak ~%dMB)', + $available_ram, + $transfers, + $buffer_size, + $mt_streams, + (int) ( ( $transfers * $res['buffer_size'] ) + ( $transfers * $mt_streams * $per_stream_mem ) ) + ) ); + + $command = sprintf( "rclone copy -P --transfers %d --checkers %d --buffer-size %s --multi-thread-streams %d --multi-thread-chunk-size %dM %s %s", $transfers, $transfers, $buffer_size, $mt_streams, $mt_chunk_size, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) ); + $output = EE::launch( $command ); if ( $output->return_code ) { EE::error( 'Error downloading backup from remote storage.' ); From dc3811880e0a6e82aaec84c5a75d5d343264b7b9 Mon Sep 17 00:00:00 2001 From: Riddhesh Sanghvi Date: Tue, 2 Jun 2026 23:23:28 +0530 Subject: [PATCH 3/5] fix(backup): bound rclone memory within one budget and harden RAM detection Follow-up to the rclone OOM fixes, addressing issues found in review: Download (#1): the --multi-thread-streams budget was checked independently of the read-ahead buffers, so the two pools were each sized against the full mem-fraction budget and the combined download footprint could reach ~2x the intended fraction (e.g. ~3.5 GB on an 8-core/4 GB host) and still OOM during restore/rollback. rclone_download() now splits one shared budget between the read-ahead buffer (up to half per transfer, capped at max_buffer) and the multi-thread streams, so transfers * (buffer + streams * per_stream_mem) <= budget by construction, while still scaling streams up on memory-rich hosts. Available-RAM detection (#3): the 'free -m | grep Mem | awk {print $7}' probe assumed an English locale and a fixed available-column index. It is now centralized in get_available_ram_mb(), which pins LC_ALL=C and locates the "available" column by its header name, falling back to the "free" column on older free/procps builds whose output has no "available" column. 
multi-thread-write-buffer-size (#4): the rclone-mt-write-buffer-size config value fed the stream-memory model but was never passed to rclone, so the knob had no effect. rclone_download() now emits --multi-thread-write-buffer-size so the model matches the process. Download checkers (#5): rclone_download() no longer forces --checkers = transfers (it set none before, so rclone used its default of 8); checkers allocate no transfer buffers, so binding them to the memory-derived transfer count needlessly throttled the compare phase. Dedupe (#9): compute_rclone_resources() now returns the computed budget and max_buffer, so rclone_download() consumes them instead of recomputing the mem-fraction/budget formula, removing the risk of the two copies drifting. --- src/helper/Site_Backup_Restore.php | 108 +++++++++++++++++------------ 1 file changed, 62 insertions(+), 46 deletions(-) diff --git a/src/helper/Site_Backup_Restore.php b/src/helper/Site_Backup_Restore.php index e2a665e0..92381b23 100644 --- a/src/helper/Site_Backup_Restore.php +++ b/src/helper/Site_Backup_Restore.php @@ -1269,62 +1269,72 @@ private function get_remote_path( $upload = true ) { } + /** + * Read currently-available memory in MB. + * + * Pins `LC_ALL=C` so the `Mem:` label and column layout stay stable across + * locales, locates the "available" column by its header name (rather than a + * fixed field index, which differs across `free`/procps versions) and falls + * back to the "free" column on older builds that have no "available" column. + * + * @return int Available memory in MB (0 if it cannot be determined, in which + * case the resource helpers fall back to their safe minimums). + */ + private function get_available_ram_mb() { + $command = "LC_ALL=C free -m | awk 'NR==1{for(i=1;i<=NF;i++) if(\$i==\"available\") c=i+1} /^Mem:/{print (c ? 
\$c : \$4)}'"; + + return intval( EE::launch( $command )->stdout ); + } + private function rclone_download( $path ) { $cpu_cores = intval( EE::launch( 'nproc' )->stdout ); - $available_ram = intval( EE::launch( "free -m | grep Mem | awk '{print $7}'" )->stdout ); - - // Size download parallelism and read-ahead buffers (one --buffer-size - // buffer per --transfers) against available RAM, reusing the upload budget - // helper. The helper's S3 term models --s3-upload-concurrency multipart - // buffers, which downloads never allocate, so pass is_s3 = false here; the - // multi-thread download streams are budgeted separately below. - $res = $this->compute_rclone_resources( $cpu_cores, $available_ram, false ); - $transfers = $res['transfers']; - $buffer_size = $res['buffer_size'] . 'M'; - - // On top of the read-ahead buffers, rclone splits large files into - // --multi-thread-streams concurrent download streams. Each stream holds a - // --multi-thread-write-buffer-size in-memory buffer (rclone default - // 128KiB) and pulls up to one --multi-thread-chunk-size range at a time, - // and every running --transfers can be multi-threaded at once, so the - // in-flight multi-thread footprint scales as: + $available_ram = $this->get_available_ram_mb(); + + // Derive the memory-safe transfer count and the total RAM budget from the + // shared helper (is_s3 = false: downloads allocate no S3 multipart-upload + // buffers). The budget is reused below rather than recomputed. + $res = $this->compute_rclone_resources( $cpu_cores, $available_ram, false ); + $transfers = $res['transfers']; + $budget = $res['budget']; // MB; available_ram * rclone-mem-fraction. + $max_buffer = $res['max_buffer']; // MB. 
+ + // Per concurrent --transfers a download holds one --buffer-size read-ahead + // buffer plus, for files above rclone's --multi-thread-cutoff, + // --multi-thread-streams streams -- each with a + // --multi-thread-write-buffer-size buffer and one in-flight + // --multi-thread-chunk-size range. The whole footprint must fit ONE budget: // - // transfers * multi_thread_streams * per_stream_mem + // transfers * ( buffer_size + multi_thread_streams * per_stream_mem ) <= budget // - // The previous code set streams = min( cpu_cores * 2, 32 ) with no memory - // awareness and relied on rclone's default --transfers 4, so a many-core - // VM with little RAM (e.g. 4 GB) could fan out 4 * 32 = 128 streams and be - // OOM-killed during restore/rollback. It also produced - // --multi-thread-streams 0 when nproc returned empty. Cap the stream count - // so the combined multi-thread buffers fit the same memory budget used for - // upload, and floor it at 1. + // so each transfer's share of the budget is split between the read-ahead + // buffer (up to half, capped at $max_buffer) and the multi-thread streams. + // Both scale with available RAM; on a tight budget streams floor at 1. The + // previous code budgeted the streams against the full budget independently + // of the read-ahead buffers, so the two pools could together reach ~2x the + // intended fraction and still OOM during restore/rollback. $mt_write_buffer = max( 1, intval( get_config_value( 'rclone-mt-write-buffer-size', 128 ) ) ); // KiB; rclone default. $mt_chunk_size = max( 1, intval( get_config_value( 'rclone-mt-chunk-size', 64 ) ) ); // MB; rclone default. - $mem_fraction = floatval( get_config_value( 'rclone-mem-fraction', 0.5 ) ); - $mem_fraction = min( 0.9, max( 0.1, $mem_fraction ) ); - $budget = (int) floor( max( 0, intval( $available_ram ) ) * $mem_fraction ); // MB. + $per_stream_mem = ( $mt_write_buffer / 1024 ) + $mt_chunk_size; // MB. 
- // Per-stream in-memory cost (MB): the write buffer plus a single in-flight - // chunk range being read from the source. - $per_stream_mem = ( $mt_write_buffer / 1024 ) + $mt_chunk_size; - - $mt_streams = max( 1, min( max( 1, intval( $cpu_cores ) ) * 2, 32 ) ); - // Shrink streams until transfers * streams * per_stream_mem fits the - // budget, but never below 1 (a single stream still works, just slower). - while ( $mt_streams > 1 && ( $transfers * $mt_streams * $per_stream_mem ) > $budget ) { - $mt_streams--; - } + $per_transfer = max( 1, intval( floor( $budget / $transfers ) ) ); + $buffer_mb = max( 16, min( intval( floor( $per_transfer / 2 ) ), $max_buffer ) ); + $stream_budget = max( 0, $per_transfer - $buffer_mb ); + $mt_streams = max( 1, min( $cpu_cores * 2, 32, intval( floor( $stream_budget / $per_stream_mem ) ) ) ); + $buffer_size = $buffer_mb . 'M'; EE::debug( sprintf( - 'rclone download tuning: available_ram=%dMB transfers=%d buffer-size=%s multi-thread-streams=%d (est. peak ~%dMB)', + 'rclone download tuning: available_ram=%dMB budget=%dMB transfers=%d buffer-size=%s multi-thread-streams=%d mt-write-buffer=%dKi mt-chunk-size=%dM (est. 
peak ~%dMB)', $available_ram, + $budget, $transfers, $buffer_size, $mt_streams, - (int) ( ( $transfers * $res['buffer_size'] ) + ( $transfers * $mt_streams * $per_stream_mem ) ) + $mt_write_buffer, + $mt_chunk_size, + (int) ( $transfers * ( $buffer_mb + $mt_streams * $per_stream_mem ) ) ) ); - $command = sprintf( "rclone copy -P --transfers %d --checkers %d --buffer-size %s --multi-thread-streams %d --multi-thread-chunk-size %dM %s %s", $transfers, $transfers, $buffer_size, $mt_streams, $mt_chunk_size, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) ); + $command = sprintf( "rclone copy -P --transfers %d --buffer-size %s --multi-thread-streams %d --multi-thread-write-buffer-size %dKi --multi-thread-chunk-size %dM %s %s", $transfers, $buffer_size, $mt_streams, $mt_write_buffer, $mt_chunk_size, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) ); $output = EE::launch( $command ); if ( $output->return_code ) { @@ -1337,7 +1347,7 @@ private function rclone_download( $path ) { private function rclone_upload( $path ) { $cpu_cores = intval( EE::launch( 'nproc' )->stdout ); - $available_ram = intval( EE::launch( "free -m | grep Mem | awk '{print $7}'" )->stdout ); + $available_ram = $this->get_available_ram_mb(); // Detect S3 backends, which require additional multipart-upload tuning. $rclone_type = EE::launch( 'rclone config show easyengine | grep type' )->stdout; @@ -1394,12 +1404,12 @@ private function rclone_upload( $path ) { } /** - * Compute memory-safe rclone transfer settings for an upload. + * Compute memory-safe rclone transfer settings shared by upload and download. * * rclone allocates one `--buffer-size` read-ahead buffer per concurrent - * `--transfers`, and, for S3 backends, an additional + * `--transfers`, and, for S3 uploads, an additional * `--s3-chunk-size * --s3-upload-concurrency` multipart buffer per transfer. 
- * The total in-memory footprint is therefore: + * The upload in-memory footprint is therefore: * * transfers * ( buffer_size + s3_chunk_size * s3_upload_concurrency ) * @@ -1410,6 +1420,10 @@ private function rclone_upload( $path ) { * total footprint at a fraction of currently-available memory, while still * scaling parallelism and buffer size up on larger hosts so spare RAM is used. * + * The returned `budget` (and `max_buffer`) let callers that allocate further + * buffer pools -- e.g. `rclone_download()` sizing `--multi-thread-streams` -- + * stay within the same single budget instead of recomputing their own. + * * Tunable via global config: `rclone-mem-fraction` (default 0.5) and * `rclone-max-buffer-size` in MB (default 256). * @@ -1417,7 +1431,7 @@ private function rclone_upload( $path ) { * @param int $available_ram Currently available memory in MB. * @param bool $is_s3 Whether the remote is an S3 backend. * - * @return array{transfers:int,buffer_size:int,s3_concurrency:int,s3_chunk_size:int} + * @return array{transfers:int,buffer_size:int,s3_concurrency:int,s3_chunk_size:int,budget:int,max_buffer:int} */ private function compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ) { $cpu_cores = max( 1, intval( $cpu_cores ) ); @@ -1462,6 +1476,8 @@ private function compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ) 'buffer_size' => $buffer_size, 's3_concurrency' => $s3_concurrency, 's3_chunk_size' => $s3_chunk_size, + 'budget' => $budget, + 'max_buffer' => $max_buffer, ]; } From 10876d976f14f61bdf0a6ca0ddc947910edc047e Mon Sep 17 00:00:00 2001 From: Riddhesh Sanghvi Date: Thu, 4 Jun 2026 10:17:42 +0530 Subject: [PATCH 4/5] fix(backup): honour the rclone memory budget on very low-RAM hosts Addresses review feedback that the single-budget invariant could still be violated at the floor: compute_rclone_resources() now lets --transfers and --s3-upload-concurrency fall to 1 (the desired floor stays at 2) so the minimum footprint fits the budget 
whenever it mathematically can, instead of being pinned at 2. rclone_download() reduces --transfers until each transfer's share of the budget can hold the read-ahead buffer plus at least one multi-thread stream, and clamps the buffer so a stream always fits. Previously, when the per-transfer stream budget was smaller than one stream, mt_streams floored to 1 and the combined footprint could exceed the budget (~4x on a tight 8-core host). The footprint can still marginally exceed budget only on hosts with under ~160 MB available, where rclone's irreducible minimum (16 MB buffer plus one in-flight chunk) cannot fit any smaller. --- src/helper/Site_Backup_Restore.php | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/helper/Site_Backup_Restore.php b/src/helper/Site_Backup_Restore.php index 92381b23..11b9c04d 100644 --- a/src/helper/Site_Backup_Restore.php +++ b/src/helper/Site_Backup_Restore.php @@ -1316,9 +1316,20 @@ private function rclone_download( $path ) { $mt_chunk_size = max( 1, intval( get_config_value( 'rclone-mt-chunk-size', 64 ) ) ); // MB; rclone default. $per_stream_mem = ( $mt_write_buffer / 1024 ) + $mt_chunk_size; // MB. - $per_transfer = max( 1, intval( floor( $budget / $transfers ) ) ); - $buffer_mb = max( 16, min( intval( floor( $per_transfer / 2 ) ), $max_buffer ) ); - $stream_budget = max( 0, $per_transfer - $buffer_mb ); + // Reduce transfers until each transfer's share of the budget can hold the + // minimum read-ahead buffer plus at least one stream, so the combined + // footprint stays within budget whenever the budget allows it at all. 
+ $min_per_transfer = 16 + (int) ceil( $per_stream_mem ); + while ( $transfers > 1 && intval( floor( $budget / $transfers ) ) < $min_per_transfer ) { + $transfers--; + } + $per_transfer = max( $min_per_transfer, intval( floor( $budget / $transfers ) ) ); + + // Give the read-ahead buffer up to half the share (capped at $max_buffer) + // but always leave room for at least one stream; spend the rest on streams. + $buffer_mb = min( intval( floor( $per_transfer / 2 ) ), $max_buffer, $per_transfer - (int) ceil( $per_stream_mem ) ); + $buffer_mb = max( 16, $buffer_mb ); + $stream_budget = $per_transfer - $buffer_mb; $mt_streams = max( 1, min( $cpu_cores * 2, 32, intval( floor( $stream_budget / $per_stream_mem ) ) ) ); $buffer_size = $buffer_mb . 'M'; @@ -1455,12 +1466,14 @@ private function compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ) // If the budget can't fit the desired parallelism even at the minimum // buffer size, first shrink S3 multipart concurrency (the biggest memory - // lever at 64M/chunk), then the number of parallel transfers. - while ( $is_s3 && $s3_concurrency > 2 && + // lever at 64M/chunk), then the number of parallel transfers. Both may + // fall to 1 on extremely tight budgets so the helper honours its own cap + // whenever the budget allows it at all (the desired floor stays at 2). 
+ while ( $is_s3 && $s3_concurrency > 1 && $transfers * ( $min_buffer + $s3_chunk_size * $s3_concurrency ) > $budget ) { - $s3_concurrency = max( 2, intval( $s3_concurrency / 2 ) ); + $s3_concurrency = max( 1, intval( $s3_concurrency / 2 ) ); } - while ( $transfers > 2 && + while ( $transfers > 1 && $transfers * ( $min_buffer + $s3_chunk_size * $s3_concurrency ) > $budget ) { $transfers--; } From 42cc62d60bab3513f9782b8024de2a2f1f681bd6 Mon Sep 17 00:00:00 2001 From: Riddhesh Sanghvi Date: Thu, 4 Jun 2026 12:08:00 +0530 Subject: [PATCH 5/5] fix(backup): detect S3 rclone backend by exact type, not substring Addresses review feedback: S3 detection ran 'rclone config show easyengine | grep type' and substring-matched 's3', which (a) hardcoded the easyengine remote name and so missed any backend configured via a different rclone-path, and (b) could false-positive on any config line whose value merely contained 's3' (e.g. an endpoint host or a content type), wrongly enabling S3 multipart tuning on a non-S3 remote. Extract detection into is_s3_remote(), which resolves the remote name from rclone-path and compares the backend's exact type value to s3. All S3-compatible providers (AWS, Spaces, Wasabi, MinIO, ...) use type = s3, so the exact match still covers them. --- src/helper/Site_Backup_Restore.php | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/helper/Site_Backup_Restore.php b/src/helper/Site_Backup_Restore.php index 11b9c04d..d5c055d4 100644 --- a/src/helper/Site_Backup_Restore.php +++ b/src/helper/Site_Backup_Restore.php @@ -1356,13 +1356,35 @@ private function rclone_download( $path ) { } + /** + * Whether the configured rclone remote is an S3 backend. 
+ * + * Resolves the remote name from `rclone-path` (instead of assuming + * `easyengine`) and compares the backend's exact `type` value to `s3`, + * rather than substring-matching the raw `rclone config show` output -- which + * could both miss a non-`easyengine` remote and false-positive on any line + * whose value merely contains the substring `s3`. All S3-compatible providers + * (AWS, Spaces, Wasabi, MinIO, ...) share `type = s3`, so an exact match on + * the type value covers them. + * + * @return bool + */ + private function is_s3_remote() { + $rclone_path = get_config_value( 'rclone-path', 'easyengine:easyengine' ); + $remote = explode( ':', $rclone_path )[0]; + + $command = sprintf( "rclone config show %s | awk -F '=' '/^[[:space:]]*type[[:space:]]*=/ {gsub(/[[:space:]]/, \"\", \$2); print \$2; exit}'", escapeshellarg( $remote ) ); + $type = trim( EE::launch( $command )->stdout ); + + return ( 's3' === $type ); + } + private function rclone_upload( $path ) { $cpu_cores = intval( EE::launch( 'nproc' )->stdout ); $available_ram = $this->get_available_ram_mb(); // Detect S3 backends, which require additional multipart-upload tuning. - $rclone_type = EE::launch( 'rclone config show easyengine | grep type' )->stdout; - $is_s3 = ( strpos( $rclone_type, 's3' ) !== false ); + $is_s3 = $this->is_s3_remote(); $res = $this->compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ); $transfers = $res['transfers'];