Skip to content
Open
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 177 additions & 14 deletions src/helper/Site_Backup_Restore.php
Original file line number Diff line number Diff line change
Expand Up @@ -1269,11 +1269,84 @@ private function get_remote_path( $upload = true ) {
}


/**
* Read currently-available memory in MB.
*
* Pins `LC_ALL=C` so the `Mem:` label and column layout stay stable across
* locales, locates the "available" column by its header name (rather than a
* fixed field index, which differs across `free`/procps versions) and falls
* back to the "free" column on older builds that have no "available" column.
*
* @return int Available memory in MB (0 if it cannot be determined, in which
* case the resource helpers fall back to their safe minimums).
*/
private function get_available_ram_mb() {
$command = "LC_ALL=C free -m | awk 'NR==1{for(i=1;i<=NF;i++) if(\$i==\"available\") c=i+1} /^Mem:/{print (c ? \$c : \$4)}'";

return intval( EE::launch( $command )->stdout );
}

private function rclone_download( $path ) {
$cpu_cores = intval( EE::launch( 'nproc' )->stdout );
$multi_threads = min( intval( $cpu_cores ) * 2, 32 );
$command = sprintf( "rclone copy -P --multi-thread-streams %d %s %s", $multi_threads, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) );
$output = EE::launch( $command );
$available_ram = $this->get_available_ram_mb();

// Derive the memory-safe transfer count and the total RAM budget from the
// shared helper (is_s3 = false: downloads allocate no S3 multipart-upload
// buffers). The budget is reused below rather than recomputed.
$res = $this->compute_rclone_resources( $cpu_cores, $available_ram, false );
$transfers = $res['transfers'];
$budget = $res['budget']; // MB; available_ram * rclone-mem-fraction.
$max_buffer = $res['max_buffer']; // MB.

// Per concurrent --transfers a download holds one --buffer-size read-ahead
// buffer plus, for files above rclone's --multi-thread-cutoff,
// --multi-thread-streams streams -- each with a
// --multi-thread-write-buffer-size buffer and one in-flight
// --multi-thread-chunk-size range. The whole footprint must fit ONE budget:
//
// transfers * ( buffer_size + multi_thread_streams * per_stream_mem ) <= budget
//
// so each transfer's share of the budget is split between the read-ahead
// buffer (up to half, capped at $max_buffer) and the multi-thread streams.
// Both scale with available RAM; on a tight budget streams floor at 1. The
// previous code budgeted the streams against the full budget independently
// of the read-ahead buffers, so the two pools could together reach ~2x the
// intended fraction and still OOM during restore/rollback.
$mt_write_buffer = max( 1, intval( get_config_value( 'rclone-mt-write-buffer-size', 128 ) ) ); // KiB; rclone default.
$mt_chunk_size = max( 1, intval( get_config_value( 'rclone-mt-chunk-size', 64 ) ) ); // MB; rclone default.
$per_stream_mem = ( $mt_write_buffer / 1024 ) + $mt_chunk_size; // MB.

// Reduce transfers until each transfer's share of the budget can hold the
// minimum read-ahead buffer plus at least one stream, so the combined
// footprint stays within budget whenever the budget allows it at all.
$min_per_transfer = 16 + (int) ceil( $per_stream_mem );
while ( $transfers > 1 && intval( floor( $budget / $transfers ) ) < $min_per_transfer ) {
$transfers--;
}
$per_transfer = max( $min_per_transfer, intval( floor( $budget / $transfers ) ) );

// Give the read-ahead buffer up to half the share (capped at $max_buffer)
// but always leave room for at least one stream; spend the rest on streams.
$buffer_mb = min( intval( floor( $per_transfer / 2 ) ), $max_buffer, $per_transfer - (int) ceil( $per_stream_mem ) );
$buffer_mb = max( 16, $buffer_mb );
$stream_budget = $per_transfer - $buffer_mb;
$mt_streams = max( 1, min( $cpu_cores * 2, 32, intval( floor( $stream_budget / $per_stream_mem ) ) ) );
$buffer_size = $buffer_mb . 'M';

EE::debug( sprintf(
'rclone download tuning: available_ram=%dMB budget=%dMB transfers=%d buffer-size=%s multi-thread-streams=%d mt-write-buffer=%dKi mt-chunk-size=%dM (est. peak ~%dMB)',
$available_ram,
$budget,
$transfers,
$buffer_size,
$mt_streams,
$mt_write_buffer,
$mt_chunk_size,
(int) ( $transfers * ( $buffer_mb + $mt_streams * $per_stream_mem ) )
) );

$command = sprintf( "rclone copy -P --transfers %d --buffer-size %s --multi-thread-streams %d --multi-thread-write-buffer-size %dKi --multi-thread-chunk-size %dM %s %s", $transfers, $buffer_size, $mt_streams, $mt_write_buffer, $mt_chunk_size, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) );
$output = EE::launch( $command );

if ( $output->return_code ) {
EE::error( 'Error downloading backup from remote storage.' );
Expand All @@ -1284,23 +1357,33 @@ private function rclone_download( $path ) {


private function rclone_upload( $path ) {
$cpu_cores = intval( EE::launch( 'nproc' )->stdout );
$ram = intval( EE::launch( "free -m | grep Mem | awk '{print $7}'" )->stdout );
$transfers = max( 2, min( intval( $cpu_cores / 2 ), 4 ) );
$max_buffer_size = 4096;

$cpu_cores = intval( EE::launch( 'nproc' )->stdout );
$available_ram = $this->get_available_ram_mb();

$buffer_size = min( floor( $ram / $transfers ), $max_buffer_size ) . 'M';
// Detect S3 backends, which require additional multipart-upload tuning.
$rclone_type = EE::launch( 'rclone config show easyengine | grep type' )->stdout;
$is_s3 = ( strpos( $rclone_type, 's3' ) !== false );
Comment thread
mrrobot47 marked this conversation as resolved.
Outdated

$res = $this->compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 );
$transfers = $res['transfers'];
$buffer_size = $res['buffer_size'] . 'M';

$command = 'rclone config show easyengine | grep type';
$output = EE::launch( $command )->stdout;
$s3_flag = '';

if ( strpos( $output, 's3' ) !== false ) {
$s3_flag = ' --s3-chunk-size=64M --s3-upload-concurrency ' . min( intval( $cpu_cores ) * 2, 32 );
if ( $is_s3 ) {
$s3_flag = sprintf( ' --s3-chunk-size=%dM --s3-upload-concurrency %d', $res['s3_chunk_size'], $res['s3_concurrency'] );
}

EE::debug( sprintf(
'rclone upload tuning: available_ram=%dMB transfers=%d buffer-size=%s s3=%s s3-chunk-size=%dM s3-upload-concurrency=%d (est. peak ~%dMB)',
$available_ram,
$transfers,
$buffer_size,
$is_s3 ? 'yes' : 'no',
$res['s3_chunk_size'],
$res['s3_concurrency'],
$transfers * ( $res['buffer_size'] + ( $res['s3_chunk_size'] * $res['s3_concurrency'] ) )
) );

$command = sprintf( "rclone copy -P %s --transfers %d --checkers %d --buffer-size %s %s %s", $s3_flag, $transfers, $transfers, $buffer_size, escapeshellarg( $path ), escapeshellarg( $this->get_remote_path() ) );
$output = EE::launch( $command );

Expand Down Expand Up @@ -1331,6 +1414,86 @@ private function rclone_upload( $path ) {
}
}

/**
* Compute memory-safe rclone transfer settings shared by upload and download.
*
* rclone allocates one `--buffer-size` read-ahead buffer per concurrent
* `--transfers`, and, for S3 uploads, an additional
* `--s3-chunk-size * --s3-upload-concurrency` multipart buffer per transfer.
* The upload in-memory footprint is therefore:
*
* transfers * ( buffer_size + s3_chunk_size * s3_upload_concurrency )
*
* The previous implementation set `buffer_size = available_ram / transfers`,
* which made the read-ahead buffers alone consume ~100% of available memory
* (e.g. `--buffer-size 1328M --transfers 2` on a 4 GB host) and routinely
* triggered the OOM killer during backups. This helper instead caps rclone's
* total footprint at a fraction of currently-available memory, while still
* scaling parallelism and buffer size up on larger hosts so spare RAM is used.
*
* The returned `budget` (and `max_buffer`) let callers that allocate further
* buffer pools -- e.g. `rclone_download()` sizing `--multi-thread-streams` --
* stay within the same single budget instead of recomputing their own.
*
* Tunable via global config: `rclone-mem-fraction` (default 0.5) and
* `rclone-max-buffer-size` in MB (default 256).
*
* @param int $cpu_cores Number of CPU cores (nproc).
* @param int $available_ram Currently available memory in MB.
* @param bool $is_s3 Whether the remote is an S3 backend.
*
* @return array{transfers:int,buffer_size:int,s3_concurrency:int,s3_chunk_size:int,budget:int,max_buffer:int}
*/
private function compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ) {
$cpu_cores = max( 1, intval( $cpu_cores ) );
$available_ram = max( 0, intval( $available_ram ) );

// Fraction of *available* RAM rclone may use, clamped to a safe range so
// a backup never starves the host (MariaDB, PHP-FPM, nginx) or itself.
$mem_fraction = floatval( get_config_value( 'rclone-mem-fraction', 0.5 ) );
$mem_fraction = min( 0.9, max( 0.1, $mem_fraction ) );

$min_buffer = 16; // rclone's default; never go below it.
$max_buffer = max( $min_buffer, intval( get_config_value( 'rclone-max-buffer-size', 256 ) ) );
$s3_chunk_size = $is_s3 ? 64 : 0;

// Total memory budget for rclone transfer/multipart buffers.
$budget = (int) floor( $available_ram * $mem_fraction );

// Desired parallelism, scaled with cores but capped to sane bounds.
$transfers = max( 2, min( $cpu_cores, 8 ) );
$s3_concurrency = $is_s3 ? max( 2, min( $cpu_cores * 2, 32 ) ) : 0;

// If the budget can't fit the desired parallelism even at the minimum
// buffer size, first shrink S3 multipart concurrency (the biggest memory
// lever at 64M/chunk), then the number of parallel transfers. Both may
// fall to 1 on extremely tight budgets so the helper honours its own cap
// whenever the budget allows it at all (the desired floor stays at 2).
while ( $is_s3 && $s3_concurrency > 1 &&
$transfers * ( $min_buffer + $s3_chunk_size * $s3_concurrency ) > $budget ) {
$s3_concurrency = max( 1, intval( $s3_concurrency / 2 ) );
}
while ( $transfers > 1 &&
$transfers * ( $min_buffer + $s3_chunk_size * $s3_concurrency ) > $budget ) {
$transfers--;
}
Comment thread
mrrobot47 marked this conversation as resolved.

// Spend whatever budget remains after reserving S3 multipart buffers on
// the read-ahead buffer, clamped to [$min_buffer, $max_buffer].
$per_transfer_budget = intval( floor( $budget / $transfers ) );
$buffer_size = $per_transfer_budget - ( $s3_chunk_size * $s3_concurrency );
$buffer_size = max( $min_buffer, min( $buffer_size, $max_buffer ) );

return [
'transfers' => $transfers,
'buffer_size' => $buffer_size,
's3_concurrency' => $s3_concurrency,
's3_chunk_size' => $s3_chunk_size,
'budget' => $budget,
'max_buffer' => $max_buffer,
];
}

/**
* Delete old backups from remote storage after successful upload.
* Keeps only the configured number of most recent backups.
Expand Down
Loading