@@ -1269,11 +1269,84 @@ private function get_remote_path( $upload = true ) {
12691269 }
12701270
12711271
/**
 * Report how much memory is currently available, in MB.
 *
 * Runs `free -m` under `LC_ALL=C` and lets awk pick the value out of the
 * `Mem:` row. awk scans the header row for a column literally named
 * "available"; because the `Mem:` row carries one extra leading field (the
 * label itself), the matching data field sits one position further right.
 * When the header has no "available" column, field 4 of the `Mem:` row
 * (the "free" column) is printed instead.
 *
 * @return int Available memory in MB; 0 when the command prints nothing
 *             numeric.
 */
private function get_available_ram_mb() {
	// Header pass remembers the data-row index of "available"; the Mem: pass prints it.
	$awk_program = "NR==1{for(i=1;i<=NF;i++) if(\$i==\"available\") c=i+1} /^Mem:/{print (c ? \$c : \$4)}";
	$result      = EE::launch( "LC_ALL=C free -m | awk '" . $awk_program . "'" );

	return (int) $result->stdout;
}
1288+
12721289 private function rclone_download ( $ path ) {
12731290 $ cpu_cores = intval ( EE ::launch ( 'nproc ' )->stdout );
1274- $ multi_threads = min ( intval ( $ cpu_cores ) * 2 , 32 );
1275- $ command = sprintf ( "rclone copy -P --multi-thread-streams %d %s %s " , $ multi_threads , escapeshellarg ( $ this ->get_remote_path ( false ) ), escapeshellarg ( $ path ) );
1276- $ output = EE ::launch ( $ command );
1291+ $ available_ram = $ this ->get_available_ram_mb ();
1292+
1293+ // Derive the memory-safe transfer count and the total RAM budget from the
1294+ // shared helper (is_s3 = false: downloads allocate no S3 multipart-upload
1295+ // buffers). The budget is reused below rather than recomputed.
1296+ $ res = $ this ->compute_rclone_resources ( $ cpu_cores , $ available_ram , false );
1297+ $ transfers = $ res ['transfers ' ];
1298+ $ budget = $ res ['budget ' ]; // MB; available_ram * rclone-mem-fraction.
1299+ $ max_buffer = $ res ['max_buffer ' ]; // MB.
1300+
1301+ // Per concurrent --transfers a download holds one --buffer-size read-ahead
1302+ // buffer plus, for files above rclone's --multi-thread-cutoff,
1303+ // --multi-thread-streams streams -- each with a
1304+ // --multi-thread-write-buffer-size buffer and one in-flight
1305+ // --multi-thread-chunk-size range. The whole footprint must fit ONE budget:
1306+ //
1307+ // transfers * ( buffer_size + multi_thread_streams * per_stream_mem ) <= budget
1308+ //
1309+ // so each transfer's share of the budget is split between the read-ahead
1310+ // buffer (up to half, capped at $max_buffer) and the multi-thread streams.
1311+ // Both scale with available RAM; on a tight budget streams floor at 1. The
1312+ // previous code budgeted the streams against the full budget independently
1313+ // of the read-ahead buffers, so the two pools could together reach ~2x the
1314+ // intended fraction and still OOM during restore/rollback.
1315+ $ mt_write_buffer = max ( 1 , intval ( get_config_value ( 'rclone-mt-write-buffer-size ' , 128 ) ) ); // KiB; rclone default.
1316+ $ mt_chunk_size = max ( 1 , intval ( get_config_value ( 'rclone-mt-chunk-size ' , 64 ) ) ); // MB; rclone default.
1317+ $ per_stream_mem = ( $ mt_write_buffer / 1024 ) + $ mt_chunk_size ; // MB.
1318+
1319+ // Reduce transfers until each transfer's share of the budget can hold the
1320+ // minimum read-ahead buffer plus at least one stream, so the combined
1321+ // footprint stays within budget whenever the budget allows it at all.
1322+ $ min_per_transfer = 16 + (int ) ceil ( $ per_stream_mem );
1323+ while ( $ transfers > 1 && intval ( floor ( $ budget / $ transfers ) ) < $ min_per_transfer ) {
1324+ $ transfers --;
1325+ }
1326+ $ per_transfer = max ( $ min_per_transfer , intval ( floor ( $ budget / $ transfers ) ) );
1327+
1328+ // Give the read-ahead buffer up to half the share (capped at $max_buffer)
1329+ // but always leave room for at least one stream; spend the rest on streams.
1330+ $ buffer_mb = min ( intval ( floor ( $ per_transfer / 2 ) ), $ max_buffer , $ per_transfer - (int ) ceil ( $ per_stream_mem ) );
1331+ $ buffer_mb = max ( 16 , $ buffer_mb );
1332+ $ stream_budget = $ per_transfer - $ buffer_mb ;
1333+ $ mt_streams = max ( 1 , min ( $ cpu_cores * 2 , 32 , intval ( floor ( $ stream_budget / $ per_stream_mem ) ) ) );
1334+ $ buffer_size = $ buffer_mb . 'M ' ;
1335+
1336+ EE ::debug ( sprintf (
1337+ 'rclone download tuning: available_ram=%dMB budget=%dMB transfers=%d buffer-size=%s multi-thread-streams=%d mt-write-buffer=%dKi mt-chunk-size=%dM (est. peak ~%dMB) ' ,
1338+ $ available_ram ,
1339+ $ budget ,
1340+ $ transfers ,
1341+ $ buffer_size ,
1342+ $ mt_streams ,
1343+ $ mt_write_buffer ,
1344+ $ mt_chunk_size ,
1345+ (int ) ( $ transfers * ( $ buffer_mb + $ mt_streams * $ per_stream_mem ) )
1346+ ) );
1347+
1348+ $ command = sprintf ( "rclone copy -P --transfers %d --buffer-size %s --multi-thread-streams %d --multi-thread-write-buffer-size %dKi --multi-thread-chunk-size %dM %s %s " , $ transfers , $ buffer_size , $ mt_streams , $ mt_write_buffer , $ mt_chunk_size , escapeshellarg ( $ this ->get_remote_path ( false ) ), escapeshellarg ( $ path ) );
1349+ $ output = EE ::launch ( $ command );
12771350
12781351 if ( $ output ->return_code ) {
12791352 EE ::error ( 'Error downloading backup from remote storage. ' );
@@ -1283,24 +1356,56 @@ private function rclone_download( $path ) {
12831356 }
12841357
12851358
1286- private function rclone_upload ( $ path ) {
1287- $ cpu_cores = intval ( EE ::launch ( 'nproc ' )->stdout );
1288- $ ram = intval ( EE ::launch ( "free -m | grep Mem | awk '{print $7}' " )->stdout );
1289- $ transfers = max ( 2 , min ( intval ( $ cpu_cores / 2 ), 4 ) );
1290- $ max_buffer_size = 4096 ;
/**
 * Tell whether the rclone remote in use is backed by the `s3` backend.
 *
 * The remote name is whatever precedes the first `:` in the `rclone-path`
 * config value (default `easyengine:easyengine`). `rclone config show` is
 * run for that remote and awk extracts the value of its `type = ...` line
 * with all whitespace stripped. Only an exact `s3` value counts, so other
 * lines or values that merely contain the substring `s3` do not match.
 *
 * @return bool True when the remote's `type` is exactly `s3`.
 */
private function is_s3_remote() {
	// Everything before the first colon is the remote name.
	list( $remote_name ) = explode( ':', get_config_value( 'rclone-path', 'easyengine:easyengine' ), 2 );

	// Print the first `type = <value>` entry with whitespace removed, then stop.
	$awk_filter = "awk -F '=' '/^[[:space:]]*type[[:space:]]*=/ {gsub(/[[:space:]]/, \"\", \$2); print \$2; exit}'";
	$launched   = EE::launch( 'rclone config show ' . escapeshellarg( $remote_name ) . ' | ' . $awk_filter );

	return 's3' === trim( $launched->stdout );
}
12911381
1382+ private function rclone_upload ( $ path ) {
1383+ $ cpu_cores = intval ( EE ::launch ( 'nproc ' )->stdout );
1384+ $ available_ram = $ this ->get_available_ram_mb ();
12921385
1293- $ buffer_size = min ( floor ( $ ram / $ transfers ), $ max_buffer_size ) . 'M ' ;
1386+ // Detect S3 backends, which require additional multipart-upload tuning.
1387+ $ is_s3 = $ this ->is_s3_remote ();
12941388
1389+ $ res = $ this ->compute_rclone_resources ( $ cpu_cores , $ available_ram , $ is_s3 );
1390+ $ transfers = $ res ['transfers ' ];
1391+ $ buffer_size = $ res ['buffer_size ' ] . 'M ' ;
12951392
1296- $ command = 'rclone config show easyengine | grep type ' ;
1297- $ output = EE ::launch ( $ command )->stdout ;
12981393 $ s3_flag = '' ;
1299-
1300- if ( strpos ( $ output , 's3 ' ) !== false ) {
1301- $ s3_flag = ' --s3-chunk-size=64M --s3-upload-concurrency ' . min ( intval ( $ cpu_cores ) * 2 , 32 );
1394+ if ( $ is_s3 ) {
1395+ $ s3_flag = sprintf ( ' --s3-chunk-size=%dM --s3-upload-concurrency %d ' , $ res ['s3_chunk_size ' ], $ res ['s3_concurrency ' ] );
13021396 }
13031397
1398+ EE ::debug ( sprintf (
1399+ 'rclone upload tuning: available_ram=%dMB transfers=%d buffer-size=%s s3=%s s3-chunk-size=%dM s3-upload-concurrency=%d (est. peak ~%dMB) ' ,
1400+ $ available_ram ,
1401+ $ transfers ,
1402+ $ buffer_size ,
1403+ $ is_s3 ? 'yes ' : 'no ' ,
1404+ $ res ['s3_chunk_size ' ],
1405+ $ res ['s3_concurrency ' ],
1406+ $ transfers * ( $ res ['buffer_size ' ] + ( $ res ['s3_chunk_size ' ] * $ res ['s3_concurrency ' ] ) )
1407+ ) );
1408+
13041409 $ command = sprintf ( "rclone copy -P %s --transfers %d --checkers %d --buffer-size %s %s %s " , $ s3_flag , $ transfers , $ transfers , $ buffer_size , escapeshellarg ( $ path ), escapeshellarg ( $ this ->get_remote_path () ) );
13051410 $ output = EE ::launch ( $ command );
13061411
@@ -1331,6 +1436,86 @@ private function rclone_upload( $path ) {
13311436 }
13321437 }
13331438
/**
 * Work out rclone parallelism and buffer sizes that fit a memory budget.
 *
 * The footprint this helper models is
 *
 *     transfers * ( buffer_size + s3_chunk_size * s3_concurrency )
 *
 * where the S3 terms are zero for non-S3 remotes. The budget is a fraction
 * of the available memory passed in. Parallelism starts from values derived
 * from the core count and is reduced -- S3 concurrency first (by halving),
 * then the transfer count (one at a time) -- until the footprint at the
 * minimum buffer size fits the budget or both have reached 1. The
 * read-ahead buffer then receives what is left of each transfer's share,
 * clamped between the minimum (16 MB) and the configured maximum.
 *
 * Global config keys read here: `rclone-mem-fraction` (default 0.5, clamped
 * to 0.1-0.9) and `rclone-max-buffer-size` in MB (default 256, never below
 * the 16 MB minimum).
 *
 * `budget` and `max_buffer` are returned alongside the tuned values so a
 * caller can size further buffers against the same figures.
 *
 * @param int  $cpu_cores     Number of CPU cores (values below 1 are treated as 1).
 * @param int  $available_ram Currently available memory in MB (negative values are treated as 0).
 * @param bool $is_s3         Whether the remote is an S3 backend.
 *
 * @return array{transfers:int,buffer_size:int,s3_concurrency:int,s3_chunk_size:int,budget:int,max_buffer:int}
 */
private function compute_rclone_resources( $cpu_cores, $available_ram, $is_s3 ) {
	$cores  = max( 1, intval( $cpu_cores ) );
	$ram_mb = max( 0, intval( $available_ram ) );
	$on_s3  = (bool) $is_s3;

	// Share of available RAM rclone may claim, kept inside [0.1, 0.9].
	$fraction = min( 0.9, max( 0.1, floatval( get_config_value( 'rclone-mem-fraction', 0.5 ) ) ) );

	// Read-ahead buffer bounds in MB; the configured ceiling can never undercut the floor.
	$floor_buffer = 16;
	$ceil_buffer  = max( $floor_buffer, intval( get_config_value( 'rclone-max-buffer-size', 256 ) ) );

	// Multipart chunk size in MB; zero removes the S3 term from the footprint.
	$chunk_mb = $on_s3 ? 64 : 0;

	// Overall memory allowance in MB.
	$allowance = (int) floor( $ram_mb * $fraction );

	// Starting parallelism: 2..8 transfers, and 2..32 S3 upload streams when on S3.
	$parallel    = max( 2, min( $cores, 8 ) );
	$concurrency = $on_s3 ? max( 2, min( $cores * 2, 32 ) ) : 0;

	// Footprint in MB at the minimum buffer size for a given parallelism.
	$footprint = function ( $transfer_count, $stream_count ) use ( $floor_buffer, $chunk_mb ) {
		return $transfer_count * ( $floor_buffer + $chunk_mb * $stream_count );
	};

	// First lever: halve S3 concurrency (down to 1) while the footprint overshoots.
	if ( $on_s3 ) {
		while ( $concurrency > 1 && $footprint( $parallel, $concurrency ) > $allowance ) {
			$concurrency = max( 1, (int) floor( $concurrency / 2 ) );
		}
	}

	// Second lever: drop transfers one by one (down to 1) while still over the allowance.
	while ( $parallel > 1 && $footprint( $parallel, $concurrency ) > $allowance ) {
		$parallel--;
	}

	// Each transfer's share, minus its S3 multipart reservation, goes to the
	// read-ahead buffer -- clamped to the allowed range.
	$share      = (int) floor( $allowance / $parallel );
	$read_ahead = $share - ( $chunk_mb * $concurrency );
	$read_ahead = max( $floor_buffer, min( $read_ahead, $ceil_buffer ) );

	return [
		'transfers'      => $parallel,
		'buffer_size'    => $read_ahead,
		's3_concurrency' => $concurrency,
		's3_chunk_size'  => $chunk_mb,
		'budget'         => $allowance,
		'max_buffer'     => $ceil_buffer,
	];
}
1518+
13341519 /**
13351520 * Delete old backups from remote storage after successful upload.
13361521 * Keeps only the configured number of most recent backups.
0 commit comments