dmlc · RAMitchell · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
@@ -480,7 +480,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #' @param eta,learning_rate (two aliases for the same parameter)
 #' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `eta` shrinks the feature weights to make the boosting process more conservative.
 #' - range: \eqn{[0,1]}
-#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster.
+#' - default value: 0.1 for tree-based boosters, 0.5 for linear booster.
 #'
 #' Note: should only pass one of `eta` or `learning_rate`. Both refer to the same parameter and there's thus no difference between one or the other.
 #' @param gamma,min_split_loss (two aliases for the same parameter) (for Tree Booster) (default=0, alias: `gamma`)
@@ -493,15 +493,15 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #' Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. `"exact"` tree method requires non-zero value.
 #'
 #' range: \eqn{[0, \infty)}
-#' @param min_child_weight (for Tree Booster) (default=1)
+#' @param min_child_weight (for Tree Booster) (default=2)
 #' Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than `min_child_weight`, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger `min_child_weight` is, the more conservative the algorithm will be.
 #'
 #' range: \eqn{[0, \infty)}
 #' @param max_delta_step (for Tree Booster) (default=0)
 #' Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Set it to value of 1-10 might help control the update.
 #'
 #' range: \eqn{[0, \infty)}
-#' @param subsample (for Tree Booster) (default=1)
+#' @param subsample (for Tree Booster) (default=0.8)
 #' Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting. Subsampling will occur once in every boosting iteration.
 #'
 #' range: \eqn{(0,1]}
@@ -514,9 +514,9 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #'   `"subsample"` may be set to as low as 0.1 without loss of model accuracy. Note that this
 #'   sampling method is only supported when `"tree_method"` is set to `"hist"`; other tree
 #'   methods only support `"uniform"` sampling.
-#' @param colsample_bytree,colsample_bylevel,colsample_bynode (for Tree Booster) (default=1)
+#' @param colsample_bytree,colsample_bylevel,colsample_bynode (for Tree Booster) (`colsample_bytree` default=0.8, `colsample_bylevel` and `colsample_bynode` default=1)
 #' This is a family of parameters for subsampling of columns.
-#' - All `"colsample_by*"` parameters have a range of \eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.
+#' - All `"colsample_by*"` parameters have a range of \eqn{(0, 1]} and specify the fraction of columns to be subsampled.
 #' - `"colsample_bytree"` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
 #' - `"colsample_bylevel"` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
 #' - `"colsample_bynode"` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.

diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
@@ -1052,7 +1052,7 @@ check.early.stopping.rounds <- function(early_stopping_rounds, eval_set) {
 #' @param learning_rate (alias: `eta`)
 #' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `learning_rate` shrinks the feature weights to make the boosting process more conservative.
 #' - range: \eqn{[0,1]}
-#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster.
+#' - default value: 0.1 for tree-based boosters, 0.5 for linear booster.
 #' @param reg_lambda (alias: `lambda`)
 #' - For tree-based boosters:
 #'   - L2 regularization term on weights. Increasing this value will make model more conservative.

diff --git a/R-package/man/xgb.params.Rd b/R-package/man/xgb.params.Rd
diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
@@ -11,6 +11,12 @@ windows_flag <- .Platform$OS.type == "windows" &&
   .Machine$sizeof.pointer != 8
 solaris_flag <- (Sys.info()["sysname"] == "SunOS")
 n_threads <- 1
+legacy_sampling_params <- list(  # values documented as the defaults before this change
+  min_child_weight = 1,  # documented default is now 2
+  subsample = 1,  # documented default is now 0.8
+  colsample_bytree = 1  # documented default is now 0.8
+)
+legacy_tree_params <- c(list(learning_rate = 0.3), legacy_sampling_params)  # plus old tree-booster learning rate
 
 
 test_that("train and predict binary classification", {
@@ -159,10 +165,10 @@ test_that("train and predict softprob", {
     bst <- xgb.train(
       data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),
       nrounds = 5,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 3, learning_rate = 0.5, nthread = n_threads,
         objective = "multi:softprob", num_class = 3, eval_metric = "merror"
-      ),
+      ), legacy_sampling_params),
       evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))
     ),
     "train-merror"
@@ -218,10 +224,10 @@ test_that("train and predict softmax", {
     bst <- xgb.train(
       data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),
       nrounds = 5,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 3, learning_rate = 0.5, nthread = n_threads,
         objective = "multi:softmax", num_class = 3, eval_metric = "merror"
-      ),
+      ), legacy_sampling_params),
       evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))
     ),
     "train-merror"
@@ -339,9 +345,9 @@ test_that("use of multiple eval metrics works", {
 test_that("training continuation works", {
   dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   evals <- list(train = dtrain)
-  params <- xgb.params(
+  params <- c(xgb.params(
     objective = "binary:logistic", max_depth = 2, learning_rate = 1, nthread = n_threads
-  )
+  ), legacy_sampling_params)
 
   # for the reference, use 4 iterations at once:
   set.seed(11)
@@ -380,13 +386,13 @@ test_that("xgb.cv works", {
       data = xgb.DMatrix(train$data, label = train$label, nthread = 1),
       nfold = 5,
       nrounds = 2,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 2,
         learning_rate = 1.,
         nthread = n_threads,
         objective = "binary:logistic",
         eval_metric = "error"
-      ),
+      ), legacy_sampling_params),
       verbose = TRUE
     ),
     "train-error:"
@@ -443,23 +449,23 @@ test_that("xgb.cv works with stratified folds", {
     data = dtrain,
     nrounds = 2,
     nfold = 5,
-    params = xgb.params(
+    params = c(xgb.params(
       max_depth = 2,
       nthread = n_threads,
       objective = "binary:logistic"
-    ),
+    ), legacy_sampling_params),
     verbose = FALSE, stratified = FALSE
   )
   set.seed(314159)
   cv2 <- xgb.cv(
     data = dtrain,
     nfold = 5,
     nrounds = 2,
-    params = xgb.params(
+    params = c(xgb.params(
       max_depth = 2,
       nthread = n_threads,
       objective = "binary:logistic"
-    ),
+    ), legacy_sampling_params),
     verbose = FALSE, stratified = TRUE
   )
   # Stratified folds should result in a different evaluation logs
@@ -488,11 +494,11 @@ test_that("train and predict with non-strict classes", {
     bst <- xgb.train(
       data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),
       nrounds = 2,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 2,
         nthread = n_threads,
         objective = "binary:logistic"
-      ),
+      ), legacy_sampling_params),
       verbose = 0
     ),
     regexp = NA
@@ -507,11 +513,11 @@ test_that("train and predict with non-strict classes", {
     bst <- xgb.train(
       data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),
       nrounds = 2,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 2,
         nthread = n_threads,
         objective = "binary:logistic"
-      ),
+      ), legacy_sampling_params),
       verbose = 0
     ),
     regexp = NA
@@ -559,11 +565,11 @@ test_that("colsample_bytree works", {
   evals <- list(train = dtrain, eval = dtest)
   ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
   ## each tree
-  params <- xgb.params(
+  params <- c(xgb.params(
     max_depth = 2, learning_rate = 0, nthread = n_threads,
     colsample_bytree = 0.01, objective = "binary:logistic",
     eval_metric = "auc"
-  )
+  ), list(min_child_weight = 1, subsample = 1))
   set.seed(2)
   bst <- xgb.train(params, dtrain, nrounds = 100, evals = evals, verbose = 0)
   xgb.importance(model = bst)
@@ -690,12 +696,12 @@ test_that("Quantile regression accepts multiple quantiles", {
   dm <- xgb.DMatrix(data = x, label = y, nthread = 1)
   model <- xgb.train(
     data = dm,
-    params = xgb.params(
+    params = c(xgb.params(
       objective = "reg:quantileerror",
       tree_method = "exact",
       quantile_alpha = c(0.05, 0.5, 0.95),
       nthread = n_threads
-    ),
+    ), legacy_tree_params),
     nrounds = 15
   )
   pred <- predict(model, x)
@@ -717,12 +723,12 @@ test_that("Can use multi-output labels with built-in objectives", {
   y_mirrored <- cbind(y, -y)
   dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads)
   model <- xgb.train(
-    params = xgb.params(
+    params = c(xgb.params(
       tree_method = "hist",
       multi_strategy = "multi_output_tree",
       objective = "reg:squarederror",
       nthread = n_threads
-    ),
+    ), legacy_tree_params),
     data = dm,
     nrounds = 5
   )
@@ -739,7 +745,7 @@ test_that("Can use multi-output labels with custom objectives", {
   y_mirrored <- cbind(y, -y)
   dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads)
   model <- xgb.train(
-    params = xgb.params(
+    params = c(xgb.params(
       tree_method = "hist",
       multi_strategy = "multi_output_tree",
       base_score = 0,
@@ -751,7 +757,7 @@ test_that("Can use multi-output labels with custom objectives", {
         return(list(grad = grad, hess = hess))
       },
       nthread = n_threads
-    ),
+    ), legacy_tree_params),
     data = dm,
     nrounds = 5
   )

diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
@@ -154,7 +154,8 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.cv", {
 
 params <- xgb.params(
   objective = "binary:logistic", eval_metric = "error",
-  max_depth = 4, nthread = n_threads
+  max_depth = 4, nthread = n_threads,
+  min_child_weight = 1, subsample = 1, colsample_bytree = 1
 )
 
 test_that("can store evaluation_log without printing", {

diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
@@ -29,6 +29,7 @@ evalerror <- function(preds, dtrain) {
 }
 
 param <- list(max_depth = 2, learning_rate = 1, nthread = n_threads,
+              min_child_weight = 1, subsample = 1, colsample_bytree = 1,
               objective = logregobj, eval_metric = evalerror)
 num_round <- 2
 

diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd
@@ -57,8 +57,13 @@ bst <- xgb.train(
   nrounds = 1,
   params = xgb.params(
     objective = "binary:logistic",
-    nthread = 2,
-    max_depth = 1
+    nthread = 1,
+    max_depth = 1,
+    eta = 0.3,
+    min_child_weight = 1,
+    subsample = 1,
+    colsample_bytree = 1,
+    seed = 0
   )
 )
 ```