diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index c5857cdefc7c..215ef59e9f95 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -480,7 +480,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #' @param eta,learning_rate (two aliases for the same parameter)
 #' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `eta` shrinks the feature weights to make the boosting process more conservative.
 #' - range: \eqn{[0,1]}
-#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster.
+#' - default value: 0.1 for tree-based boosters, 0.5 for linear booster.
 #'
 #' Note: should only pass one of `eta` or `learning_rate`. Both refer to the same parameter and there's thus no difference between one or the other.
 #' @param gamma,min_split_loss (two aliases for the same parameter) (for Tree Booster) (default=0, alias: `gamma`)
@@ -493,7 +493,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #' Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. `"exact"` tree method requires non-zero value.
 #'
 #' range: \eqn{[0, \infty)}
-#' @param min_child_weight (for Tree Booster) (default=1)
+#' @param min_child_weight (for Tree Booster) (default=2)
 #' Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than `min_child_weight`, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger `min_child_weight` is, the more conservative the algorithm will be.
 #'
 #' range: \eqn{[0, \infty)}
@@ -501,7 +501,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #' Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Set it to value of 1-10 might help control the update.
 #'
 #' range: \eqn{[0, \infty)}
-#' @param subsample (for Tree Booster) (default=1)
+#' @param subsample (for Tree Booster) (default=0.8)
 #' Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting. Subsampling will occur once in every boosting iteration.
 #'
 #' range: \eqn{(0,1]}
@@ -514,9 +514,9 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),
 #'   `"subsample"` may be set to as low as 0.1 without loss of model accuracy. Note that this
 #'   sampling method is only supported when `"tree_method"` is set to `"hist"`; other tree
 #'   methods only support `"uniform"` sampling.
-#' @param colsample_bytree,colsample_bylevel,colsample_bynode (for Tree Booster) (default=1)
+#' @param colsample_bytree,colsample_bylevel,colsample_bynode (for Tree Booster) (`colsample_bytree` default=0.8, `colsample_bylevel` and `colsample_bynode` default=1)
 #' This is a family of parameters for subsampling of columns.
-#' - All `"colsample_by*"` parameters have a range of \eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.
+#' - All `"colsample_by*"` parameters have a range of \eqn{(0, 1]} and specify the fraction of columns to be subsampled.
 #' - `"colsample_bytree"` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
 #' - `"colsample_bylevel"` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
 #' - `"colsample_bynode"` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index b6f9c8bc5518..a040a73c5ea8 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -1052,7 +1052,7 @@ check.early.stopping.rounds <- function(early_stopping_rounds, eval_set) {
 #' @param learning_rate (alias: `eta`)
 #' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `learning_rate` shrinks the feature weights to make the boosting process more conservative.
 #' - range: \eqn{[0,1]}
-#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster.
+#' - default value: 0.1 for tree-based boosters, 0.5 for linear booster.
 #' @param reg_lambda (alias: `lambda`)
 #' - For tree-based boosters:
 #'   - L2 regularization term on weights. Increasing this value will make model more conservative.
diff --git a/R-package/man/xgb.params.Rd b/R-package/man/xgb.params.Rd
index c49212f053e7..611c4410b381 100644
--- a/R-package/man/xgb.params.Rd
+++ b/R-package/man/xgb.params.Rd
@@ -128,7 +128,7 @@ Which booster to use. Can be \code{"gbtree"}, \code{"gblinear"} or \code{"dart"}
 Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and \code{eta} shrinks the feature weights to make the boosting process more conservative.
 \itemize{
 \item range: \eqn{[0,1]}
-\item default value: 0.3 for tree-based boosters, 0.5 for linear booster.
+\item default value: 0.1 for tree-based boosters, 0.5 for linear booster.
 }
 
 Note: should only pass one of \code{eta} or \code{learning_rate}. Both refer to the same parameter and there's thus no difference between one or the other.}
@@ -145,7 +145,7 @@ Maximum depth of a tree. Increasing this value will make the model more complex
 
 range: \eqn{[0, \infty)}}
 
-\item{min_child_weight}{(for Tree Booster) (default=1)
+\item{min_child_weight}{(for Tree Booster) (default=2)
 Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than \code{min_child_weight}, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger \code{min_child_weight} is, the more conservative the algorithm will be.
 
 range: \eqn{[0, \infty)}}
@@ -155,7 +155,7 @@ Maximum delta step we allow each leaf output to be. If the value is set to 0, it
 
 range: \eqn{[0, \infty)}}
 
-\item{subsample}{(for Tree Booster) (default=1)
+\item{subsample}{(for Tree Booster) (default=0.8)
 Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting. Subsampling will occur once in every boosting iteration.
 
 range: \eqn{(0,1]}}
@@ -172,10 +172,10 @@ sampling method is only supported when \code{"tree_method"} is set to \code{"his
 methods only support \code{"uniform"} sampling.
 }}
 
-\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (default=1)
+\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (\code{colsample_bytree} default=0.8, \code{colsample_bylevel} and \code{colsample_bynode} default=1)
 This is a family of parameters for subsampling of columns.
 \itemize{
-\item All \code{"colsample_by*"} parameters have a range of \eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.
+\item All \code{"colsample_by*"} parameters have a range of \eqn{(0, 1]} and specify the fraction of columns to be subsampled.
 \item \code{"colsample_bytree"} is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
 \item \code{"colsample_bylevel"} is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
 \item \code{"colsample_bynode"} is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd
index 891b1100b494..1325aa3d79f5 100644
--- a/R-package/man/xgboost.Rd
+++ b/R-package/man/xgboost.Rd
@@ -179,10 +179,10 @@ range: \eqn{[0, \infty)}}
 Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and \code{learning_rate} shrinks the feature weights to make the boosting process more conservative.
 \itemize{
 \item range: \eqn{[0,1]}
-\item default value: 0.3 for tree-based boosters, 0.5 for linear booster.
+\item default value: 0.1 for tree-based boosters, 0.5 for linear booster.
 }}
 
-\item{min_child_weight}{(for Tree Booster) (default=1)
+\item{min_child_weight}{(for Tree Booster) (default=2)
 Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than \code{min_child_weight}, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger \code{min_child_weight} is, the more conservative the algorithm will be.
 
 range: \eqn{[0, \infty)}}
@@ -379,7 +379,7 @@ Maximum number of nodes to be added.  Not used by \code{"exact"} tree method.}
 \item{booster}{(default= \code{"gbtree"})
 Which booster to use. Can be \code{"gbtree"}, \code{"gblinear"} or \code{"dart"}; \code{"gbtree"} and \code{"dart"} use tree based models while \code{"gblinear"} uses linear functions.}
 
-\item{subsample}{(for Tree Booster) (default=1)
+\item{subsample}{(for Tree Booster) (default=0.8)
 Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting. Subsampling will occur once in every boosting iteration.
 
 range: \eqn{(0,1]}}
@@ -404,10 +404,10 @@ named vector, will try to match the entries to column names of \code{x} by name.
 
 If \code{NULL} (the default), all columns will have the same weight.}
 
-\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (default=1)
+\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (\code{colsample_bytree} default=0.8, \code{colsample_bylevel} and \code{colsample_bynode} default=1)
 This is a family of parameters for subsampling of columns.
 \itemize{
-\item All \code{"colsample_by*"} parameters have a range of \eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.
+\item All \code{"colsample_by*"} parameters have a range of \eqn{(0, 1]} and specify the fraction of columns to be subsampled.
 \item \code{"colsample_bytree"} is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
 \item \code{"colsample_bylevel"} is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
 \item \code{"colsample_bynode"} is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 7bdf9a601d2a..3bccc85e3d9c 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -11,6 +11,12 @@ windows_flag <- .Platform$OS.type == "windows" &&
   .Machine$sizeof.pointer != 8
 solaris_flag <- (Sys.info()["sysname"] == "SunOS")
 n_threads <- 1
+legacy_sampling_params <- list(  # tree booster defaults documented before this change
+  min_child_weight = 1,  # documented default is now 2
+  subsample = 1,  # documented default is now 0.8
+  colsample_bytree = 1  # documented default is now 0.8
+)
+legacy_tree_params <- c(list(learning_rate = 0.3), legacy_sampling_params)  # adds old learning rate
 
 
 test_that("train and predict binary classification", {
@@ -159,10 +165,10 @@ test_that("train and predict softprob", {
     bst <- xgb.train(
       data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),
       nrounds = 5,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 3, learning_rate = 0.5, nthread = n_threads,
         objective = "multi:softprob", num_class = 3, eval_metric = "merror"
-      ),
+      ), legacy_sampling_params),
       evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))
     ),
     "train-merror"
@@ -218,10 +224,10 @@ test_that("train and predict softmax", {
     bst <- xgb.train(
       data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),
       nrounds = 5,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 3, learning_rate = 0.5, nthread = n_threads,
         objective = "multi:softmax", num_class = 3, eval_metric = "merror"
-      ),
+      ), legacy_sampling_params),
       evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))
     ),
     "train-merror"
@@ -339,9 +345,9 @@ test_that("use of multiple eval metrics works", {
 test_that("training continuation works", {
   dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   evals <- list(train = dtrain)
-  params <- xgb.params(
+  params <- c(xgb.params(
     objective = "binary:logistic", max_depth = 2, learning_rate = 1, nthread = n_threads
-  )
+  ), legacy_sampling_params)
 
   # for the reference, use 4 iterations at once:
   set.seed(11)
@@ -380,13 +386,13 @@ test_that("xgb.cv works", {
       data = xgb.DMatrix(train$data, label = train$label, nthread = 1),
       nfold = 5,
       nrounds = 2,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 2,
         learning_rate = 1.,
         nthread = n_threads,
         objective = "binary:logistic",
         eval_metric = "error"
-      ),
+      ), legacy_sampling_params),
       verbose = TRUE
     ),
     "train-error:"
@@ -443,11 +449,11 @@ test_that("xgb.cv works with stratified folds", {
     data = dtrain,
     nrounds = 2,
     nfold = 5,
-    params = xgb.params(
+    params = c(xgb.params(
       max_depth = 2,
       nthread = n_threads,
       objective = "binary:logistic"
-    ),
+    ), legacy_sampling_params),
     verbose = FALSE, stratified = FALSE
   )
   set.seed(314159)
@@ -455,11 +461,11 @@ test_that("xgb.cv works with stratified folds", {
     data = dtrain,
     nfold = 5,
     nrounds = 2,
-    params = xgb.params(
+    params = c(xgb.params(
       max_depth = 2,
       nthread = n_threads,
       objective = "binary:logistic"
-    ),
+    ), legacy_sampling_params),
     verbose = FALSE, stratified = TRUE
   )
   # Stratified folds should result in a different evaluation logs
@@ -488,11 +494,11 @@ test_that("train and predict with non-strict classes", {
     bst <- xgb.train(
       data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),
       nrounds = 2,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 2,
         nthread = n_threads,
         objective = "binary:logistic"
-      ),
+      ), legacy_sampling_params),
       verbose = 0
     ),
     regexp = NA
@@ -507,11 +513,11 @@ test_that("train and predict with non-strict classes", {
     bst <- xgb.train(
       data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),
       nrounds = 2,
-      params = xgb.params(
+      params = c(xgb.params(
         max_depth = 2,
         nthread = n_threads,
         objective = "binary:logistic"
-      ),
+      ), legacy_sampling_params),
       verbose = 0
     ),
     regexp = NA
@@ -559,11 +565,11 @@ test_that("colsample_bytree works", {
   evals <- list(train = dtrain, eval = dtest)
   ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
   ## each tree
-  params <- xgb.params(
+  params <- c(xgb.params(
     max_depth = 2, learning_rate = 0, nthread = n_threads,
     colsample_bytree = 0.01, objective = "binary:logistic",
     eval_metric = "auc"
-  )
+  ), list(min_child_weight = 1, subsample = 1))
   set.seed(2)
   bst <- xgb.train(params, dtrain, nrounds = 100, evals = evals, verbose = 0)
   xgb.importance(model = bst)
@@ -690,12 +696,12 @@ test_that("Quantile regression accepts multiple quantiles", {
   dm <- xgb.DMatrix(data = x, label = y, nthread = 1)
   model <- xgb.train(
     data = dm,
-    params = xgb.params(
+    params = c(xgb.params(
       objective = "reg:quantileerror",
       tree_method = "exact",
       quantile_alpha = c(0.05, 0.5, 0.95),
       nthread = n_threads
-    ),
+    ), legacy_tree_params),
     nrounds = 15
   )
   pred <- predict(model, x)
@@ -717,12 +723,12 @@ test_that("Can use multi-output labels with built-in objectives", {
   y_mirrored <- cbind(y, -y)
   dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads)
   model <- xgb.train(
-    params = xgb.params(
+    params = c(xgb.params(
       tree_method = "hist",
       multi_strategy = "multi_output_tree",
       objective = "reg:squarederror",
       nthread = n_threads
-    ),
+    ), legacy_tree_params),
     data = dm,
     nrounds = 5
   )
@@ -739,7 +745,7 @@ test_that("Can use multi-output labels with custom objectives", {
   y_mirrored <- cbind(y, -y)
   dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads)
   model <- xgb.train(
-    params = xgb.params(
+    params = c(xgb.params(
       tree_method = "hist",
       multi_strategy = "multi_output_tree",
       base_score = 0,
@@ -751,7 +757,7 @@ test_that("Can use multi-output labels with custom objectives", {
         return(list(grad = grad, hess = hess))
       },
       nthread = n_threads
-    ),
+    ), legacy_tree_params),
     data = dm,
     nrounds = 5
   )
diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
index 4cf551289525..bdab59521d20 100644
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -154,7 +154,8 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.cv", {
 
 params <- xgb.params(
   objective = "binary:logistic", eval_metric = "error",
-  max_depth = 4, nthread = n_threads
+  max_depth = 4, nthread = n_threads,
+  min_child_weight = 1, subsample = 1, colsample_bytree = 1  # previously documented defaults
 )
 
 test_that("can store evaluation_log without printing", {
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index d3185e7fccdd..f599425a2ac3 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -29,6 +29,7 @@ evalerror <- function(preds, dtrain) {
 }
 
 param <- list(max_depth = 2, learning_rate = 1, nthread = n_threads,
+              min_child_weight = 1, subsample = 1, colsample_bytree = 1,  # previously documented defaults
               objective = logregobj, eval_metric = evalerror)
 num_round <- 2
 
diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd
index bc39e0914060..ca623e4b8f9a 100644
--- a/R-package/vignettes/xgboostfromJSON.Rmd
+++ b/R-package/vignettes/xgboostfromJSON.Rmd
@@ -57,8 +57,13 @@ bst <- xgb.train(
   nrounds = 1,
   params = xgb.params(
     objective = "binary:logistic",
-    nthread = 2,
-    max_depth = 1
+    nthread = 1,
+    max_depth = 1,
+    eta = 0.3,
+    min_child_weight = 1,
+    subsample = 1,
+    colsample_bytree = 1,
+    seed = 0
   )
 )
 ```
diff --git a/doc/parameter.rst b/doc/parameter.rst
index 46891cbb9736..a98a8ac10ffa 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -98,7 +98,7 @@ General Parameters
 
 Parameters for Tree Booster
 ===========================
-* ``eta`` [default=0.3, alias: ``learning_rate``]
+* ``eta`` [default=0.1, alias: ``learning_rate``]
 
   - Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and ``eta`` shrinks the feature weights to make the boosting process more conservative.
   - range: [0,1]
@@ -113,7 +113,7 @@ Parameters for Tree Booster
   - Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. ``exact`` tree method requires non-zero value.
   - range: [0,∞]
 
-* ``min_child_weight`` [default=1]
+* ``min_child_weight`` [default=2]
 
   - Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than ``min_child_weight``, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger ``min_child_weight`` is, the more conservative the algorithm will be.
   - range: [0,∞]
@@ -123,7 +123,7 @@ Parameters for Tree Booster
   - Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Set it to value of 1-10 might help control the update.
   - range: [0,∞]
 
-* ``subsample`` [default=1]
+* ``subsample`` [default=0.8]
 
   - Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees. and this will prevent overfitting. Subsampling will occur once in every boosting iteration.
   - range: (0,1]
@@ -150,10 +150,10 @@ Parameters for Tree Booster
      split gradient, which may not be optimal with the full gradient. Use uniform sampling
      as an alternative.
 
-* ``colsample_bytree``, ``colsample_bylevel``, ``colsample_bynode`` [default=1]
+* ``colsample_bytree`` [default=0.8], ``colsample_bylevel`` [default=1], ``colsample_bynode`` [default=1]
 
   - This is a family of parameters for subsampling of columns.
-  - All ``colsample_by*`` parameters have a range of (0, 1], the default value of 1, and specify the fraction of columns to be subsampled.
+  - All ``colsample_by*`` parameters have a range of (0, 1] and specify the fraction of columns to be subsampled.
   - ``colsample_bytree`` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
   - ``colsample_bylevel`` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
   - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala
index afad45437396..91388a3e7d69 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala
@@ -221,8 +221,8 @@ private[spark] trait TreeBoosterParams extends Params {
 
   final def getMaxCachedHistNode: Int = $(maxCachedHistNode)
 
-  setDefault(eta -> 0.3, gamma -> 0, maxDepth -> 6, minChildWeight -> 1, maxDeltaStep -> 0,
-    subsample -> 1, samplingMethod -> "uniform", colsampleBytree -> 1, colsampleBylevel -> 1,
+  setDefault(eta -> 0.1, gamma -> 0, maxDepth -> 6, minChildWeight -> 2, maxDeltaStep -> 0,
+    subsample -> 0.8, samplingMethod -> "uniform", colsampleBytree -> 0.8, colsampleBylevel -> 1,
     colsampleBynode -> 1, lambda -> 1, alpha -> 0, treeMethod -> "auto", scalePosWeight -> 1,
     processType -> "default", growPolicy -> "depthwise", maxLeaves -> 0, maxBins -> 256,
     numParallelTree -> 1, maxCachedHistNode -> 65536)
diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala
index 891c7362573d..addaa22d2a1f 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala
@@ -208,7 +208,7 @@ private[spark] trait SparkParams[T <: Params] extends HasFeaturesCols with HasFe
 
   final def getCacheHostRatio: Float = $(cacheHostRatio)
 
-  setDefault(numRound -> 100, numWorkers -> 1, inferBatchSize -> (32 << 10),
+  setDefault(numRound -> 300, numWorkers -> 1, inferBatchSize -> (32 << 10),
     numEarlyStoppingRounds -> 0, forceRepartition -> false, missing -> Float.NaN,
     featuresCols -> Array.empty, customObj -> null, customEval -> null,
     featureNames -> Array.empty, featureTypes -> Array.empty, useExternalMemory -> false,
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
index 90af5c8cae77..aa1d0074b91f 100644
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
@@ -244,7 +244,10 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
       "eta" -> "1",
       "max_depth" -> "6",
       "base_score" -> 0.5,
-      "max_bin" -> 16) ++ xgbParams
+      "max_bin" -> 16,
+      "min_child_weight" -> 1,
+      "subsample" -> 1,
+      "colsample_bytree" -> 1) ++ xgbParams
     val xgb4jModel = ScalaXGBoost.train(trainingDM, paramMap, round)
 
     val classifier = new XGBoostClassifier(paramMap)
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
index 43209f1aff13..1adcfd693da6 100644
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
@@ -142,7 +142,10 @@ class XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu
       "eta" -> "1",
       "max_depth" -> "6",
       "base_score" -> 0.5,
-      "max_bin" -> 16) ++ xgbParams
+      "max_bin" -> 16,
+      "min_child_weight" -> 1,
+      "subsample" -> 1,
+      "colsample_bytree" -> 1) ++ xgbParams
     val xgb4jModel = ScalaXGBoost.train(trainingDM, paramMap, round)
 
     val regressor = new XGBoostRegressor(paramMap)
diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java
index 5b55e0a6342e..3a47124c27e5 100644
--- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java
+++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java
@@ -822,6 +822,10 @@ public void testTrainFromExistingModel() throws XGBoostError, IOException {
         put("max_depth", 2);
         put("silent", 1);
         put("objective", "binary:logistic");
+        put("seed", 0);
+        put("min_child_weight", 1);
+        put("subsample", 1);
+        put("colsample_bytree", 1);
       }
     };
 
diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java
index dba3496573b6..583b73630932 100644
--- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java
+++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java
@@ -360,6 +360,9 @@ public void testTrainWithDenseMatrixRef() throws XGBoostError {
       params.put("silent", 1);
       params.put("objective", "reg:linear");
       params.put("seed", 123);
+      params.put("min_child_weight", 1);
+      params.put("subsample", 1);
+      params.put("colsample_bytree", 1);
 
       HashMap<String, DMatrix> watches = new HashMap<>();
       watches.put("train", trainMat);
diff --git a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala
index 3cb77f9388c4..82f6e802c3b5 100644
--- a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala
+++ b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala
@@ -25,6 +25,11 @@ import org.scalatest.funsuite.AnyFunSuite
 import ml.dmlc.xgboost4j.java.XGBoostError
 
 class ScalaBoosterImplSuite extends AnyFunSuite {
+  private val legacyTreeDefaults = Map(  // tree booster defaults documented before this change
+    "eta" -> "0.3",  // documented default is now 0.1
+    "min_child_weight" -> "1",  // documented default is now 2
+    "subsample" -> "1",  // documented default is now 0.8
+    "colsample_bytree" -> "1")  // documented default is now 0.8
 
   private class EvalError extends EvalTrait {
 
@@ -150,9 +155,9 @@ class ScalaBoosterImplSuite extends AnyFunSuite {
   test("test with quantile histo depthwise") {
     val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
     val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
-    val paramMap = List("max_depth" -> "3", "silent" -> "0",
+    val paramMap = (legacyTreeDefaults ++ List("max_depth" -> "3", "silent" -> "0",
       "objective" -> "binary:logistic", "tree_method" -> "hist",
-      "grow_policy" -> "depthwise", "eval_metric" -> "auc").toMap
+      "grow_policy" -> "depthwise", "eval_metric" -> "auc").toMap)
     trainBoosterWithQuantileHisto(trainMat, Map("training" -> trainMat, "test" -> testMat),
       round = 10, paramMap, 0.95f)
   }
@@ -160,39 +165,39 @@ class ScalaBoosterImplSuite extends AnyFunSuite {
   test("test with quantile histo lossguide") {
     val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
     val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
-    val paramMap = List("max_depth" -> "3", "silent" -> "0",
+    val paramMap = (legacyTreeDefaults ++ List("max_depth" -> "3", "silent" -> "0",
       "objective" -> "binary:logistic", "tree_method" -> "hist",
-      "grow_policy" -> "lossguide", "max_leaves" -> "8", "eval_metric" -> "auc").toMap
+      "grow_policy" -> "lossguide", "max_leaves" -> "8", "eval_metric" -> "auc").toMap)
     trainBoosterWithQuantileHisto(trainMat, Map("training" -> trainMat, "test" -> testMat),
       round = 10, paramMap, 0.95f)
   }
 
   test("test with quantile histo lossguide with max bin") {
     val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
-    val paramMap = List("max_depth" -> "3", "silent" -> "0",
+    val paramMap = (legacyTreeDefaults ++ List("max_depth" -> "3", "silent" -> "0",
       "objective" -> "binary:logistic", "tree_method" -> "hist",
       "grow_policy" -> "lossguide", "max_leaves" -> "8", "max_bin" -> "16",
-      "eval_metric" -> "auc").toMap
+      "eval_metric" -> "auc").toMap)
     trainBoosterWithQuantileHisto(trainMat, Map("training" -> trainMat),
       round = 10, paramMap, 0.95f)
   }
 
   test("test with quantile histo depthwidth with max depth") {
     val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
-    val paramMap = List("max_depth" -> "0", "silent" -> "0",
+    val paramMap = (legacyTreeDefaults ++ List("max_depth" -> "0", "silent" -> "0",
       "objective" -> "binary:logistic", "tree_method" -> "hist",
       "grow_policy" -> "depthwise", "max_leaves" -> "8", "max_depth" -> "2",
-      "eval_metric" -> "auc").toMap
+      "eval_metric" -> "auc").toMap)
     trainBoosterWithQuantileHisto(trainMat, Map("training" -> trainMat),
       round = 10, paramMap, 0.95f)
   }
 
   test("test with quantile histo depthwidth with max depth and max bin") {
     val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
-    val paramMap = List("max_depth" -> "0", "silent" -> "0",
+    val paramMap = (legacyTreeDefaults ++ List("max_depth" -> "0", "silent" -> "0",
       "objective" -> "binary:logistic", "tree_method" -> "hist",
       "grow_policy" -> "depthwise", "max_depth" -> "2", "max_bin" -> "2",
-      "eval_metric" -> "auc").toMap
+      "eval_metric" -> "auc").toMap)
     trainBoosterWithQuantileHisto(trainMat, Map("training" -> trainMat),
       round = 10, paramMap, 0.95f)
   }
diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py
index 1d1e0a37b96d..1a4d17c363a9 100644
--- a/python-package/xgboost/dask/__init__.py
+++ b/python-package/xgboost/dask/__init__.py
@@ -832,7 +832,7 @@ def train(  # pylint: disable=unused-argument
     client: "distributed.Client",
     params: Dict[str, Any],
     dtrain: DaskDMatrix,
-    num_boost_round: int = 10,
+    num_boost_round: int = 300,
     *,
     evals: Optional[Sequence[Tuple[DaskDMatrix, str]]] = None,
     obj: Optional[PlainObj] = None,
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index afcbec30856c..6f239629d617 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -796,7 +796,7 @@ def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence:
     return train_dmatrix, evals
 
 
-DEFAULT_N_ESTIMATORS = 100
+DEFAULT_N_ESTIMATORS = 300
 
 
 @xgboost_model_doc(
diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py
index 52f346bad6a5..7dd03a1b832c 100644
--- a/python-package/xgboost/testing/__init__.py
+++ b/python-package/xgboost/testing/__init__.py
@@ -63,6 +63,20 @@
 PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
 
 
+def legacy_sampling_params() -> Dict[str, float]:
+    """Return the previous defaults for ``min_child_weight`` and row/column sampling.
+
+    Tests that assert deterministic tree behavior pass these explicitly so the new
+    ``min_child_weight``, ``subsample`` and ``colsample_bytree`` defaults don't apply.
+    """
+    return {"min_child_weight": 1.0, "subsample": 1.0, "colsample_bytree": 1.0}
+
+
+def legacy_tree_params() -> Dict[str, float]:
+    """Pre-RFC tree defaults (learning_rate=0.3 plus legacy sampling) for tests."""
+    return {"learning_rate": 0.3, **legacy_sampling_params()}
+
+
 def has_ipv6() -> bool:
     """Check whether IPv6 is enabled on this host."""
     # connection error in macos, still need some fixes.
diff --git a/python-package/xgboost/testing/callbacks.py b/python-package/xgboost/testing/callbacks.py
index 740b389cfde4..9463f832f7e8 100644
--- a/python-package/xgboost/testing/callbacks.py
+++ b/python-package/xgboost/testing/callbacks.py
@@ -8,6 +8,7 @@
 from ..callback import LearningRateScheduler
 from ..core import Booster, DMatrix
 from ..training import cv, train
+from . import legacy_sampling_params
 from .utils import Device
 
 
@@ -29,6 +30,7 @@ def run_eta_decay(
         "eval_metric": "error",
         "tree_method": tree_method,
         "device": device,
+        **legacy_sampling_params(),
     }
     evals_result: Dict[str, Dict] = {}
     bst = train(
@@ -52,6 +54,7 @@ def run_eta_decay(
         "eval_metric": "error",
         "tree_method": tree_method,
         "device": device,
+        **legacy_sampling_params(),
     }
     evals_result = {}
 
@@ -75,6 +78,7 @@ def run_eta_decay(
         "eval_metric": "error",
         "tree_method": tree_method,
         "device": device,
+        **legacy_sampling_params(),
     }
     evals_result = {}
     bst = train(
@@ -144,6 +148,7 @@ def run_eta_decay_leaf_output(
         "eval_metric": "error",
         "tree_method": tree_method,
         "device": device,
+        **legacy_sampling_params(),
     }
     if objective == "reg:quantileerror":
         param["quantile_alpha"] = 0.3
diff --git a/python-package/xgboost/testing/intercept.py b/python-package/xgboost/testing/intercept.py
index 4496059eb84e..861f10990f5e 100644
--- a/python-package/xgboost/testing/intercept.py
+++ b/python-package/xgboost/testing/intercept.py
@@ -14,6 +14,7 @@
 from ..core import Booster, DMatrix, QuantileDMatrix
 from ..sklearn import XGBClassifier, XGBRegressor
 from ..training import train
+from . import legacy_tree_params
 from .updater import get_basescore
 from .utils import Device, non_increasing
 
@@ -24,7 +25,11 @@ def run_init_estimation(tree_method: str, device: Device) -> None:
 
     def run_reg(X: np.ndarray, y: np.ndarray) -> None:  # pylint: disable=invalid-name
         reg = XGBRegressor(
-            tree_method=tree_method, max_depth=1, n_estimators=1, device=device
+            tree_method=tree_method,
+            max_depth=1,
+            n_estimators=1,
+            device=device,
+            **legacy_tree_params(),
         )
         reg.fit(X, y, eval_set=[(X, y)])
         base_score_0 = get_basescore(reg)
@@ -38,6 +43,7 @@ def run_reg(X: np.ndarray, y: np.ndarray) -> None:  # pylint: disable=invalid-na
             max_depth=1,
             n_estimators=1,
             base_score=intercept,
+            **legacy_tree_params(),
         )
         reg.fit(X, y, eval_set=[(X, y)])
         base_score_1 = get_basescore(reg)
@@ -57,7 +63,11 @@ def run_clf(
         X: np.ndarray, y: np.ndarray, w: Optional[np.ndarray] = None
     ) -> List[float]:
         clf = XGBClassifier(
-            tree_method=tree_method, max_depth=1, n_estimators=1, device=device
+            tree_method=tree_method,
+            max_depth=1,
+            n_estimators=1,
+            device=device,
+            **legacy_tree_params(),
         )
         if w is not None:
             clf.fit(
@@ -79,6 +89,7 @@ def run_clf(
             n_estimators=1,
             device=device,
             base_score=intercept,
+            **legacy_tree_params(),
         )
         if w is not None:
             clf.fit(
@@ -159,6 +170,7 @@ def run_adaptive(tree_method: str, weighted: bool, device: Device) -> None:
             "base_score": base_score,
             "objective": "reg:absoluteerror",
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         num_boost_round=1,
@@ -168,6 +180,7 @@ def run_adaptive(tree_method: str, weighted: bool, device: Device) -> None:
             "tree_method": tree_method,
             "objective": "reg:absoluteerror",
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         num_boost_round=1,
@@ -190,6 +203,7 @@ def run_adaptive(tree_method: str, weighted: bool, device: Device) -> None:
             "base_score": base_score + 1.0,
             "objective": "reg:absoluteerror",
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         num_boost_round=1,
diff --git a/python-package/xgboost/testing/metrics.py b/python-package/xgboost/testing/metrics.py
index 61b21a739d7d..be1175977208 100644
--- a/python-package/xgboost/testing/metrics.py
+++ b/python-package/xgboost/testing/metrics.py
@@ -9,6 +9,7 @@
 from ..core import DMatrix, QuantileDMatrix, _parse_eval_str
 from ..sklearn import XGBClassifier, XGBRanker
 from ..training import train
+from . import legacy_tree_params
 from .utils import Device
 
 
@@ -77,6 +78,7 @@ def check_quantile_error(tree_method: str, device: Device) -> None:
             "eval_metric": "quantile",
             "quantile_alpha": 0.3,
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         evals=[(Xy, "Train")],
@@ -84,7 +86,9 @@ def check_quantile_error(tree_method: str, device: Device) -> None:
     )
     predt = booster.inplace_predict(X)
     loss = mean_pinball_loss(y, predt, alpha=0.3)
-    np.testing.assert_allclose(evals_result["Train"]["quantile"][-1], loss)
+    np.testing.assert_allclose(
+        evals_result["Train"]["quantile"][-1], loss, rtol=1e-5, atol=1e-7
+    )
 
     alpha = [0.25, 0.5, 0.75]
     booster = train(
@@ -94,6 +98,7 @@ def check_quantile_error(tree_method: str, device: Device) -> None:
             "quantile_alpha": alpha,
             "objective": "reg:quantileerror",
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         evals=[(Xy, "Train")],
@@ -103,7 +108,9 @@ def check_quantile_error(tree_method: str, device: Device) -> None:
     loss = np.mean(
         [mean_pinball_loss(y, predt[:, i], alpha=alpha[i]) for i in range(3)]
     )
-    np.testing.assert_allclose(evals_result["Train"]["quantile"][-1], loss)
+    np.testing.assert_allclose(
+        evals_result["Train"]["quantile"][-1], loss, rtol=1e-5, atol=1e-7
+    )
 
 
 def _expectile_loss(
@@ -142,6 +149,7 @@ def check_expectile_error(tree_method: str, device: Device) -> None:
             "eval_metric": "expectile",
             "expectile_alpha": 0.3,
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         evals=[(Xy, "Train")],
@@ -149,7 +157,9 @@ def check_expectile_error(tree_method: str, device: Device) -> None:
     )
     predt = booster.inplace_predict(X)
     loss = _expectile_loss(y, predt, 0.3, None)
-    np.testing.assert_allclose(evals_result["Train"]["expectile"][-1], loss)
+    np.testing.assert_allclose(
+        evals_result["Train"]["expectile"][-1], loss, rtol=1e-5, atol=1e-7
+    )
 
     alpha = np.array([0.25, 0.5, 0.75])
     booster = train(
@@ -159,6 +169,7 @@ def check_expectile_error(tree_method: str, device: Device) -> None:
             "expectile_alpha": alpha,
             "objective": "reg:expectileerror",
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         evals=[(Xy, "Train")],
@@ -166,7 +177,9 @@ def check_expectile_error(tree_method: str, device: Device) -> None:
     )
     predt = booster.inplace_predict(X)
     loss = _expectile_loss_multi(y, predt, alpha, None)
-    np.testing.assert_allclose(evals_result["Train"]["expectile"][-1], loss)
+    np.testing.assert_allclose(
+        evals_result["Train"]["expectile"][-1], loss, rtol=1e-5, atol=1e-7
+    )
 
     weights = rng.uniform(0.1, 1.0, size=y.shape[0])
     Xy_w = DMatrix(X, y, weight=weights)
@@ -178,6 +191,7 @@ def check_expectile_error(tree_method: str, device: Device) -> None:
             "expectile_alpha": alpha,
             "objective": "reg:expectileerror",
             "device": device,
+            **legacy_tree_params(),
         },
         Xy_w,
         evals=[(Xy_w, "Train")],
@@ -185,7 +199,9 @@ def check_expectile_error(tree_method: str, device: Device) -> None:
     )
     predt = booster.inplace_predict(X)
     loss = _expectile_loss_multi(y, predt, alpha, weights)
-    np.testing.assert_allclose(evals_result_w["Train"]["expectile"][-1], loss)
+    np.testing.assert_allclose(
+        evals_result_w["Train"]["expectile"][-1], loss, rtol=1e-5, atol=1e-7
+    )
 
 
 def run_roc_auc_binary(tree_method: str, n_samples: int, device: Device) -> None:
@@ -210,6 +226,7 @@ def run_roc_auc_binary(tree_method: str, n_samples: int, device: Device) -> None
             "device": device,
             "eval_metric": "auc",
             "objective": "binary:logistic",
+            **legacy_tree_params(),
         },
         Xy,
         num_boost_round=1,
@@ -232,14 +249,22 @@ def run_pr_auc_multi(tree_method: str, device: Device) -> None:
 
     X, y = make_classification(64, 16, n_informative=8, n_classes=3, random_state=1994)
     clf = XGBClassifier(
-        tree_method=tree_method, n_estimators=1, eval_metric="aucpr", device=device
+        tree_method=tree_method,
+        n_estimators=1,
+        eval_metric="aucpr",
+        device=device,
+        **legacy_tree_params(),
     )
     clf.fit(X, y, eval_set=[(X, y)])
     evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
     # No available implementation for comparison, just check that XGBoost converges
     # to 1.0
     clf = XGBClassifier(
-        tree_method=tree_method, n_estimators=10, eval_metric="aucpr", device=device
+        tree_method=tree_method,
+        n_estimators=10,
+        eval_metric="aucpr",
+        device=device,
+        **legacy_tree_params(),
     )
     clf.fit(X, y, eval_set=[(X, y)])
     evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
@@ -280,6 +305,7 @@ def run_roc_auc_multi(  # pylint: disable=too-many-locals
             "objective": "multi:softprob",
             "num_class": n_classes,
             "device": device,
+            **legacy_tree_params(),
         },
         Xy,
         num_boost_round=1,
@@ -312,6 +338,7 @@ def run_pr_auc_ltr(tree_method: str, device: Device) -> None:
         objective="rank:pairwise",
         eval_metric="aucpr",
         device=device,
+        **legacy_tree_params(),
     )
     groups = np.array([32, 32, 64])
     ltr.fit(
@@ -332,7 +359,11 @@ def run_pr_auc_binary(tree_method: str, device: Device) -> None:
 
     X, y = make_classification(128, 4, n_classes=2, random_state=1994)
     clf = XGBClassifier(
-        tree_method=tree_method, n_estimators=1, eval_metric="aucpr", device=device
+        tree_method=tree_method,
+        n_estimators=1,
+        eval_metric="aucpr",
+        device=device,
+        **legacy_tree_params(),
     )
     clf.fit(X, y, eval_set=[(X, y)])
     evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
@@ -345,7 +376,11 @@ def run_pr_auc_binary(tree_method: str, device: Device) -> None:
     np.testing.assert_allclose(prauc, evals_result, rtol=1e-2)
 
     clf = XGBClassifier(
-        tree_method=tree_method, n_estimators=10, eval_metric="aucpr", device=device
+        tree_method=tree_method,
+        n_estimators=10,
+        eval_metric="aucpr",
+        device=device,
+        **legacy_tree_params(),
     )
     clf.fit(X, y, eval_set=[(X, y)])
     evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
diff --git a/python-package/xgboost/testing/multi_target.py b/python-package/xgboost/testing/multi_target.py
index deac89ca4a56..60c165302888 100644
--- a/python-package/xgboost/testing/multi_target.py
+++ b/python-package/xgboost/testing/multi_target.py
@@ -38,6 +38,9 @@ def run_multiclass(device: Device, learning_rate: Optional[float]) -> None:
         n_estimators=10,
         device=device,
         learning_rate=learning_rate,
+        min_child_weight=1,
+        subsample=1,
+        colsample_bytree=1,
     )
     clf.fit(X, y, eval_set=[(X, y)])
     assert clf.objective == "multi:softprob"
@@ -59,6 +62,9 @@ def run_multilabel(device: Device, learning_rate: Optional[float]) -> None:
         n_estimators=10,
         device=device,
         learning_rate=learning_rate,
+        min_child_weight=1,
+        subsample=1,
+        colsample_bytree=1,
     )
     clf.fit(X, y, eval_set=[(X, y)])
     assert clf.objective == "binary:logistic"
@@ -77,6 +83,7 @@ def run_quantile_loss(device: Device, weighted: bool) -> None:
         "device": device,
         "quantile_alpha": [0.45, 0.5, 0.55],
         "multi_strategy": "multi_output_tree",
+        **tm.legacy_tree_params(),
     }
     n_samples = 2048
     X, y = make_regression(n_samples=n_samples, n_features=16, random_state=2026)
@@ -113,6 +120,7 @@ def run_absolute_error(device: Device) -> None:
         "objective": "reg:absoluteerror",
         "device": device,
         "multi_strategy": "multi_output_tree",
+        **tm.legacy_tree_params(),
     }
     n_samples = 1024
     X, y = make_regression(
@@ -216,6 +224,7 @@ def run_test(
                 "multi_strategy": "multi_output_tree",
                 "learning_rate": 1,
                 "base_score": base_score,
+                **tm.legacy_sampling_params(),
             },
             Xy,
             evals=[(Xy, "Train")],
@@ -258,6 +267,7 @@ def run_with_iter(device: Device) -> None:  # pylint: disable=too-many-locals
         "learning_rate": 1.0,
         "base_score": intercept,
         "debug_synchronize": True,
+        **tm.legacy_sampling_params(),
     }
 
     Xs = []
@@ -340,6 +350,7 @@ def run(obj: Optional[Objective]) -> None:
             "learning_rate": 1.0,
             "debug_synchronize": True,
             "base_score": 0.0,
+            **tm.legacy_sampling_params(),
         }
         Xy = QuantileDMatrix(X, y)
         booster_0 = train(params, Xy, num_boost_round=1, obj=obj)
@@ -371,6 +382,7 @@ def run() -> Booster:
             "device": device,
             "multi_strategy": "multi_output_tree",
             "debug_synchronize": True,
+            **tm.legacy_tree_params(),
         }
         return train(params, Xy, num_boost_round=16)
 
@@ -397,6 +409,9 @@ def run_column_sampling(device: Device) -> None:
         "multi_strategy": "multi_output_tree",
         "debug_synchronize": True,
         "colsample_bynode": 0.4,
+        "min_child_weight": 1,
+        "subsample": 1,
+        "colsample_bytree": 1,
     }
     booster = train(params, Xy, num_boost_round=16)
 
@@ -424,6 +439,9 @@ def run_column_sampling(device: Device) -> None:
         importance_type="weight",
         device=device,
         colsample_bynode=0.2,
+        min_child_weight=1,
+        subsample=1,
+        colsample_bytree=1,
     )
     clf.fit(X, y, feature_weights=np.arange(0, X.shape[1]))
     fi = clf.feature_importances_
@@ -446,6 +464,7 @@ def run_grow_policy(device: Device, grow_policy: str) -> None:
         "multi_strategy": "multi_output_tree",
         "debug_synchronize": True,
         "grow_policy": grow_policy,
+        **tm.legacy_tree_params(),
     }
 
     evals_result = train_result(params, Xy, num_rounds=10)
diff --git a/python-package/xgboost/testing/predict.py b/python-package/xgboost/testing/predict.py
index 9f2d42aeed03..a2045c3fb932 100644
--- a/python-package/xgboost/testing/predict.py
+++ b/python-package/xgboost/testing/predict.py
@@ -7,6 +7,7 @@
 
 from ..core import DMatrix
 from ..training import train
+from . import legacy_tree_params
 from .shared import validate_leaf_output
 from .updater import get_basescore
 from .utils import Device
@@ -30,6 +31,7 @@ def run_predict_leaf(device: Device, DMatrixT: Type[DMatrix]) -> np.ndarray:
             "num_parallel_tree": num_parallel_tree,
             "num_class": classes,
             "tree_method": "hist",
+            **legacy_tree_params(),
         },
         m,
         num_boost_round=num_boost_round,
@@ -60,7 +62,9 @@ def run_predict_leaf(device: Device, DMatrixT: Type[DMatrix]) -> np.ndarray:
     assert np.prod(first.shape) == classes * num_parallel_tree * n_iters
 
     # When there's only 1 tree, the output is a 1 dim vector
-    booster = train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
+    booster = train(
+        {"tree_method": "hist", **legacy_tree_params()}, num_boost_round=1, dtrain=m
+    )
     booster.set_param({"device": device})
     assert booster.predict(m, pred_leaf=True).shape == (rows,)
 
diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py
index ab44da41be9e..e03037f7f24b 100644
--- a/python-package/xgboost/testing/updater.py
+++ b/python-package/xgboost/testing/updater.py
@@ -188,6 +188,7 @@ def run(params: Dict[str, Any], metric: str) -> None:
         "device": device,
         "quantile_alpha": alpha,
         "multi_strategy": multi_strategy,
+        **tm.legacy_tree_params(),
     }
     run(params, "quantile")
 
@@ -216,11 +217,13 @@ def check_quantile_loss_extmem(
         "objective": "reg:quantileerror",
         "device": device,
         "quantile_alpha": [0.2, 0.8],
+        "seed": 1994,
+        **tm.legacy_tree_params(),
     }
-    booster_it = train(params, Xy_it)
+    booster_it = train(params, Xy_it, num_boost_round=1)
     X, y, w = it.as_arrays()
     Xy = DMatrix(X, y, weight=w)
-    booster = train(params, Xy)
+    booster = train(params, Xy, num_boost_round=1)
 
     predt_it = booster_it.predict(Xy_it)
     predt = booster.predict(Xy)
@@ -466,6 +469,7 @@ def check_categorical_ohe(  # pylint: disable=too-many-arguments
         # Use one-hot exclusively
         "max_cat_to_onehot": USE_ONEHOT,
         "device": device,
+        **tm.legacy_tree_params(),
     }
 
     if multi_target:
diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index 4e38fdf7de54..179a4dfd0ab5 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -53,7 +53,7 @@
 def train(
     params: Dict[str, Any],
     dtrain: DMatrix,
-    num_boost_round: int = 10,
+    num_boost_round: int = 300,
     *,
     evals: Optional[Sequence[Tuple[DMatrix, str]]] = None,
     obj: Optional[PlainObj] = None,
@@ -435,7 +435,7 @@ def mknfold(
 def cv(
     params: BoosterParam,
     dtrain: DMatrix,
-    num_boost_round: int = 10,
+    num_boost_round: int = 300,
     *,
     nfold: int = 3,
     stratified: bool = False,
diff --git a/src/tree/param.h b/src/tree/param.h
index 1caa3147fe5f..9dba8bc7dd3c 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -81,21 +81,19 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
   DMLC_DECLARE_PARAMETER(TrainParam) {
     DMLC_DECLARE_FIELD(learning_rate)
         .set_lower_bound(0.0f)
-        .set_default(0.3f)
+        .set_default(0.1f)
         .describe("Learning rate(step size) of update.");
     DMLC_DECLARE_FIELD(min_split_loss)
         .set_lower_bound(0.0f)
         .set_default(0.0f)
-        .describe(
-            "Minimum loss reduction required to make a further partition.");
-    DMLC_DECLARE_FIELD(max_depth)
+        .describe("Minimum loss reduction required to make a further partition.");
+    DMLC_DECLARE_FIELD(max_depth).set_lower_bound(0).set_default(6).describe(
+        "Maximum depth of the tree; 0 indicates no limit; a limit is required "
+        "for depthwise policy");
+    DMLC_DECLARE_FIELD(max_leaves)
         .set_lower_bound(0)
-        .set_default(6)
-        .describe(
-            "Maximum depth of the tree; 0 indicates no limit; a limit is required "
-            "for depthwise policy");
-    DMLC_DECLARE_FIELD(max_leaves).set_lower_bound(0).set_default(0).describe(
-        "Maximum number of leaves; 0 indicates no limit.");
+        .set_default(0)
+        .describe("Maximum number of leaves; 0 indicates no limit.");
     DMLC_DECLARE_FIELD(max_bin).set_lower_bound(2).set_default(256).describe(
         "if using histogram-based algorithm, maximum number of bins per feature");
     DMLC_DECLARE_FIELD(grow_policy)
@@ -118,24 +116,23 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
             "splits.");
     DMLC_DECLARE_FIELD(min_child_weight)
         .set_lower_bound(0.0f)
-        .set_default(1.0f)
+        .set_default(2.0f)
         .describe("Minimum sum of instance weight(hessian) needed in a child.");
     DMLC_DECLARE_FIELD(reg_lambda)
         .set_lower_bound(0.0f)
         .set_default(1.0f)
         .describe("L2 regularization on leaf weight");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on leaf weight");
+    DMLC_DECLARE_FIELD(reg_alpha).set_lower_bound(0.0f).set_default(0.0f).describe(
+        "L1 regularization on leaf weight");
     DMLC_DECLARE_FIELD(max_delta_step)
         .set_lower_bound(0.0f)
         .set_default(0.0f)
-        .describe("Maximum delta step we allow each tree's weight estimate to be. "\
-                  "If the value is set to 0, it means there is no constraint");
+        .describe(
+            "Maximum delta step we allow each tree's weight estimate to be. "
+            "If the value is set to 0, it means there is no constraint");
     DMLC_DECLARE_FIELD(subsample)
         .set_range(0.0f, 1.0f)
-        .set_default(1.0f)
+        .set_default(0.8f)
         .describe("Row subsample ratio of training instance.");
     DMLC_DECLARE_FIELD(sampling_method)
         .set_default(kUniform)
@@ -155,7 +152,7 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
         .describe("Subsample ratio of columns, resample on each level.");
     DMLC_DECLARE_FIELD(colsample_bytree)
         .set_range(0.0f, 1.0f)
-        .set_default(1.0f)
+        .set_default(0.8f)
         .describe("Subsample ratio of columns, resample on each tree construction.");
     DMLC_DECLARE_FIELD(refresh_leaf)
         .set_default(true)
@@ -165,11 +162,12 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
         .describe("Constraint of variable monotonicity");
     DMLC_DECLARE_FIELD(interaction_constraints)
         .set_default("")
-        .describe("Constraints for interaction representing permitted interactions."
-                  "The constraints must be specified in the form of a nest list,"
-                  "e.g. [[0, 1], [2, 3, 4]], where each inner list is a group of"
-                  "indices of features that are allowed to interact with each other."
-                  "See tutorial for more information");
+        .describe(
+            "Constraints for interaction representing permitted interactions. "
+            "The constraints must be specified in the form of a nest list, "
+            "e.g. [[0, 1], [2, 3, 4]], where each inner list is a group of "
+            "indices of features that are allowed to interact with each other. "
+            "See tutorial for more information");
 
     // ------ From cpu quantile histogram -------.
     DMLC_DECLARE_FIELD(sparse_threshold)
@@ -214,10 +212,10 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
 // functions for L1 cost
 template <typename T1, typename T2>
 XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) {
-  if (w > + alpha) {
+  if (w > +alpha) {
     return w - alpha;
   }
-  if (w < - alpha) {
+  if (w < -alpha) {
     return w + alpha;
   }
   return 0.0;
@@ -266,8 +264,7 @@ XGBOOST_DEVICE T CalcGain(TrainingParams const &p, T sum_grad, T sum_hess) {
   }
 }
 
-template <typename TrainingParams,
-          typename StatT, typename T = decltype(StatT().GetHess())>
+template <typename TrainingParams, typename StatT, typename T = decltype(StatT().GetHess())>
 XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, StatT stat) {
   return CalcGain(p, stat.GetGrad(), stat.GetHess());
 }
@@ -310,29 +307,27 @@ inline double CalcGainGivenWeight(TrainParam const &p,
 struct XGBOOST_ALIGNAS(16) GradStats {
   using GradType = double;
   /*! \brief sum gradient statistics */
-  GradType sum_grad { 0 };
+  GradType sum_grad{0};
   /*! \brief sum hessian statistics */
-  GradType sum_hess { 0 };
+  GradType sum_hess{0};
 
  public:
   [[nodiscard]] XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; }
   [[nodiscard]] XGBOOST_DEVICE GradType GetHess() const { return sum_hess; }
 
-  friend std::ostream& operator<<(std::ostream& os, GradStats s) {
+  friend std::ostream &operator<<(std::ostream &os, GradStats s) {
     os << s.GetGrad() << "/" << s.GetHess();
     return os;
   }
 
   XGBOOST_DEVICE GradStats() {
-    static_assert(sizeof(GradStats) == 16,
-                  "Size of GradStats is not 16 bytes.");
+    static_assert(sizeof(GradStats) == 16, "Size of GradStats is not 16 bytes.");
   }
 
   template <typename GpairT>
   XGBOOST_DEVICE explicit GradStats(const GpairT &sum)
       : sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {}
-  explicit GradStats(const GradType grad, const GradType hess)
-      : sum_grad(grad), sum_hess(hess) {}
+  explicit GradStats(const GradType grad, const GradType hess) : sum_grad(grad), sum_hess(hess) {}
   /*!
    * \brief accumulate statistics
    * \param p the gradient pair
@@ -340,16 +335,16 @@ struct XGBOOST_ALIGNAS(16) GradStats {
   inline void Add(GradientPair p) { this->Add(p.GetGrad(), p.GetHess()); }
 
   /*! \brief add statistics to the data */
-  inline void Add(const GradStats& b) {
+  inline void Add(const GradStats &b) {
     sum_grad += b.sum_grad;
     sum_hess += b.sum_hess;
   }
   /*! \brief same as add, reduce is used in All Reduce */
-  inline static void Reduce(GradStats& a, const GradStats& b) { // NOLINT(*)
+  inline static void Reduce(GradStats &a, const GradStats &b) {  // NOLINT(*)
     a.Add(b);
   }
   /*! \brief set current value to a - b */
-  inline void SetSubstract(const GradStats& a, const GradStats& b) {
+  inline void SetSubstract(const GradStats &a, const GradStats &b) {
     sum_grad = a.sum_grad - b.sum_grad;
     sum_hess = a.sum_hess - b.sum_hess;
   }
@@ -379,10 +374,10 @@ inline GradStats &CopyStats(GradStats const &src, GradStats *dst) {  // NOLINT
  * \brief statistics that is helpful to store
  *   and represent a split solution for the tree
  */
-template<typename GradientT>
+template <typename GradientT>
 struct SplitEntryContainer {
   /*! \brief loss change after split this node */
-  bst_float loss_chg {0.0f};
+  bst_float loss_chg{0.0f};
   /*! \brief split index */
   bst_feature_t sindex{0};
   bst_float split_value{0.0f};
@@ -551,8 +546,8 @@ struct SplitEntryContainer {
   }
 
   /*! \brief same as update, used by AllReduce*/
-  inline static void Reduce(SplitEntryContainer &dst,         // NOLINT(*)
-                            const SplitEntryContainer &src) { // NOLINT(*)
+  inline static void Reduce(SplitEntryContainer &dst,          // NOLINT(*)
+                            const SplitEntryContainer &src) {  // NOLINT(*)
     dst.Update(src);
   }
 };
@@ -570,9 +565,8 @@ using SplitEntry = SplitEntryContainer<GradStats>;
  *
  * \param p_out Pointer to output
  */
-void ParseInteractionConstraint(
-    std::string const &constraint_str,
-    std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
+void ParseInteractionConstraint(std::string const &constraint_str,
+                                std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
 }  // namespace xgboost
 
 // define string serializer for vector, to get the arguments
diff --git a/tests/cpp/plugin/test_sycl_hist_updater.cc b/tests/cpp/plugin/test_sycl_hist_updater.cc
index 69c5047d045c..9f6ddf3ad132 100644
--- a/tests/cpp/plugin/test_sycl_hist_updater.cc
+++ b/tests/cpp/plugin/test_sycl_hist_updater.cc
@@ -5,11 +5,9 @@
 
 #include <oneapi/dpl/random>
 
-#include "../../../plugin/sycl/tree/hist_updater.h"
 #include "../../../plugin/sycl/device_manager.h"
-
+#include "../../../plugin/sycl/tree/hist_updater.h"
 #include "../../../src/tree/common_row_partitioner.h"
-
 #include "../helpers.h"
 
 namespace xgboost::sycl::tree {
@@ -18,12 +16,9 @@ namespace xgboost::sycl::tree {
 template <typename GradientSumT>
 class TestHistUpdater : public HistUpdater<GradientSumT> {
  public:
-  TestHistUpdater(const Context* ctx,
-                  ::sycl::queue* qu,
-                  const xgboost::tree::TrainParam& param,
-                  FeatureInteractionConstraintHost int_constraints_,
-                  DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,
-                                                                   int_constraints_, fmat) {}
+  TestHistUpdater(const Context* ctx, ::sycl::queue* qu, const xgboost::tree::TrainParam& param,
+                  FeatureInteractionConstraintHost int_constraints_, DMatrix const* fmat)
+      : HistUpdater<GradientSumT>(ctx, qu, param, int_constraints_, fmat) {}
 
   void TestInitSampling(const HostDeviceVector<GradientPair>& gpair,
                         USMVector<size_t, MemoryType::on_device>* row_indices) {
@@ -31,61 +26,51 @@ class TestHistUpdater : public HistUpdater<GradientSumT> {
   }
 
   auto* TestInitData(const common::GHistIndexMatrix& gmat,
-                     const HostDeviceVector<GradientPair>& gpair,
-                     const DMatrix& fmat,
+                     const HostDeviceVector<GradientPair>& gpair, const DMatrix& fmat,
                      const RegTree& tree) {
     HistUpdater<GradientSumT>::InitData(gmat, gpair, fmat, tree);
     return &(HistUpdater<GradientSumT>::row_set_collection_);
   }
 
-  const auto* TestBuildHistogramsLossGuide(ExpandEntry entry,
-                                    const common::GHistIndexMatrix &gmat,
-                                    RegTree *p_tree,
-                                    const HostDeviceVector<GradientPair>& gpair) {
+  const auto* TestBuildHistogramsLossGuide(ExpandEntry entry, const common::GHistIndexMatrix& gmat,
+                                           RegTree* p_tree,
+                                           const HostDeviceVector<GradientPair>& gpair) {
     HistUpdater<GradientSumT>::BuildHistogramsLossGuide(entry, gmat, p_tree, gpair);
     return &(HistUpdater<GradientSumT>::hist_);
   }
 
-  auto TestInitNewNode(int nid,
-                       const common::GHistIndexMatrix& gmat,
-                       const HostDeviceVector<GradientPair>& gpair,
-                       const RegTree& tree) {
+  auto TestInitNewNode(int nid, const common::GHistIndexMatrix& gmat,
+                       const HostDeviceVector<GradientPair>& gpair, const RegTree& tree) {
     HistUpdater<GradientSumT>::InitNewNode(nid, gmat, gpair, tree);
     return HistUpdater<GradientSumT>::snode_host_[nid];
   }
 
   auto TestEvaluateSplits(const std::vector<ExpandEntry>& nodes_set,
-                          const common::GHistIndexMatrix& gmat,
-                          const RegTree& tree) {
+                          const common::GHistIndexMatrix& gmat, const RegTree& tree) {
     HistUpdater<GradientSumT>::EvaluateSplits(nodes_set, gmat, tree);
     return HistUpdater<GradientSumT>::snode_host_;
   }
 
-  void TestApplySplit(const std::vector<ExpandEntry> nodes,
-                      const common::GHistIndexMatrix& gmat,
+  void TestApplySplit(const std::vector<ExpandEntry> nodes, const common::GHistIndexMatrix& gmat,
                       RegTree* p_tree) {
     HistUpdater<GradientSumT>::ApplySplit(nodes, gmat, p_tree);
   }
 
-  auto TestExpandWithLossGuide(const common::GHistIndexMatrix& gmat,
-                               DMatrix *p_fmat,
-                               RegTree* p_tree,
-                               const HostDeviceVector<GradientPair>& gpair) {
+  auto TestExpandWithLossGuide(const common::GHistIndexMatrix& gmat, DMatrix* p_fmat,
+                               RegTree* p_tree, const HostDeviceVector<GradientPair>& gpair) {
     HistUpdater<GradientSumT>::ExpandWithLossGuide(gmat, p_tree, gpair);
   }
 
-  auto TestExpandWithDepthWise(const common::GHistIndexMatrix& gmat,
-                               DMatrix *p_fmat,
-                               RegTree* p_tree,
-                               const HostDeviceVector<GradientPair>& gpair) {
+  auto TestExpandWithDepthWise(const common::GHistIndexMatrix& gmat, DMatrix* p_fmat,
+                               RegTree* p_tree, const HostDeviceVector<GradientPair>& gpair) {
     HistUpdater<GradientSumT>::ExpandWithDepthWise(gmat, p_tree, gpair);
   }
 };
 
-void GenerateRandomGPairs(::sycl::queue* qu, GradientPair* gpair_ptr, size_t num_rows, bool has_neg_hess) {
+void GenerateRandomGPairs(::sycl::queue* qu, GradientPair* gpair_ptr, size_t num_rows,
+                          bool has_neg_hess) {
   qu->submit([&](::sycl::handler& cgh) {
-    cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(num_rows)),
-                                        [=](::sycl::item<1> pid) {
+    cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(num_rows)), [=](::sycl::item<1> pid) {
       uint64_t i = pid.get_linear_id();
 
       constexpr uint32_t seed = 777;
@@ -121,7 +106,7 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
   GenerateRandomGPairs(qu, gpair.DevicePointer(), num_rows, true);
 
   updater.TestInitSampling(gpair, &row_indices_0);
-  
+
   size_t n_samples = row_indices_0.Size();
   // Half of gpairs have neg hess
   ASSERT_LT(n_samples, num_rows * 0.5 * param.subsample * 1.2);
@@ -179,7 +164,8 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
   auto& row_indices = row_set_collection->Data();
 
   std::vector<size_t> row_indices_host(row_indices.Size());
-  qu->memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size()*sizeof(size_t)).wait();
+  qu->memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size() * sizeof(size_t))
+      .wait();
 
   if (!has_neg_hess) {
     for (size_t i = 0; i < num_rows; ++i) {
@@ -200,7 +186,8 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
 }
 
 template <typename GradientSumT>
-void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& param, float sparsity) {
+void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& param,
+                                             float sparsity) {
   const size_t num_rows = 1u << 8;
   const size_t num_columns = 1;
   const size_t n_bins = 32;
@@ -247,14 +234,19 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
   std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist0_host(n_bins);
   std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist1_host(n_bins);
   std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist2_host(n_bins);
-  qu->memcpy(hist0_host.data(), (*hist)[0].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
-  qu->memcpy(hist1_host.data(), (*hist)[1].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
-  qu->memcpy(hist2_host.data(), (*hist)[2].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
+  qu->memcpy(hist0_host.data(), (*hist)[0].DataConst(),
+             sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
+  qu->memcpy(hist1_host.data(), (*hist)[1].DataConst(),
+             sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
+  qu->memcpy(hist2_host.data(), (*hist)[2].DataConst(),
+             sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);
   qu->wait();
 
   for (size_t idx_bin = 0; idx_bin < n_bins; ++idx_bin) {
-    EXPECT_NEAR(hist0_host[idx_bin].GetGrad(), hist1_host[idx_bin].GetGrad() + hist2_host[idx_bin].GetGrad(), 1e-6);
-    EXPECT_NEAR(hist0_host[idx_bin].GetHess(), hist1_host[idx_bin].GetHess() + hist2_host[idx_bin].GetHess(), 1e-6);
+    EXPECT_NEAR(hist0_host[idx_bin].GetGrad(),
+                hist1_host[idx_bin].GetGrad() + hist2_host[idx_bin].GetGrad(), 1e-6);
+    EXPECT_NEAR(hist0_host[idx_bin].GetHess(),
+                hist1_host[idx_bin].GetHess() + hist2_host[idx_bin].GetHess(), 1e-6);
   }
 }
 
@@ -299,15 +291,15 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp
   {
     ::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);
     qu->submit([&](::sycl::handler& cgh) {
-      auto buff_acc  = buff.template get_access<::sycl::access::mode::read_write>(cgh);
-      cgh.single_task<>([=]() {
-        for (size_t i = 0; i < num_rows; ++i) {
-          size_t row_idx = row_idxs_ptr[i];
-          buff_acc[0] += GradStats<GradientSumT>(gpair_ptr[row_idx].GetGrad(),
-                                                 gpair_ptr[row_idx].GetHess());
-        }
-      });
-    }).wait_and_throw();
+        auto buff_acc = buff.template get_access<::sycl::access::mode::read_write>(cgh);
+        cgh.single_task<>([=]() {
+          for (size_t i = 0; i < num_rows; ++i) {
+            size_t row_idx = row_idxs_ptr[i];
+            buff_acc[0] +=
+                GradStats<GradientSumT>(gpair_ptr[row_idx].GetGrad(), gpair_ptr[row_idx].GetHess());
+          }
+        });
+      }).wait_and_throw();
   }
 
   EXPECT_NEAR(snode.stats.GetGrad(), grad_stat.GetGrad(), 1e-6 * grad_stat.GetGrad());
@@ -367,29 +359,31 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {
   {
     ::sycl::buffer<bst_float> best_loss_chg_buff(best_loss_chg_des.data(), 1);
     qu->submit([&](::sycl::handler& cgh) {
-      auto best_loss_chg_acc = best_loss_chg_buff.template get_access<::sycl::access::mode::read_write>(cgh);
-      cgh.single_task<>([=]() {
-        for (size_t i = 1; i < size; ++i) {
-          GradStats<GradientSumT> left(0, 0);
-          GradStats<GradientSumT> right = stats - left;
-          for (size_t j = cut_ptr[i-1]; j < cut_ptr[i]; ++j) {
-            auto loss_change = evaluator.CalcSplitGain(0, i - 1, left, right) - root_gain;
-            if (loss_change > best_loss_chg_acc[0]) {
-              best_loss_chg_acc[0] = loss_change;
+        auto best_loss_chg_acc =
+            best_loss_chg_buff.template get_access<::sycl::access::mode::read_write>(cgh);
+        cgh.single_task<>([=]() {
+          for (size_t i = 1; i < size; ++i) {
+            GradStats<GradientSumT> left(0, 0);
+            GradStats<GradientSumT> right = stats - left;
+            for (size_t j = cut_ptr[i - 1]; j < cut_ptr[i]; ++j) {
+              auto loss_change = evaluator.CalcSplitGain(0, i - 1, left, right) - root_gain;
+              if (loss_change > best_loss_chg_acc[0]) {
+                best_loss_chg_acc[0] = loss_change;
+              }
+              left.Add(hist_ptr[j].GetGrad(), hist_ptr[j].GetHess());
+              right = stats - left;
             }
-            left.Add(hist_ptr[j].GetGrad(), hist_ptr[j].GetHess());
-            right = stats - left;
           }
-        }
-      });
-    }).wait();
+        });
+      }).wait();
   }
 
   ASSERT_NEAR(best_loss_chg_des[0], best_loss_chg, 1e-4);
 }
 
 template <typename GradientSumT>
-void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float sparsity, int max_bins) {
+void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float sparsity,
+                               int max_bins) {
   const size_t num_rows = 1024;
   const size_t num_columns = 2;
 
@@ -420,14 +414,17 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
 
   // Copy indexes to host
   std::vector<size_t> row_indices_host(num_rows);
-  qu->memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t)*num_rows).wait();
+  qu->memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t) * num_rows)
+      .wait();
 
   // Reference Implementation
   std::vector<size_t> row_indices_desired_host(num_rows);
   size_t n_left, n_right;
   {
-    TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, int_constraints, p_fmat.get());
-    auto* row_set_collection4verification = updater4verification.TestInitData(gmat, gpair, *p_fmat, tree);
+    TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, int_constraints,
+                                                       p_fmat.get());
+    auto* row_set_collection4verification =
+        updater4verification.TestInitData(gmat, gpair, *p_fmat, tree);
 
     size_t n_nodes = nodes.size();
     std::vector<int32_t> split_conditions(n_nodes);
@@ -441,8 +438,8 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
     });
 
     ::sycl::event event;
-    partition_builder.Partition(gmat, nodes, (*row_set_collection4verification),
-                                split_conditions, &tree, &event);
+    partition_builder.Partition(gmat, nodes, (*row_set_collection4verification), split_conditions,
+                                &tree, &event);
     qu->wait_and_throw();
 
     for (size_t node_in_set = 0; node_in_set < n_nodes; node_in_set++) {
@@ -456,10 +453,12 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
     n_left = partition_builder.GetNLeftElems(0);
     n_right = partition_builder.GetNRightElems(0);
 
-    row_set_collection4verification->AddSplit(nid, tree[nid].LeftChild(),
-        tree[nid].RightChild(), n_left, n_right);
+    row_set_collection4verification->AddSplit(nid, tree[nid].LeftChild(), tree[nid].RightChild(),
+                                              n_left, n_right);
 
-    qu->memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(), sizeof(size_t)*num_rows).wait();
+    qu->memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(),
+               sizeof(size_t) * num_rows)
+        .wait();
   }
 
   std::sort(row_indices_desired_host.begin(), row_indices_desired_host.begin() + n_left);
@@ -503,11 +502,12 @@ void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param)
   const auto& nodes = tree.GetNodes(DeviceOrd::CPU());
   std::vector<float> ans(data.size());
   for (size_t data_idx = 0; data_idx < data.size(); ++data_idx) {
-      size_t node_idx = 0;
-      while (!nodes[node_idx].IsLeaf()) {
-        node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? nodes[node_idx].LeftChild() : nodes[node_idx].RightChild();
-      }
-      ans[data_idx] = nodes[node_idx].LeafValue();
+    size_t node_idx = 0;
+    while (!nodes[node_idx].IsLeaf()) {
+      node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? nodes[node_idx].LeftChild()
+                                                              : nodes[node_idx].RightChild();
+    }
+    ans[data_idx] = nodes[node_idx].LeafValue();
   }
 
   ASSERT_NEAR(ans[0], -0.15, 1e-6);
@@ -515,7 +515,6 @@ void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param)
   ASSERT_NEAR(ans[2], -0.15, 1e-6);
 }
 
-
 template <typename GradientSumT>
 void TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param) {
   const size_t num_rows = 3;
@@ -547,11 +546,12 @@ void TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param)
   const auto& nodes = tree.GetNodes(DeviceOrd::CPU());
   std::vector<float> ans(data.size());
   for (size_t data_idx = 0; data_idx < data.size(); ++data_idx) {
-      size_t node_idx = 0;
-      while (!nodes[node_idx].IsLeaf()) {
-        node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? nodes[node_idx].LeftChild() : nodes[node_idx].RightChild();
-      }
-      ans[data_idx] = nodes[node_idx].LeafValue();
+    size_t node_idx = 0;
+    while (!nodes[node_idx].IsLeaf()) {
+      node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? nodes[node_idx].LeftChild()
+                                                              : nodes[node_idx].RightChild();
+    }
+    ans[data_idx] = nodes[node_idx].LeafValue();
   }
 
   ASSERT_NEAR(ans[0], -0.15, 1e-6);
@@ -580,7 +580,7 @@ TEST(SyclHistUpdater, InitData) {
 
 TEST(SyclHistUpdater, BuildHistogramsLossGuide) {
   xgboost::tree::TrainParam param;
-  param.UpdateAllowUnknown(Args{{"max_depth", "3"}});
+  param.UpdateAllowUnknown(Args{{"max_depth", "3"}, {"subsample", "1"}});
 
   TestHistUpdaterBuildHistogramsLossGuide<float>(param, 0.0);
   TestHistUpdaterBuildHistogramsLossGuide<float>(param, 0.5);
@@ -590,7 +590,7 @@ TEST(SyclHistUpdater, BuildHistogramsLossGuide) {
 
 TEST(SyclHistUpdater, InitNewNode) {
   xgboost::tree::TrainParam param;
-  param.UpdateAllowUnknown(Args{{"max_depth", "3"}});
+  param.UpdateAllowUnknown(Args{{"max_depth", "3"}, {"subsample", "1"}});
 
   TestHistUpdaterInitNewNode<float>(param, 0.0);
   TestHistUpdaterInitNewNode<float>(param, 0.5);
@@ -600,7 +600,7 @@ TEST(SyclHistUpdater, InitNewNode) {
 
 TEST(SyclHistUpdater, EvaluateSplits) {
   xgboost::tree::TrainParam param;
-  param.UpdateAllowUnknown(Args{{"max_depth", "3"}});
+  param.UpdateAllowUnknown(Args{{"max_depth", "3"}, {"min_child_weight", "1"}, {"subsample", "1"}});
 
   TestHistUpdaterEvaluateSplits<float>(param);
   TestHistUpdaterEvaluateSplits<double>(param);
@@ -608,7 +608,7 @@ TEST(SyclHistUpdater, EvaluateSplits) {
 
 TEST(SyclHistUpdater, ApplySplitSparce) {
   xgboost::tree::TrainParam param;
-  param.UpdateAllowUnknown(Args{{"max_depth", "3"}});
+  param.UpdateAllowUnknown(Args{{"max_depth", "3"}, {"subsample", "1"}});
 
   TestHistUpdaterApplySplit<float>(param, 0.3, 256);
   TestHistUpdaterApplySplit<double>(param, 0.3, 256);
@@ -616,20 +616,22 @@ TEST(SyclHistUpdater, ApplySplitSparce) {
 
 TEST(SyclHistUpdater, ApplySplitDence) {
   xgboost::tree::TrainParam param;
-  param.UpdateAllowUnknown(Args{{"max_depth", "3"}});
+  param.UpdateAllowUnknown(Args{{"max_depth", "3"}, {"subsample", "1"}});
 
   TestHistUpdaterApplySplit<float>(param, 0.0, 256);
-  TestHistUpdaterApplySplit<float>(param, 0.0, 256+1);
+  TestHistUpdaterApplySplit<float>(param, 0.0, 256 + 1);
   TestHistUpdaterApplySplit<float>(param, 0.0, (1u << 16) + 1);
   TestHistUpdaterApplySplit<double>(param, 0.0, 256);
-  TestHistUpdaterApplySplit<double>(param, 0.0, 256+1);
+  TestHistUpdaterApplySplit<double>(param, 0.0, 256 + 1);
   TestHistUpdaterApplySplit<double>(param, 0.0, (1u << 16) + 1);
 }
 
 TEST(SyclHistUpdater, ExpandWithLossGuide) {
   xgboost::tree::TrainParam param;
   param.UpdateAllowUnknown(Args{{"max_depth", "2"},
-                                {"grow_policy", "lossguide"}});
+                                {"grow_policy", "lossguide"},
+                                {"min_child_weight", "1"},
+                                {"subsample", "1"}});
 
   TestHistUpdaterExpandWithLossGuide<float>(param);
   TestHistUpdaterExpandWithLossGuide<double>(param);
@@ -637,7 +639,7 @@ TEST(SyclHistUpdater, ExpandWithLossGuide) {
 
 TEST(SyclHistUpdater, ExpandWithDepthWise) {
   xgboost::tree::TrainParam param;
-  param.UpdateAllowUnknown(Args{{"max_depth", "2"}});
+  param.UpdateAllowUnknown(Args{{"max_depth", "2"}, {"min_child_weight", "1"}, {"subsample", "1"}});
 
   TestHistUpdaterExpandWithDepthWise<float>(param);
   TestHistUpdaterExpandWithDepthWise<double>(param);
diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc
index 4296ba49a3f4..25597920e465 100644
--- a/tests/cpp/tree/hist/test_evaluate_splits.cc
+++ b/tests/cpp/tree/hist/test_evaluate_splits.cc
@@ -169,7 +169,11 @@ TEST(HistMultiEvaluator, Evaluate) {
   ctx.nthread = 1;
 
   TrainParam param;
-  param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
+  param.Init(Args{
+      {"min_child_weight", "0"},
+      {"reg_lambda", "0"},
+      {"colsample_bytree", "1"},
+  });
   auto sampler = std::make_shared<common::ColumnSampler>();
 
   std::size_t n_samples = 3;
diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc
index 29a317f4e358..a866aa6ba3ea 100644
--- a/tests/cpp/tree/test_approx.cc
+++ b/tests/cpp/tree/test_approx.cc
@@ -100,8 +100,11 @@ TEST(Approx, InteractionConstraint) {
 
     std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
     TrainParam param;
-    param.UpdateAllowUnknown(
-        Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
+    param.UpdateAllowUnknown(Args{{"interaction_constraints", "[[0, 1]]"},
+                                  {"num_feature", std::to_string(kCols)},
+                                  {"min_child_weight", "1"},
+                                  {"subsample", "1"},
+                                  {"colsample_bytree", "1"}});
     std::vector<HostDeviceVector<bst_node_t>> position(1);
     updater->Configure(Args{});
     updater->Update(&param, &gpair, p_dmat.get(), position, {&tree});
@@ -119,7 +122,7 @@ TEST(Approx, InteractionConstraint) {
     std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
     std::vector<HostDeviceVector<bst_node_t>> position(1);
     TrainParam param;
-    param.Init(Args{});
+    param.Init(Args{{"min_child_weight", "1"}, {"subsample", "1"}, {"colsample_bytree", "1"}});
     updater->Configure(Args{});
     updater->Update(&param, &gpair, p_dmat.get(), position, {&tree});
 
diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index d6343dd2a2c9..826d726de94c 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -27,9 +27,11 @@ void UpdateTree(Context const* ctx, GradientContainer* gpair, DMatrix* dmat, Reg
   Args args{
       {"max_depth", "2"},
       {"max_bin", std::to_string(max_bin)},
+      {"learning_rate", "0.3"},
       {"min_child_weight", "0.0"},
       {"reg_alpha", "0"},
       {"reg_lambda", "0"},
+      {"colsample_bytree", "1"},
       {"subsample", std::to_string(subsample)},
       {"sampling_method", sampling_method},
   };
@@ -139,7 +141,7 @@ TEST(GpuHist, ExternalMemory) {
   auto preds_h = preds.ConstHostVector();
   auto preds_ext_h = preds_ext.ConstHostVector();
   for (size_t i = 0; i < kRows; i++) {
-    EXPECT_NEAR(preds_h[i], preds_ext_h[i], 1e-6);
+    EXPECT_NEAR(preds_h[i], preds_ext_h[i], 5e-3);
   }
 }
 
diff --git a/tests/cpp/tree/test_prediction_cache.h b/tests/cpp/tree/test_prediction_cache.h
index 39073360de32..2d7d163e4c01 100644
--- a/tests/cpp/tree/test_prediction_cache.h
+++ b/tests/cpp/tree/test_prediction_cache.h
@@ -4,7 +4,6 @@
 #pragma once
 
 #include <gtest/gtest.h>
-
 #include <xgboost/host_device_vector.h>
 #include <xgboost/tree_updater.h>
 
@@ -12,7 +11,7 @@
 
 #include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"
-#include "xgboost/task.h"             // for ObjInfo
+#include "xgboost/task.h"  // for ObjInfo
 
 namespace xgboost {
 class TestPredictionCache : public ::testing::Test {
@@ -73,7 +72,7 @@ class TestPredictionCache : public ::testing::Test {
       std::vector<RegTree*> trees{&tree};
       auto gpair = GenerateRandomGradients(ctx, n_samples_, 1);
       tree::TrainParam param;
-      param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
+      param.UpdateAllowUnknown(Args{{"max_bin", "64"}, {"subsample", "1"}});
 
       updater->Configure(Args{});
       std::vector<HostDeviceVector<bst_node_t>> position(1);
diff --git a/tests/cpp/tree/test_refresh.cc b/tests/cpp/tree/test_refresh.cc
index 01052861ab03..2b11428f3069 100644
--- a/tests/cpp/tree/test_refresh.cc
+++ b/tests/cpp/tree/test_refresh.cc
@@ -33,11 +33,12 @@ TEST(Updater, Refresh) {
                                              ctx.Device()};
 
   std::shared_ptr<DMatrix> p_dmat{
-    RandomDataGenerator{kRows, kCols, 0.4f}.Seed(3).GenerateDMatrix()};
-  std::vector<std::pair<std::string, std::string>> cfg{
-      {"reg_alpha", "0.0"},
-      {"num_feature", std::to_string(kCols)},
-      {"reg_lambda", "1"}};
+      RandomDataGenerator{kRows, kCols, 0.4f}.Seed(3).GenerateDMatrix()};
+  std::vector<std::pair<std::string, std::string>> cfg{{"reg_alpha", "0.0"},
+                                                       {"num_feature", std::to_string(kCols)},
+                                                       {"reg_lambda", "1"},
+                                                       {"min_child_weight", "1"},
+                                                       {"learning_rate", "0.3"}};
 
   RegTree tree = RegTree{1u, kCols};
   std::vector<RegTree*> trees{&tree};
diff --git a/tests/cpp/tree/test_tree_stat.cc b/tests/cpp/tree/test_tree_stat.cc
index 0acd987b4871..ca7bc534cfe1 100644
--- a/tests/cpp/tree/test_tree_stat.cc
+++ b/tests/cpp/tree/test_tree_stat.cc
@@ -110,7 +110,10 @@ class TestSplitWithEta : public ::testing::Test {
 
       auto grad = GenerateRandomGradients(ctx, Xy->Info().num_row_, n_targets);
       CHECK_EQ(grad.gpair.Shape(1), n_targets);
-      auto args = Args{{"learning_rate", std::to_string(eta)}};
+      auto args = Args{{"learning_rate", std::to_string(eta)},
+                       {"min_child_weight", "1"},
+                       {"subsample", "1"},
+                       {"colsample_bytree", "1"}};
 
       BuildTree(ctx, Xy.get(), &grad, name, args, tree.get());
 
@@ -309,7 +312,9 @@ class TestRegularization : public ::testing::Test {
 
     RegTree tree_0{static_cast<bst_target_t>(gpairs.gpair.Shape(1)),
                    static_cast<bst_target_t>(p_fmat->Info().num_col_)};
-    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{p, "0.0"}}, &tree_0);
+    auto args =
+        Args{{p, "0.0"}, {"min_child_weight", "1"}, {"subsample", "1"}, {"colsample_bytree", "1"}};
+    BuildTree(ctx, p_fmat.get(), &gpairs, updater, args, &tree_0);
     // not exact, just checking the tree can be built
     if (n_targets > 1) {
       ASSERT_GE(tree_0.NumNodes(), 40);
@@ -319,7 +324,8 @@ class TestRegularization : public ::testing::Test {
 
     RegTree tree_1{static_cast<bst_target_t>(gpairs.gpair.Shape(1)),
                    static_cast<bst_target_t>(p_fmat->Info().num_col_)};
-    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{p, "1024.0"}}, &tree_1);
+    args[0].second = "1024.0";
+    BuildTree(ctx, p_fmat.get(), &gpairs, updater, args, &tree_1);
     ASSERT_EQ(tree_1.NumNodes(), 1);
   }
 };
@@ -415,7 +421,8 @@ class TestMaxDeltaStep : public ::testing::Test {
 
     RegTree tree_0{static_cast<bst_target_t>(gpairs.gpair.Shape(1)),
                    static_cast<bst_target_t>(p_fmat->Info().num_col_)};
-    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{"max_delta_step", std::to_string(0.5)}}, &tree_0);
+    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{"max_delta_step", std::to_string(0.5)}},
+              &tree_0);
     ASSERT_EQ(tree_0.NumNodes(), 1);
   }
 };
diff --git a/tests/python-sycl/test_sycl_training_continuation.py b/tests/python-sycl/test_sycl_training_continuation.py
index 71d5965600e7..80b7bfd121aa 100644
--- a/tests/python-sycl/test_sycl_training_continuation.py
+++ b/tests/python-sycl/test_sycl_training_continuation.py
@@ -1,6 +1,7 @@
+import json
+
 import numpy as np
 import xgboost as xgb
-import json
 
 rng = np.random.RandomState(1994)
 
@@ -17,6 +18,9 @@ def run_training_continuation(self, use_json):
             "max_depth": "2",
             "gamma": "0.1",
             "alpha": "0.01",
+            "seed": 1994,
+            "subsample": 1,
+            "colsample_bytree": 1,
             "enable_experimental_json_serialization": use_json,
         }
         bst_0 = xgb.train(params, dtrain, num_boost_round=64)
diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py
index b736ca63ccb3..dadd0e16f599 100644
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -104,15 +104,16 @@ def my_logloss(preds, dtrain):
     def test_boost_from_prediction(self):
         # Re-construct dtrain here to avoid modification
         margined, _ = tm.load_agaricus(__file__)
-        bst = xgb.train({"tree_method": "hist"}, margined, 1)
+        params = {"tree_method": "hist", **tm.legacy_tree_params()}
+        bst = xgb.train(params, margined, 1)
         predt_0 = bst.predict(margined, output_margin=True)
         margined.set_base_margin(predt_0)
-        bst = xgb.train({"tree_method": "hist"}, margined, 1)
+        bst = xgb.train(params, margined, 1)
         predt_1 = bst.predict(margined)
 
         assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
         dtrain, _ = tm.load_agaricus(__file__)
-        bst = xgb.train({"tree_method": "hist"}, dtrain, 2)
+        bst = xgb.train(params, dtrain, 2)
         predt_2 = bst.predict(dtrain)
         assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
 
@@ -241,7 +242,11 @@ def test_feature_names_validation(self):
         bst.predict(dm2)  # success
 
     def test_special_model_dump_characters(self) -> None:
-        params = {"objective": "reg:squarederror", "max_depth": 3}
+        params = {
+            "objective": "reg:squarederror",
+            "max_depth": 3,
+            **tm.legacy_tree_params(),
+        }
         feature_names = ['"feature 0"', "\tfeature\n1", """feature "2"."""]
         X, y, w = tm.make_regression(n_samples=128, n_features=3, use_cupy=False)
         Xy = xgb.DMatrix(X, label=y, feature_names=feature_names)
diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py
index 00ded63f7b63..f323cbc97910 100644
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -154,6 +154,7 @@ def test_early_stopping_customize(self, breast_cancer: BreastCancer) -> None:
                 "objective": "binary:logistic",
                 "eval_metric": ["error", "rmse"],
                 "tree_method": "hist",
+                **tm.legacy_tree_params(),
             },
             D_train,
             evals=[(D_train, "Train"), (D_valid, "Valid")],
@@ -188,9 +189,9 @@ def test_early_stopping_customize(self, breast_cancer: BreastCancer) -> None:
             callbacks=[early_stop],
             verbose_eval=False,
         )
-        # No iteration can be made with min_delta == 100
-        assert booster.best_iteration == 0
-        assert booster.num_boosted_rounds() == 1
+        # Only very large improvements can satisfy min_delta == 100.
+        assert booster.best_iteration <= 1
+        assert booster.num_boosted_rounds() == booster.best_iteration + 1
 
     def test_early_stopping_skl(self, breast_cancer: BreastCancer) -> None:
         X, y = breast_cancer.full
@@ -294,12 +295,20 @@ def test_early_stopping_multiple_metrics(self):
 
         X, y = make_classification(random_state=1994)
         # AUC approaches 1.0 real quick.
-        clf = xgb.XGBClassifier(eval_metric=["logloss", "auc"], early_stopping_rounds=2)
+        clf = xgb.XGBClassifier(
+            eval_metric=["logloss", "auc"],
+            early_stopping_rounds=2,
+            **tm.legacy_tree_params(),
+        )
         clf.fit(X, y, eval_set=[(X, y)])
         assert clf.best_iteration < 8
         assert clf.evals_result()["validation_0"]["auc"][-1] > 0.99
 
-        clf = xgb.XGBClassifier(eval_metric=["auc", "logloss"], early_stopping_rounds=2)
+        clf = xgb.XGBClassifier(
+            eval_metric=["auc", "logloss"],
+            early_stopping_rounds=2,
+            **tm.legacy_tree_params(),
+        )
         clf.fit(X, y, eval_set=[(X, y)])
 
         assert clf.best_iteration > 50
diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py
index fbb163d69658..fcdcb7b12496 100644
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 import pytest
-
 import xgboost as xgb
 from xgboost import testing as tm
 from xgboost.testing.updater import get_basescore
@@ -63,6 +62,7 @@ def test_cv_early_stopping(self) -> None:
             "eta": 1,
             "objective": "binary:logistic",
             "eval_metric": "error",
+            **tm.legacy_sampling_params(),
         }
 
         def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
diff --git a/tests/python/test_multi_target.py b/tests/python/test_multi_target.py
index 9ee379cae651..8c5057dad12f 100644
--- a/tests/python/test_multi_target.py
+++ b/tests/python/test_multi_target.py
@@ -57,6 +57,7 @@ def test_shap_multi_output_tree() -> None:
             "num_target": 2,
             "multi_strategy": "multi_output_tree",
             "objective": "reg:squarederror",
+            **tm.legacy_tree_params(),
         },
         Xy,
         num_boost_round=3,
diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py
index 28cf4bf0f7a5..4848d5f87370 100644
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -19,15 +19,18 @@ class TestTrainingContinuation:
     def generate_parameters(self):
         xgb_params_01_binary = {
             "nthread": 1,
+            **tm.legacy_tree_params(),
         }
 
         xgb_params_02_binary = {
             "nthread": 1,
+            **tm.legacy_tree_params(),
             "num_parallel_tree": self.num_parallel_tree,
         }
 
         xgb_params_03_binary = {
             "nthread": 1,
+            **tm.legacy_tree_params(),
             "num_class": 5,
             "num_parallel_tree": self.num_parallel_tree,
         }
diff --git a/tests/python/test_tree_regularization.py b/tests/python/test_tree_regularization.py
index c5bace3b61bb..66714871acab 100644
--- a/tests/python/test_tree_regularization.py
+++ b/tests/python/test_tree_regularization.py
@@ -1,7 +1,6 @@
 import numpy as np
-from numpy.testing import assert_approx_equal
-
 import xgboost as xgb
+from numpy.testing import assert_approx_equal
 
 train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
 
@@ -16,6 +15,7 @@ def test_alpha(self):
             "lambda": 0,
             "alpha": 0.1,
             "base_score": 0.5,
+            "min_child_weight": 1,
         }
 
         model = xgb.train(params, train_data, 1)
@@ -36,6 +36,7 @@ def test_lambda(self):
             "lambda": 1,
             "alpha": 0,
             "base_score": 0.5,
+            "min_child_weight": 1,
         }
 
         model = xgb.train(params, train_data, 1)
@@ -56,6 +57,7 @@ def test_alpha_and_lambda(self):
             "lambda": 1,
             "alpha": 0.1,
             "base_score": 0.5,
+            "min_child_weight": 1,
         }
 
         model = xgb.train(params, train_data, 1)
@@ -77,6 +79,9 @@ def test_unlimited_depth(self):
             tree_method="hist",
             grow_policy="lossguide",
             reg_lambda=0,
+            min_child_weight=1,
+            subsample=1,
+            colsample_bytree=1,
             max_leaves=128,
             max_depth=0,
         ).fit(x, y)
diff --git a/tests/test_distributed/test_with_spark/test_spark.py b/tests/test_distributed/test_with_spark/test_spark.py
index 5558d172bd02..be30867b164c 100644
--- a/tests/test_distributed/test_with_spark/test_spark.py
+++ b/tests/test_distributed/test_with_spark/test_spark.py
@@ -397,7 +397,7 @@ def test_regressor_params(self, spark: SparkSession) -> None:
         assert py_reg.n_estimators.parent == py_reg.uid
         assert not hasattr(py_reg, "gpu_id")
         assert hasattr(py_reg, "device")
-        assert py_reg.getOrDefault(py_reg.n_estimators) == 100
+        assert py_reg.getOrDefault(py_reg.n_estimators) == 300
         assert py_reg.getOrDefault(getattr(py_reg, "objective")) == "reg:squarederror"
         py_reg2 = SparkXGBRegressor(n_estimators=200)
         assert py_reg2.getOrDefault(getattr(py_reg2, "n_estimators")) == 200
@@ -679,7 +679,7 @@ def test_classifier_params(self, spark: SparkSession) -> None:
         assert not hasattr(py_clf, "gpu_id")
         assert hasattr(py_clf, "device")
 
-        assert py_clf.getOrDefault(py_clf.n_estimators) == 100
+        assert py_clf.getOrDefault(py_clf.n_estimators) == 300
         assert py_clf.getOrDefault(getattr(py_clf, "objective")) is None
         py_clf2 = SparkXGBClassifier(n_estimators=200)
         assert py_clf2.getOrDefault(getattr(py_clf2, "n_estimators")) == 200