diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 1064fd29df..493045022b 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -343,6 +343,12 @@ def custom_metric( } ``` skip_transform: boolean, default=False | Whether to pre-process data prior to modeling. + allow_label_overlap: boolean, default=True | For classification tasks with holdout evaluation, + whether to allow label overlap between train and validation sets. When True (default), + uses a fast strategy that adds the first instance of missing labels to the set that is + missing them, which may create some overlap. When False, uses a precise but slower + strategy that intelligently re-splits instances to avoid overlap when possible. + Only affects classification tasks with holdout evaluation method. fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name. e.g., @@ -373,6 +379,7 @@ def custom_metric( settings["split_ratio"] = settings.get("split_ratio", SPLIT_RATIO) settings["n_splits"] = settings.get("n_splits", N_SPLITS) settings["auto_augment"] = settings.get("auto_augment", True) + settings["allow_label_overlap"] = settings.get("allow_label_overlap", True) settings["metric"] = settings.get("metric", "auto") # Validate that custom metric is callable if not a string self._validate_metric_parameter(settings["metric"], allow_auto=True) @@ -1113,6 +1120,7 @@ def retrain_from_log( eval_method = self._decide_eval_method(eval_method, time_budget) self.modelcount = 0 self._auto_augment = auto_augment + self._allow_label_overlap = self._settings.get("allow_label_overlap", True) self._prepare_data(eval_method, split_ratio, n_splits) self._state.time_budget = -1 self._state.free_mem_ratio = 0 @@ -1716,6 +1724,7 @@ def _prepare_data(self, eval_method, split_ratio, n_splits): n_splits, self._df, self._sample_weight_full, + self._allow_label_overlap, ) self.data_size_full = self._state.data_size_full @@ -1772,6 +1781,7 @@ def fit( 
time_col=None, cv_score_agg_func=None, skip_transform=None, + allow_label_overlap=None, mlflow_logging=None, fit_kwargs_by_estimator=None, mlflow_exp_name=None, @@ -2058,6 +2068,12 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): ``` skip_transform: boolean, default=False | Whether to pre-process data prior to modeling. + allow_label_overlap: boolean, default=True | For classification tasks with holdout evaluation, + whether to allow label overlap between train and validation sets. When True (default), + uses a fast strategy that adds the first instance of missing labels to the set that is + missing them, which may create some overlap. When False, uses a precise but slower + strategy that intelligently re-splits instances to avoid overlap when possible. + Only affects classification tasks with holdout evaluation method. mlflow_logging: boolean, default=None | Whether to log the training results to mlflow. Default value is None, which means the logging decision is made based on AutoML.__init__'s mlflow_logging argument. Not valid if mlflow is not installed. 
@@ -2127,6 +2143,9 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): split_ratio = split_ratio or self._settings.get("split_ratio") n_splits = n_splits or self._settings.get("n_splits") auto_augment = self._settings.get("auto_augment") if auto_augment is None else auto_augment + allow_label_overlap = ( + self._settings.get("allow_label_overlap") if allow_label_overlap is None else allow_label_overlap + ) metric = self._settings.get("metric") if metric is None else metric estimator_list = estimator_list or self._settings.get("estimator_list") log_file_name = self._settings.get("log_file_name") if log_file_name is None else log_file_name @@ -2309,6 +2328,7 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): self._retrain_in_budget = retrain_full == "budget" and (eval_method == "holdout" and self._state.X_val is None) self._auto_augment = auto_augment + self._allow_label_overlap = allow_label_overlap _sample_size_from_starting_points = {} if isinstance(starting_points, dict): diff --git a/flaml/automl/task/generic_task.py b/flaml/automl/task/generic_task.py index 5b74a3d755..19f80a45ef 100644 --- a/flaml/automl/task/generic_task.py +++ b/flaml/automl/task/generic_task.py @@ -365,6 +365,465 @@ def _train_test_split(state, X, y, first=None, rest=None, split_ratio=0.2, strat X_train, X_val, y_train, y_val = GenericTask._split_pyspark(state, X, y, split_ratio, stratify) return X_train, X_val, y_train, y_val + def _handle_missing_labels_fast( + self, + state, + X_train, + X_val, + y_train, + y_val, + X_train_all, + y_train_all, + is_spark_dataframe, + data_is_df, + ): + """Handle missing labels by adding first instance to the set with missing label. + + This is the faster version that may create some overlap but ensures all labels + are present in both sets. If a label is missing from train, it adds the first + instance to train. If a label is missing from val, it adds the first instance to val. + If no labels are missing, no instances are duplicated. 
+ + Args: + state: The state object containing fit parameters + X_train, X_val: Training and validation features + y_train, y_val: Training and validation labels + X_train_all, y_train_all: Complete dataset + is_spark_dataframe: Whether data is pandas_on_spark + data_is_df: Whether data is DataFrame/Series + + Returns: + Tuple of (X_train, X_val, y_train, y_val) with missing labels added + """ + # Check which labels are present in train and val sets + if is_spark_dataframe: + label_set_train, _ = unique_pandas_on_spark(y_train) + label_set_val, _ = unique_pandas_on_spark(y_val) + label_set_all, first = unique_value_first_index(y_train_all) + else: + label_set_all, first = unique_value_first_index(y_train_all) + label_set_train = np.unique(y_train) + label_set_val = np.unique(y_val) + + # Find missing labels + missing_in_train = np.setdiff1d(label_set_all, label_set_train) + missing_in_val = np.setdiff1d(label_set_all, label_set_val) + + # Add first instance of missing labels to train set + if len(missing_in_train) > 0: + missing_train_indices = [] + for label in missing_in_train: + label_matches = np.where(label_set_all == label)[0] + if len(label_matches) > 0 and label_matches[0] < len(first): + missing_train_indices.append(first[label_matches[0]]) + + if len(missing_train_indices) > 0: + X_missing_train = ( + iloc_pandas_on_spark(X_train_all, missing_train_indices) + if is_spark_dataframe + else X_train_all.iloc[missing_train_indices] + if data_is_df + else X_train_all[missing_train_indices] + ) + y_missing_train = ( + iloc_pandas_on_spark(y_train_all, missing_train_indices) + if is_spark_dataframe + else y_train_all.iloc[missing_train_indices] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[missing_train_indices] + ) + X_train = concat(X_missing_train, X_train) + y_train = concat(y_missing_train, y_train) if data_is_df else np.concatenate([y_missing_train, y_train]) + + # Handle sample_weight if present + if "sample_weight" in 
state.fit_kwargs: + sample_weight_source = ( + state.sample_weight_all + if hasattr(state, "sample_weight_all") + else state.fit_kwargs.get("sample_weight") + ) + if sample_weight_source is not None and max(missing_train_indices) < len(sample_weight_source): + missing_weights = ( + sample_weight_source[missing_train_indices] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[missing_train_indices] + ) + state.fit_kwargs["sample_weight"] = concat(missing_weights, state.fit_kwargs["sample_weight"]) + + # Add first instance of missing labels to val set + if len(missing_in_val) > 0: + missing_val_indices = [] + for label in missing_in_val: + label_matches = np.where(label_set_all == label)[0] + if len(label_matches) > 0 and label_matches[0] < len(first): + missing_val_indices.append(first[label_matches[0]]) + + if len(missing_val_indices) > 0: + X_missing_val = ( + iloc_pandas_on_spark(X_train_all, missing_val_indices) + if is_spark_dataframe + else X_train_all.iloc[missing_val_indices] + if data_is_df + else X_train_all[missing_val_indices] + ) + y_missing_val = ( + iloc_pandas_on_spark(y_train_all, missing_val_indices) + if is_spark_dataframe + else y_train_all.iloc[missing_val_indices] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[missing_val_indices] + ) + X_val = concat(X_missing_val, X_val) + y_val = concat(y_missing_val, y_val) if data_is_df else np.concatenate([y_missing_val, y_val]) + + # Handle sample_weight if present + if ( + "sample_weight" in state.fit_kwargs + and hasattr(state, "weight_val") + and state.weight_val is not None + ): + sample_weight_source = ( + state.sample_weight_all + if hasattr(state, "sample_weight_all") + else state.fit_kwargs.get("sample_weight") + ) + if sample_weight_source is not None and max(missing_val_indices) < len(sample_weight_source): + missing_weights = ( + sample_weight_source[missing_val_indices] + if isinstance(sample_weight_source, np.ndarray) + else 
sample_weight_source.iloc[missing_val_indices] + ) + state.weight_val = concat(missing_weights, state.weight_val) + + return X_train, X_val, y_train, y_val + + def _handle_missing_labels_no_overlap( + self, + state, + X_train, + X_val, + y_train, + y_val, + X_train_all, + y_train_all, + is_spark_dataframe, + data_is_df, + split_ratio, + ): + """Handle missing labels intelligently to avoid overlap when possible. + + This is the slower but more precise version that: + - For single-instance classes: Adds to both sets (unavoidable overlap) + - For multi-instance classes: Re-splits them properly to avoid overlap + + Args: + state: The state object containing fit parameters + X_train, X_val: Training and validation features + y_train, y_val: Training and validation labels + X_train_all, y_train_all: Complete dataset + is_spark_dataframe: Whether data is pandas_on_spark + data_is_df: Whether data is DataFrame/Series + split_ratio: The ratio for splitting + + Returns: + Tuple of (X_train, X_val, y_train, y_val) with missing labels handled + """ + # Check which labels are present in train and val sets + if is_spark_dataframe: + label_set_train, _ = unique_pandas_on_spark(y_train) + label_set_val, _ = unique_pandas_on_spark(y_val) + label_set_all, first = unique_value_first_index(y_train_all) + else: + label_set_all, first = unique_value_first_index(y_train_all) + label_set_train = np.unique(y_train) + label_set_val = np.unique(y_val) + + # Find missing labels + missing_in_train = np.setdiff1d(label_set_all, label_set_train) + missing_in_val = np.setdiff1d(label_set_all, label_set_val) + + # Handle missing labels intelligently + # For classes with only 1 instance: add to both sets (unavoidable overlap) + # For classes with multiple instances: move/split them properly to avoid overlap + + if len(missing_in_train) > 0: + # Process missing labels in training set + for label in missing_in_train: + # Find all indices for this label in the original data + if is_spark_dataframe: + 
label_indices = np.where(y_train_all.to_numpy() == label)[0].tolist() + else: + label_indices = np.where(np.asarray(y_train_all) == label)[0].tolist() + + num_instances = len(label_indices) + + if num_instances == 1: + # Single instance: must add to both train and val (unavoidable overlap) + X_single = ( + iloc_pandas_on_spark(X_train_all, label_indices) + if is_spark_dataframe + else X_train_all.iloc[label_indices] + if data_is_df + else X_train_all[label_indices] + ) + y_single = ( + iloc_pandas_on_spark(y_train_all, label_indices) + if is_spark_dataframe + else y_train_all.iloc[label_indices] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[label_indices] + ) + X_train = concat(X_single, X_train) + y_train = concat(y_single, y_train) if data_is_df else np.concatenate([y_single, y_train]) + + # Handle sample_weight + if "sample_weight" in state.fit_kwargs: + sample_weight_source = ( + state.sample_weight_all + if hasattr(state, "sample_weight_all") + else state.fit_kwargs.get("sample_weight") + ) + if sample_weight_source is not None and label_indices[0] < len(sample_weight_source): + single_weight = ( + sample_weight_source[label_indices] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[label_indices] + ) + state.fit_kwargs["sample_weight"] = concat(single_weight, state.fit_kwargs["sample_weight"]) + else: + # Multiple instances: move some from val to train (no overlap needed) + # Calculate how many to move to train (leave at least 1 in val) + num_to_train = max(1, min(num_instances - 1, int(num_instances * (1 - split_ratio)))) + indices_to_move = label_indices[:num_to_train] + + X_to_move = ( + iloc_pandas_on_spark(X_train_all, indices_to_move) + if is_spark_dataframe + else X_train_all.iloc[indices_to_move] + if data_is_df + else X_train_all[indices_to_move] + ) + y_to_move = ( + iloc_pandas_on_spark(y_train_all, indices_to_move) + if is_spark_dataframe + else y_train_all.iloc[indices_to_move] + if 
isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[indices_to_move] + ) + + # Add to train + X_train = concat(X_to_move, X_train) + y_train = concat(y_to_move, y_train) if data_is_df else np.concatenate([y_to_move, y_train]) + + # Remove from val (they are currently all in val) + if is_spark_dataframe: + val_mask = ~y_val.isin([label]) + X_val = X_val[val_mask] + y_val = y_val[val_mask] + else: + val_mask = np.asarray(y_val) != label + if data_is_df: + X_val = X_val[val_mask] + y_val = y_val[val_mask] + else: + X_val = X_val[val_mask] + y_val = y_val[val_mask] + + # Add remaining instances back to val + remaining_indices = label_indices[num_to_train:] + if len(remaining_indices) > 0: + X_remaining = ( + iloc_pandas_on_spark(X_train_all, remaining_indices) + if is_spark_dataframe + else X_train_all.iloc[remaining_indices] + if data_is_df + else X_train_all[remaining_indices] + ) + y_remaining = ( + iloc_pandas_on_spark(y_train_all, remaining_indices) + if is_spark_dataframe + else y_train_all.iloc[remaining_indices] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[remaining_indices] + ) + X_val = concat(X_remaining, X_val) + y_val = concat(y_remaining, y_val) if data_is_df else np.concatenate([y_remaining, y_val]) + + # Handle sample_weight + if "sample_weight" in state.fit_kwargs: + sample_weight_source = ( + state.sample_weight_all + if hasattr(state, "sample_weight_all") + else state.fit_kwargs.get("sample_weight") + ) + if sample_weight_source is not None and max(indices_to_move) < len(sample_weight_source): + weights_to_move = ( + sample_weight_source[indices_to_move] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[indices_to_move] + ) + state.fit_kwargs["sample_weight"] = concat( + weights_to_move, state.fit_kwargs["sample_weight"] + ) + + if ( + len(remaining_indices) > 0 + and hasattr(state, "weight_val") + and state.weight_val is not None + ): + # Remove and re-add weights for val + if 
isinstance(state.weight_val, np.ndarray): + state.weight_val = state.weight_val[val_mask] + else: + state.weight_val = state.weight_val[val_mask] + + if max(remaining_indices) < len(sample_weight_source): + remaining_weights = ( + sample_weight_source[remaining_indices] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[remaining_indices] + ) + state.weight_val = concat(remaining_weights, state.weight_val) + + if len(missing_in_val) > 0: + # Process missing labels in validation set + for label in missing_in_val: + # Find all indices for this label in the original data + if is_spark_dataframe: + label_indices = np.where(y_train_all.to_numpy() == label)[0].tolist() + else: + label_indices = np.where(np.asarray(y_train_all) == label)[0].tolist() + + num_instances = len(label_indices) + + if num_instances == 1: + # Single instance: must add to both train and val (unavoidable overlap) + X_single = ( + iloc_pandas_on_spark(X_train_all, label_indices) + if is_spark_dataframe + else X_train_all.iloc[label_indices] + if data_is_df + else X_train_all[label_indices] + ) + y_single = ( + iloc_pandas_on_spark(y_train_all, label_indices) + if is_spark_dataframe + else y_train_all.iloc[label_indices] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[label_indices] + ) + X_val = concat(X_single, X_val) + y_val = concat(y_single, y_val) if data_is_df else np.concatenate([y_single, y_val]) + + # Handle sample_weight + if "sample_weight" in state.fit_kwargs and hasattr(state, "weight_val"): + sample_weight_source = ( + state.sample_weight_all + if hasattr(state, "sample_weight_all") + else state.fit_kwargs.get("sample_weight") + ) + if sample_weight_source is not None and label_indices[0] < len(sample_weight_source): + single_weight = ( + sample_weight_source[label_indices] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[label_indices] + ) + if state.weight_val is not None: + state.weight_val = 
concat(single_weight, state.weight_val) + else: + # Multiple instances: move some from train to val (no overlap needed) + # Calculate how many to move to val (leave at least 1 in train) + num_to_val = max(1, min(num_instances - 1, int(num_instances * split_ratio))) + indices_to_move = label_indices[:num_to_val] + + X_to_move = ( + iloc_pandas_on_spark(X_train_all, indices_to_move) + if is_spark_dataframe + else X_train_all.iloc[indices_to_move] + if data_is_df + else X_train_all[indices_to_move] + ) + y_to_move = ( + iloc_pandas_on_spark(y_train_all, indices_to_move) + if is_spark_dataframe + else y_train_all.iloc[indices_to_move] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[indices_to_move] + ) + + # Add to val + X_val = concat(X_to_move, X_val) + y_val = concat(y_to_move, y_val) if data_is_df else np.concatenate([y_to_move, y_val]) + + # Remove from train (they are currently all in train) + if is_spark_dataframe: + train_mask = ~y_train.isin([label]) + X_train = X_train[train_mask] + y_train = y_train[train_mask] + else: + train_mask = np.asarray(y_train) != label + if data_is_df: + X_train = X_train[train_mask] + y_train = y_train[train_mask] + else: + X_train = X_train[train_mask] + y_train = y_train[train_mask] + + # Add remaining instances back to train + remaining_indices = label_indices[num_to_val:] + if len(remaining_indices) > 0: + X_remaining = ( + iloc_pandas_on_spark(X_train_all, remaining_indices) + if is_spark_dataframe + else X_train_all.iloc[remaining_indices] + if data_is_df + else X_train_all[remaining_indices] + ) + y_remaining = ( + iloc_pandas_on_spark(y_train_all, remaining_indices) + if is_spark_dataframe + else y_train_all.iloc[remaining_indices] + if isinstance(y_train_all, (pd.Series, psSeries)) + else y_train_all[remaining_indices] + ) + X_train = concat(X_remaining, X_train) + y_train = concat(y_remaining, y_train) if data_is_df else np.concatenate([y_remaining, y_train]) + + # Handle sample_weight + if 
"sample_weight" in state.fit_kwargs: + sample_weight_source = ( + state.sample_weight_all + if hasattr(state, "sample_weight_all") + else state.fit_kwargs.get("sample_weight") + ) + if sample_weight_source is not None and max(indices_to_move) < len(sample_weight_source): + weights_to_move = ( + sample_weight_source[indices_to_move] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[indices_to_move] + ) + if hasattr(state, "weight_val") and state.weight_val is not None: + state.weight_val = concat(weights_to_move, state.weight_val) + + if len(remaining_indices) > 0: + # Remove and re-add weights for train + if isinstance(state.fit_kwargs["sample_weight"], np.ndarray): + state.fit_kwargs["sample_weight"] = state.fit_kwargs["sample_weight"][train_mask] + else: + state.fit_kwargs["sample_weight"] = state.fit_kwargs["sample_weight"][train_mask] + + if max(remaining_indices) < len(sample_weight_source): + remaining_weights = ( + sample_weight_source[remaining_indices] + if isinstance(sample_weight_source, np.ndarray) + else sample_weight_source.iloc[remaining_indices] + ) + state.fit_kwargs["sample_weight"] = concat( + remaining_weights, state.fit_kwargs["sample_weight"] + ) + + return X_train, X_val, y_train, y_val + def prepare_data( self, state, @@ -377,6 +836,7 @@ def prepare_data( n_splits, data_is_df, sample_weight_full, + allow_label_overlap=True, ) -> int: X_val, y_val = state.X_val, state.y_val if issparse(X_val): @@ -505,59 +965,46 @@ def prepare_data( elif self.is_classification(): # for classification, make sure the labels are complete in both # training and validation data - label_set, first = unique_value_first_index(y_train_all) - rest = [] - last = 0 - first.sort() - for i in range(len(first)): - rest.extend(range(last, first[i])) - last = first[i] + 1 - rest.extend(range(last, len(y_train_all))) - X_first = X_train_all.iloc[first] if data_is_df else X_train_all[first] - if len(first) < len(y_train_all) / 2: - # Get X_rest 
and y_rest with drop, sparse matrix can't apply np.delete - X_rest = ( - np.delete(X_train_all, first, axis=0) - if isinstance(X_train_all, np.ndarray) - else X_train_all.drop(first.tolist()) - if data_is_df - else X_train_all[rest] - ) - y_rest = ( - np.delete(y_train_all, first, axis=0) - if isinstance(y_train_all, np.ndarray) - else y_train_all.drop(first.tolist()) - if data_is_df - else y_train_all[rest] + stratify = y_train_all if split_type == "stratified" else None + X_train, X_val, y_train, y_val = self._train_test_split( + state, X_train_all, y_train_all, split_ratio=split_ratio, stratify=stratify + ) + + # Handle missing labels using the appropriate strategy + if allow_label_overlap: + # Fast version: adds first instance to set with missing label (may create overlap) + X_train, X_val, y_train, y_val = self._handle_missing_labels_fast( + state, + X_train, + X_val, + y_train, + y_val, + X_train_all, + y_train_all, + is_spark_dataframe, + data_is_df, ) else: - X_rest = ( - iloc_pandas_on_spark(X_train_all, rest) - if is_spark_dataframe - else X_train_all.iloc[rest] - if data_is_df - else X_train_all[rest] - ) - y_rest = ( - iloc_pandas_on_spark(y_train_all, rest) - if is_spark_dataframe - else y_train_all.iloc[rest] - if data_is_df - else y_train_all[rest] + # Precise version: avoids overlap when possible (slower) + X_train, X_val, y_train, y_val = self._handle_missing_labels_no_overlap( + state, + X_train, + X_val, + y_train, + y_val, + X_train_all, + y_train_all, + is_spark_dataframe, + data_is_df, + split_ratio, ) - stratify = y_rest if split_type == "stratified" else None - X_train, X_val, y_train, y_val = self._train_test_split( - state, X_rest, y_rest, first, rest, split_ratio, stratify - ) - X_train = concat(X_first, X_train) - y_train = concat(label_set, y_train) if data_is_df else np.concatenate([label_set, y_train]) - X_val = concat(X_first, X_val) - y_val = concat(label_set, y_val) if data_is_df else np.concatenate([label_set, y_val]) if 
isinstance(y_train, (psDataFrame, pd.DataFrame)) and y_train.shape[1] == 1: y_train = y_train[y_train.columns[0]] y_val = y_val[y_val.columns[0]] - y_train.name = y_val.name = y_rest.name + # Only set name if y_train_all is a Series (not a DataFrame) + if isinstance(y_train_all, (pd.Series, psSeries)): + y_train.name = y_val.name = y_train_all.name elif self.is_regression(): X_train, X_val, y_train, y_val = self._train_test_split( diff --git a/test/automl/test_no_overlap.py b/test/automl/test_no_overlap.py new file mode 100644 index 0000000000..443d8b9980 --- /dev/null +++ b/test/automl/test_no_overlap.py @@ -0,0 +1,272 @@ +"""Test to ensure correct label overlap handling for classification tasks""" +import numpy as np +import pandas as pd +from sklearn.datasets import load_iris, make_classification + +from flaml import AutoML + + +def test_allow_label_overlap_true(): + """Test with allow_label_overlap=True (fast mode, default)""" + # Load iris dataset + dic_data = load_iris(as_frame=True) + iris_data = dic_data["frame"] + + # Prepare data + x_train = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]].to_numpy() + y_train = iris_data["target"] + + # Train with fast mode (default) + automl = AutoML() + automl_settings = { + "max_iter": 5, + "metric": "accuracy", + "task": "classification", + "estimator_list": ["lgbm"], + "eval_method": "holdout", + "split_type": "stratified", + "keep_search_state": True, + "retrain_full": False, + "auto_augment": False, + "verbose": 0, + "allow_label_overlap": True, # Fast mode + } + automl.fit(x_train, y_train, **automl_settings) + + # Check results + input_size = len(x_train) + train_size = len(automl._state.X_train) + val_size = len(automl._state.X_val) + + # With stratified split on balanced data, fast mode may have no overlap + assert ( + train_size + val_size >= input_size + ), f"Inconsistent sizes. 
Input: {input_size}, Train: {train_size}, Val: {val_size}" + + # Verify all classes are represented in both sets + train_labels = set(np.unique(automl._state.y_train)) + val_labels = set(np.unique(automl._state.y_val)) + all_labels = set(np.unique(y_train)) + + assert train_labels == all_labels, f"Not all labels in train. All: {all_labels}, Train: {train_labels}" + assert val_labels == all_labels, f"Not all labels in val. All: {all_labels}, Val: {val_labels}" + + print( + f"✓ Test passed (fast mode): Input: {input_size}, Train: {train_size}, Val: {val_size}, " + f"Overlap: {train_size + val_size - input_size}" + ) + + +def test_allow_label_overlap_false(): + """Test with allow_label_overlap=False (precise mode)""" + # Load iris dataset + dic_data = load_iris(as_frame=True) + iris_data = dic_data["frame"] + + # Prepare data + x_train = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]].to_numpy() + y_train = iris_data["target"] + + # Train with precise mode + automl = AutoML() + automl_settings = { + "max_iter": 5, + "metric": "accuracy", + "task": "classification", + "estimator_list": ["lgbm"], + "eval_method": "holdout", + "split_type": "stratified", + "keep_search_state": True, + "retrain_full": False, + "auto_augment": False, + "verbose": 0, + "allow_label_overlap": False, # Precise mode + } + automl.fit(x_train, y_train, **automl_settings) + + # Check that there's no overlap (or minimal overlap for single-instance classes) + input_size = len(x_train) + train_size = len(automl._state.X_train) + val_size = len(automl._state.X_val) + + # Verify all classes are represented + all_labels = set(np.unique(y_train)) + + # Should have no overlap or minimal overlap + overlap = train_size + val_size - input_size + assert overlap <= len(all_labels), f"Excessive overlap: {overlap}" + + # Verify all classes are represented + train_labels = set(np.unique(automl._state.y_train)) + val_labels = set(np.unique(automl._state.y_val)) + + 
combined_labels = train_labels.union(val_labels) + assert combined_labels == all_labels, f"Not all labels present. All: {all_labels}, Combined: {combined_labels}" + + print( + f"✓ Test passed (precise mode): Input: {input_size}, Train: {train_size}, Val: {val_size}, " + f"Overlap: {overlap}" + ) + + +def test_uniform_split_with_overlap_control(): + """Test with uniform split and both overlap modes""" + # Load iris dataset + dic_data = load_iris(as_frame=True) + iris_data = dic_data["frame"] + + # Prepare data + x_train = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]].to_numpy() + y_train = iris_data["target"] + + # Test precise mode with uniform split + automl = AutoML() + automl_settings = { + "max_iter": 5, + "metric": "accuracy", + "task": "classification", + "estimator_list": ["lgbm"], + "eval_method": "holdout", + "split_type": "uniform", + "keep_search_state": True, + "retrain_full": False, + "auto_augment": False, + "verbose": 0, + "allow_label_overlap": False, # Precise mode + } + automl.fit(x_train, y_train, **automl_settings) + + input_size = len(x_train) + train_size = len(automl._state.X_train) + val_size = len(automl._state.X_val) + + # Verify all classes are represented + train_labels = set(np.unique(automl._state.y_train)) + val_labels = set(np.unique(automl._state.y_val)) + all_labels = set(np.unique(y_train)) + + combined_labels = train_labels.union(val_labels) + assert combined_labels == all_labels, "Not all labels present with uniform split" + + print(f"✓ Test passed (uniform split): Input: {input_size}, Train: {train_size}, Val: {val_size}") + + +def test_with_sample_weights(): + """Test label overlap handling with sample weights""" + # Create a simple dataset + X, y = make_classification( + n_samples=200, + n_features=10, + n_informative=5, + n_redundant=2, + n_classes=3, + n_clusters_per_class=1, + random_state=42, + ) + + # Create sample weights (giving more weight to some samples) + 
sample_weight = np.random.uniform(0.5, 2.0, size=len(y)) + + # Test fast mode with sample weights + automl_fast = AutoML() + automl_fast.fit( + X, + y, + task="classification", + metric="accuracy", + estimator_list=["lgbm"], + eval_method="holdout", + split_type="stratified", + max_iter=3, + keep_search_state=True, + retrain_full=False, + auto_augment=False, + verbose=0, + allow_label_overlap=True, # Fast mode + sample_weight=sample_weight, + ) + + # Verify all labels present + train_labels_fast = set(np.unique(automl_fast._state.y_train)) + val_labels_fast = set(np.unique(automl_fast._state.y_val)) + all_labels = set(np.unique(y)) + + assert train_labels_fast == all_labels, "Not all labels in train (fast mode with weights)" + assert val_labels_fast == all_labels, "Not all labels in val (fast mode with weights)" + + # Test precise mode with sample weights + automl_precise = AutoML() + automl_precise.fit( + X, + y, + task="classification", + metric="accuracy", + estimator_list=["lgbm"], + eval_method="holdout", + split_type="stratified", + max_iter=3, + keep_search_state=True, + retrain_full=False, + auto_augment=False, + verbose=0, + allow_label_overlap=False, # Precise mode + sample_weight=sample_weight, + ) + + # Verify all labels present + train_labels_precise = set(np.unique(automl_precise._state.y_train)) + val_labels_precise = set(np.unique(automl_precise._state.y_val)) + + combined_labels = train_labels_precise.union(val_labels_precise) + assert combined_labels == all_labels, "Not all labels present (precise mode with weights)" + + print("✓ Test passed with sample weights (fast and precise modes)") + + +def test_single_instance_class(): + """Test handling of single-instance classes""" + # Create imbalanced dataset where one class has only 1 instance + X = np.random.randn(50, 4) + y = np.array([0] * 40 + [1] * 9 + [2] * 1) # Class 2 has only 1 instance + + # Test precise mode - should add single instance to both sets + automl = AutoML() + automl.fit( + X, + 
y, + task="classification", + metric="accuracy", + estimator_list=["lgbm"], + eval_method="holdout", + split_type="uniform", + max_iter=3, + keep_search_state=True, + retrain_full=False, + auto_augment=False, + verbose=0, + allow_label_overlap=False, # Precise mode + ) + + # Verify all labels present + train_labels = set(np.unique(automl._state.y_train)) + val_labels = set(np.unique(automl._state.y_val)) + all_labels = set(np.unique(y)) + + # Single-instance class should be in both sets + combined_labels = train_labels.union(val_labels) + assert combined_labels == all_labels, "Not all labels present with single-instance class" + + # Check that single-instance class (label 2) is in both sets + assert 2 in train_labels, "Single-instance class not in train" + assert 2 in val_labels, "Single-instance class not in val" + + print("✓ Test passed with single-instance class") + + +if __name__ == "__main__": + test_allow_label_overlap_true() + test_allow_label_overlap_false() + test_uniform_split_with_overlap_control() + test_with_sample_weights() + test_single_instance_class() + print("\n✓ All tests passed!") diff --git a/website/docs/Best-Practices.md b/website/docs/Best-Practices.md index a1e87f11cd..70c8d75610 100644 --- a/website/docs/Best-Practices.md +++ b/website/docs/Best-Practices.md @@ -28,6 +28,35 @@ print( - pass `sample_weight` to `AutoML.fit()`; - consider setting class weights via `custom_hp` / `fit_kwargs_by_estimator` for specific estimators (see [FAQ](FAQ)). - **Probability vs label metrics**: use `roc_auc` / `log_loss` when you care about calibrated probabilities. +- **Label overlap control** (holdout evaluation only): + - By default, FLAML uses a fast strategy (`allow_label_overlap=True`) that ensures all labels are present in both training and validation sets by adding missing labels' first instances to both sets. This is efficient but may create minor overlap. + - For strict no-overlap validation, use `allow_label_overlap=False`. 
This slower but more precise strategy intelligently re-splits multi-instance classes to avoid overlap while maintaining label completeness. + +```python +from flaml import AutoML + +# Fast version (default): allows overlap for efficiency +automl_fast = AutoML() +automl_fast.fit( + X_train, + y_train, + task="classification", + eval_method="holdout", + allow_label_overlap=True, +) # default + +# Precise version: avoids overlap when possible +automl_precise = AutoML() +automl_precise.fit( + X_train, + y_train, + task="classification", + eval_method="holdout", + allow_label_overlap=False, +) # slower but more precise +``` + +Note: This only affects holdout evaluation. CV and custom validation sets are unaffected. ## Regression