Add fallback-mode option for subset

takanabe · takanabe · commit 02946a267735 · 2026-06-11T17:18:33.000-07:00
backport v2 merged code from #1308
diff --git a/launchable/commands/subset.py b/launchable/commands/subset.py
@@ -2,9 +2,11 @@
 import json
 import os
 import pathlib
+import random
 import re
 import subprocess
 import sys
+from enum import Enum
 from multiprocessing import Process
 from os.path import join
 from typing import Any, Callable, Dict, List, Optional, Sequence, TextIO, Tuple, Union
@@ -27,6 +29,12 @@
 from .helper import find_or_create_session
 from .test_path_writer import TestPathWriter
 
+class FallbackMode(str, Enum):  # --fallback-mode values; str mixin keeps members equal to the raw strings
+    RUN_ALL = "run-all"  # default: run all tests as usual
+    STOP = "stop"  # exit with a non-zero status so CI halts
+    RANDOM_SAMPLE = "random-sample"  # pick a random subset locally, sized from --target
+
+
 LARGE_TEST_PATHS_THRESHOLD = 100000
 DEFAULT_CONNECT_TIMEOUT = 5
 LARGE_PAYLOAD_CONNECT_TIMEOUT = 60
@@ -231,6 +239,17 @@
     type=str,
     hidden=True,
 )
+@click.option(
+    "--fallback-mode",
+    "fallback_mode",
+    hidden=True,
+    type=click.Choice(["run-all", "stop", "random-sample"]),
+    default="run-all",
+    help="Behavior when the subset API is unavailable or the model is untrained. "
+         "'run-all' (default) runs all tests as usual; 'stop' exits with a non-zero status so CI halts; "
+         "'random-sample' picks a random subset locally based on the count derived from --target "
+         "(no duration estimates are available in this path).",
+)
 @click.pass_context
 def subset(
     context: click.core.Context,
@@ -262,7 +281,9 @@ def subset(
     use_case: Optional[str] = None,
     similarity: Optional[float] = None,
     subset_id_file: Optional[str] = None,
+    fallback_mode: str = "run-all",
 ):
+    fallback_mode_enum = FallbackMode(fallback_mode)
     app = context.obj
     tracking_client = TrackingClient(Command.SUBSET, app=app)
     client = LaunchableClient(
@@ -402,6 +423,7 @@ def __init__(self, app: Application):
             self.is_output_exclusion_rules = is_output_exclusion_rules
             self.is_get_tests_from_guess = is_get_tests_from_guess
             self.subset_id_file = subset_id_file
+            self.fallback_mode = fallback_mode_enum
             super(Optimize, self).__init__(app=app)
 
         def _default_output_handler(self, output: List[TestPath], rests: List[TestPath]):
@@ -587,6 +609,23 @@ def _write_subset_id_to_file(self, subset_result: SubsetResult):
             with open(self.subset_id_file, 'w', encoding='utf-8') as f:
                 f.write(str(subset_result.subset_id) + '\n')
 
+        def _fallback_result(self) -> SubsetResult:
+            """Return the local stand-in result selected by ``self.fallback_mode``.
+
+            STOP exits with status 1 instead of returning; RANDOM_SAMPLE samples
+            ``self.test_paths`` locally; any other mode uses ``from_test_paths``.
+            """
+            mode = self.fallback_mode
+            if mode == FallbackMode.STOP:
+                click.echo("Warning: the service failed to subset. Stopping build (--fallback-mode=stop).", err=True)
+                sys.exit(1)
+            if mode != FallbackMode.RANDOM_SAMPLE:
+                return SubsetResult.from_test_paths(self.test_paths)
+            fraction = 1.0 if target is None else float(target)  # no --target value: sample at 100%
+            notice = "Warning: the service failed to subset. Falling back to local random sample at {:.0%}."
+            click.echo(notice.format(fraction), err=True)
+            return SubsetResult.from_random_sample(self.test_paths, fraction)
+
         def request_subset(self) -> SubsetResult:
             test_runner = context.invoked_subcommand
             # temporarily extend the timeout because subset API response has become slow
@@ -622,7 +661,7 @@ def request_subset(self) -> SubsetResult:
                 )
                 client.print_exception_and_recover(
                     e, "Warning: the service failed to subset. Falling back to running all tests")
-                return SubsetResult.from_test_paths(self.test_paths)
+                return self._fallback_result()
 
         def run(self):
             """called after tests are scanned to compute the optimized order"""
@@ -642,10 +681,16 @@ def run(self):
             if not session_id:
                 # Session ID in --session is missing. It might be caused by
                 # Launchable API errors.
-                subset_result = SubsetResult.from_test_paths(self.test_paths)
+                subset_result = self._fallback_result()
             else:
                 subset_result = self.request_subset()
 
+            if subset_result.is_brainless:
+                click.echo("Your model is currently in training", err=True)
+                # brainless mode splits tests on server, so skip client-side fallback for random-sample
+                if self.fallback_mode != FallbackMode.RANDOM_SAMPLE:
+                    subset_result = self._fallback_result()
+
             if len(subset_result.subset) == 0:
                 if len(subset_result.rest) > 0 and client.is_pts_v2_enabled() and confidence is not None:
                     # Adaptive Dynamic Subset can return an empty subset when the model
@@ -707,10 +752,6 @@ def run(self):
                 ],
             ]
 
-            if subset_result.is_brainless:
-                click.echo(
-                    "Your model is currently in training", err=True)
-
             click.echo(
                 "Launchable created subset {} for build {} (test session {}) in workspace {}/{}".format(
                     subset_result.subset_id,
@@ -775,3 +816,11 @@ def from_test_paths(cls, test_paths: List[TestPath]) -> 'SubsetResult':
             is_brainless=False,
             is_observation=False
         )
+
+    @classmethod
+    def from_random_sample(cls, test_paths: List[TestPath], target: float) -> 'SubsetResult':
+        """Sample round(len(test_paths) * target) paths (min 1, max all) as the subset; the others form rest."""
+        picked = random.sample(test_paths, min(len(test_paths), max(1, round(len(test_paths) * target))))
+        picked_ids = {id(path) for path in picked}  # matched by object identity, not by equality
+        leftover = [path for path in test_paths if id(path) not in picked_ids]
+        return cls(subset=picked, rest=leftover, subset_id='', summary={}, is_brainless=False, is_observation=False)
diff --git a/tests/commands/test_api_error.py b/tests/commands/test_api_error.py
@@ -473,3 +473,110 @@ def assert_tracking_count(self, tracking, count: int):
                 if attempt > 10:
                     break
             self.assertEqual(tracking.call_count, count)
+
+
+class FallbackModeTest(CliTestCase):  # CLI-level checks of `subset --fallback-mode` against a mocked subset endpoint
+    test_files_dir = Path(__file__).parent.joinpath('../data/minitest/').resolve()
+
+    def _subset_args(self, rest_file_name, extra_args=()):  # argv for `subset --target 50% ... minitest <glob>`
+        return (
+            "subset", "--target", "50%",
+            "--session", self.session,
+            "--rest", rest_file_name,
+        ) + tuple(extra_args) + (  # extra options are placed before the test runner name
+            "minitest",
+            str(self.test_files_dir) + "/test/**/*.rb",
+        )
+
+    # --- API error cases: the subset endpoint is mocked to answer HTTP 500 ---
+
+    @responses.activate
+    @mock.patch.dict(os.environ, {"LAUNCHABLE_TOKEN": CliTestCase.launchable_token})
+    def test_api_error_fallback_stop(self):
+        responses.replace(
+            responses.POST,
+            "{base}/intake/organizations/{org}/workspaces/{ws}/subset".format(
+                base=get_base_url(), org=self.organization, ws=self.workspace),
+            status=500)
+
+        with tempfile.NamedTemporaryFile(delete=False) as rest_file:
+            result = self.cli(*self._subset_args(rest_file.name, ("--fallback-mode", "stop")), mix_stderr=False)
+            self.assertEqual(result.exit_code, 1)  # 'stop' is expected to end the command with status 1
+
+    @responses.activate
+    @mock.patch.dict(os.environ, {"LAUNCHABLE_TOKEN": CliTestCase.launchable_token})
+    def test_api_error_fallback_random_sample(self):
+        responses.replace(
+            responses.POST,
+            "{base}/intake/organizations/{org}/workspaces/{ws}/subset".format(
+                base=get_base_url(), org=self.organization, ws=self.workspace),
+            status=500)
+
+        with tempfile.NamedTemporaryFile(delete=False) as rest_file:
+            result = self.cli(*self._subset_args(rest_file.name, ("--fallback-mode", "random-sample")), mix_stderr=False)
+            self.assert_success(result)
+            self.assertIn("example_test.rb", result.stdout)  # NOTE(review): holds only if the 50% sample includes this file
+
+    @responses.activate
+    @mock.patch.dict(os.environ, {"LAUNCHABLE_TOKEN": CliTestCase.launchable_token})
+    def test_api_error_fallback_run_all_default(self):
+        responses.replace(
+            responses.POST,
+            "{base}/intake/organizations/{org}/workspaces/{ws}/subset".format(
+                base=get_base_url(), org=self.organization, ws=self.workspace),
+            status=500)
+
+        with tempfile.NamedTemporaryFile(delete=False) as rest_file:
+            result = self.cli(*self._subset_args(rest_file.name), mix_stderr=False)  # no --fallback-mode: default applies
+            self.assert_success(result)
+            self.assertIn("example_test.rb", result.stdout)
+
+    # --- Brainless mode cases: the endpoint answers HTTP 200 with "isBrainless": True ---
+
+    @responses.activate
+    @mock.patch.dict(os.environ, {"LAUNCHABLE_TOKEN": CliTestCase.launchable_token})
+    def test_brainless_fallback_stop(self):
+        responses.replace(
+            responses.POST,
+            "{base}/intake/organizations/{org}/workspaces/{ws}/subset".format(
+                base=get_base_url(), org=self.organization, ws=self.workspace),
+            json={"testPaths": [[{"type": "file", "name": "example_test.rb"}]],
+                  "rest": [], "subsettingId": 1, "isBrainless": True, "summary": {}},
+            status=200)
+
+        with tempfile.NamedTemporaryFile(delete=False) as rest_file:
+            result = self.cli(*self._subset_args(rest_file.name, ("--fallback-mode", "stop")), mix_stderr=False)
+            self.assertEqual(result.exit_code, 1)  # 'stop' also applies when the response is brainless
+
+    @responses.activate
+    @mock.patch.dict(os.environ, {"LAUNCHABLE_TOKEN": CliTestCase.launchable_token})
+    def test_brainless_fallback_random_sample(self):
+        # In brainless mode the server already split the tests, so random-sample keeps the server's result as-is.
+        responses.replace(
+            responses.POST,
+            "{base}/intake/organizations/{org}/workspaces/{ws}/subset".format(
+                base=get_base_url(), org=self.organization, ws=self.workspace),
+            json={"testPaths": [[{"type": "file", "name": "example_test.rb"}]],
+                  "rest": [], "subsettingId": 1, "isBrainless": True, "summary": {}},
+            status=200)
+
+        with tempfile.NamedTemporaryFile(delete=False) as rest_file:
+            result = self.cli(*self._subset_args(rest_file.name, ("--fallback-mode", "random-sample")), mix_stderr=False)
+            self.assert_success(result)
+            self.assertIn("example_test.rb", result.stdout)
+
+    @responses.activate
+    @mock.patch.dict(os.environ, {"LAUNCHABLE_TOKEN": CliTestCase.launchable_token})
+    def test_brainless_fallback_run_all_default(self):
+        responses.replace(
+            responses.POST,
+            "{base}/intake/organizations/{org}/workspaces/{ws}/subset".format(
+                base=get_base_url(), org=self.organization, ws=self.workspace),
+            json={"testPaths": [[{"type": "file", "name": "example_test.rb"}]],
+                  "rest": [], "subsettingId": 1, "isBrainless": True, "summary": {}},
+            status=200)
+
+        with tempfile.NamedTemporaryFile(delete=False) as rest_file:
+            result = self.cli(*self._subset_args(rest_file.name), mix_stderr=False)
+            self.assert_success(result)
+            self.assertIn("example_test.rb", result.stdout)  # NOTE(review): same check as the random-sample case above