diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml index 8290a1e70b..155e34015c 100644 --- a/.github/workflows/docker-test.yml +++ b/.github/workflows/docker-test.yml @@ -55,6 +55,7 @@ jobs: os: ubuntu-24.04 firefox-install-lang-package: enable-managed-downloads: + retain-on-failure: - test-strategy: test_video_dynamic_name use-random-user: false test-video: true @@ -62,6 +63,7 @@ jobs: os: ubuntu-24.04 firefox-install-lang-package: enable-managed-downloads: + retain-on-failure: - test-strategy: test_video_standalone use-random-user: false test-video: true @@ -69,6 +71,7 @@ jobs: os: ubuntu-24.04 firefox-install-lang-package: enable-managed-downloads: + retain-on-failure: - test-strategy: test_node_docker use-random-user: false test-video: true @@ -76,6 +79,7 @@ jobs: os: ubuntu-24.04 firefox-install-lang-package: enable-managed-downloads: + retain-on-failure: - test-strategy: test_standalone_docker use-random-user: false test-video: true @@ -83,6 +87,7 @@ jobs: os: ubuntu-24.04 firefox-install-lang-package: enable-managed-downloads: + retain-on-failure: - test-strategy: test_parallel use-random-user: false test-video: false @@ -119,6 +124,7 @@ jobs: os: ubuntu-24.04-arm firefox-install-lang-package: true enable-managed-downloads: true + retain-on-failure: true - test-strategy: test_video_dynamic_name use-random-user: false test-video: true @@ -126,6 +132,7 @@ jobs: os: ubuntu-24.04-arm firefox-install-lang-package: true enable-managed-downloads: true + retain-on-failure: true - test-strategy: test_video_standalone use-random-user: false test-video: true @@ -133,6 +140,7 @@ jobs: os: ubuntu-24.04-arm firefox-install-lang-package: true enable-managed-downloads: true + retain-on-failure: true - test-strategy: test_node_docker_video_sidecar use-random-user: false test-video: true @@ -140,6 +148,7 @@ jobs: os: ubuntu-24.04-arm firefox-install-lang-package: true enable-managed-downloads: false + retain-on-failure: true - test-strategy: test_standalone_docker_video_sidecar use-random-user: false test-video: true @@ -147,6 +156,7 @@ jobs: os: ubuntu-24.04-arm firefox-install-lang-package: true enable-managed-downloads: true + retain-on-failure: true - test-strategy: test_parallel use-random-user: false test-video: false @@ -246,9 +256,13 @@ jobs: if [ -n "${SELENIUM_ENABLE_MANAGED_DOWNLOADS}" ]; then echo "SELENIUM_ENABLE_MANAGED_DOWNLOADS=${SELENIUM_ENABLE_MANAGED_DOWNLOADS}" >> $GITHUB_ENV fi + if [ -n "${TEST_RETAIN_ON_FAILURE}" ]; then + echo "TEST_RETAIN_ON_FAILURE=${TEST_RETAIN_ON_FAILURE}" >> $GITHUB_ENV + fi env: TEST_FIREFOX_INSTALL_LANG_PACKAGE: ${{ matrix.firefox-install-lang-package }} SELENIUM_ENABLE_MANAGED_DOWNLOADS: ${{ matrix.enable-managed-downloads }} + TEST_RETAIN_ON_FAILURE: ${{ matrix.retain-on-failure }} - name: Run Docker Compose to ${{ matrix.test-strategy }} on AMD64 if: contains(matrix.os, 'arm') == false uses: nick-invision/retry@master @@ -268,6 +282,7 @@ jobs: command: | USE_RANDOM_USER_ID=${{ matrix.use-random-user }} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} \ TEST_FIREFOX_INSTALL_LANG_PACKAGE=${TEST_FIREFOX_INSTALL_LANG_PACKAGE} SELENIUM_ENABLE_MANAGED_DOWNLOADS=${SELENIUM_ENABLE_MANAGED_DOWNLOADS} \ + TEST_RETAIN_ON_FAILURE=${TEST_RETAIN_ON_FAILURE} \ make ${{ matrix.test-strategy }} - name: Upload recorded video if: matrix.test-video == true diff --git a/ENV_VARIABLES.md b/ENV_VARIABLES.md index 6e6c22b42a..cfa6f219c3 100644 --- a/ENV_VARIABLES.md +++ b/ENV_VARIABLES.md @@ -2,11 +2,11 @@ |--------------|---------------|-------------|----------------------| | SE_SCREEN_WIDTH | 1920 | Use in Node to set the screen width | | | SE_SCREEN_HEIGHT | 1080 | Use in Node to set the screen height | | -| SE_VIDEO_FILE_NAME | auto | Use in function video recording to set the output file name. Set `auto` for dynamic file name relying on test metadata | | +| SE_VIDEO_FILE_NAME | video.mp4 | Use in function video recording to set the output file name. Set `auto` for dynamic file name relying on test metadata | | | SE_FRAME_RATE | 15 | Set the frame rate for FFmpeg in video recording | | | SE_CODEC | libx264 | Set the codec for FFmpeg in video recording | | | SE_PRESET | -preset ultrafast | Set the preset for FFmpeg in video recording | | -| SE_VIDEO_UPLOAD_ENABLED | false | Enable video upload | | +| SE_VIDEO_UPLOAD_ENABLED | false | Deprecated in event-driven mode (`video_service.py`); upload enablement is now derived from non-empty `SE_UPLOAD_DESTINATION_PREFIX` | | | SE_VIDEO_INTERNAL_UPLOAD | true | Enable video upload using Rclone in the same recorder container | | | SE_UPLOAD_DESTINATION_PREFIX | | Remote name and destination path to upload | | | SE_UPLOAD_PIPE_FILE_NAME | | Set the pipe file name for video upload to consume | | @@ -162,9 +162,9 @@ | SE_BIND_BUS | true | When true, the Standalone will start the Event Bus and connect itself. Standalone also expose publishing and subscribing port for sidecar service can listen on session events. | --bind-bus | | SE_EVENT_BUS_IMPLEMENTATION | | Full class name of non-default event bus implementation. For example: org.openqa.selenium.events.zeromq.ZeroMqEventBus | --events-implementation | | SE_NODE_KUBERNETES_CONFIG_FILENAME | kubernetes.toml | A separate TOML config file name for Dynamic Grid in Kubernetes, which avoid conflict with browser config if shared mouted volume | | -| SE_UPLOAD_FAILURE_SESSION_EVENTS | :failed,:failure | By default, a failure session event type contains ":failed" or ":failure" fires that will trigger the upload failure session only. User can define more event types which handled in your test framework, separated by comma. | | -| SE_UPLOAD_FAILURE_SESSION_ONLY | false | When true, only recording of sessions that are not exited normally (session timed out, or custom events were fired by the client match with failure events defined) | | | SE_VIDEO_EVENT_DRIVEN | true | Backend of video recorder and uploader will subscribe to Grid Event Bus for session event lifecycle for processing instead of traditional polling Node session capabilities via /status endpoint. | | | SE_PLAIN_LOGS | true | Use plain log lines | --plain-logs | | SE_DYNAMIC_MAX_SESSIONS | | Set the number of maximum concurrent sessions of Dynamic Node (both Docker and Kubernetes) | | | SE_DYNAMIC_OVERRIDE_MAX_SESSIONS | | Enable this flag for setting max session take effect in Dynamic Node (both Docker and Kubernetes) | | +| SE_FAILURE_SESSION_EVENTS | :failed,:failure,:error,:aborted | By default, a failure session event type contains ":failed", ":failure", ":error" or ":aborted" substrings that trigger the retain-on-failure sub-sequence. User can define more event types which handled in your test framework, separated by comma. | | +| SE_RETAIN_ON_FAILURE | false | When true, recordings for sessions that pass are discarded immediately. Only sessions that fail (via failure events or abnormal close) retain their recordings and are queued for upload. | | diff --git a/Makefile b/Makefile index f45f11774e..98ed303431 100644 --- a/Makefile +++ b/Makefile @@ -1111,6 +1111,7 @@ test_video: video hub chrome firefox edge chromium echo BASIC_AUTH_PASSWORD=$(or $(BASIC_AUTH_PASSWORD), "admin") >> .env ; \ echo SUB_PATH=$(or $(SUB_PATH), "/selenium") >> .env ; \ echo TEST_ADD_CAPS_RECORD_VIDEO=$(or $(TEST_ADD_CAPS_RECORD_VIDEO), "true") >> .env ; \ + echo TEST_RETAIN_ON_FAILURE=$(or $(TEST_RETAIN_ON_FAILURE), "false") >> .env ; \ if [ $$node = "NodeChrome" ] ; then \ echo BROWSER=chrome >> .env ; \ echo VIDEO_FILE_NAME=$${VIDEO_FILE_NAME:-"chrome_video.mp4"} >> .env ; \ @@ -1230,6 +1231,7 @@ test_node_docker: hub standalone_docker standalone_chrome standalone_firefox sta echo GRID_URL=$(or $(GRID_URL), "") >> .env ; \ echo HUB_CHECKS_INTERVAL=$(or $(HUB_CHECKS_INTERVAL), 20) >> .env ; \ echo TEST_CUSTOM_SPECIFIC_NAME=$(or $(TEST_CUSTOM_SPECIFIC_NAME), "true") >> .env ; \ + echo TEST_RETAIN_ON_FAILURE=$(or $(TEST_RETAIN_ON_FAILURE), "false") >> .env ; \ echo NODE=$$node >> .env ; \ echo UID=$$(id -u) >> .env ; \ echo BINDING_VERSION=$(BINDING_VERSION) >> .env ; \ diff --git a/README.md b/README.md index ac833f66dc..b00a531254 100644 --- a/README.md +++ b/README.md @@ -813,7 +813,7 @@ services: `SE_VIDEO_FILE_NAME=auto` will use the session id as the video file name. This ensures that the video file name is unique to upload. Video file name construction automatically works based on Node endpoint `/status` (and optional GraphQL endpoint) to get session ID, capabilities. -`SE_VIDEO_UPLOAD_ENABLED=true` (`false` by default) will enable the video upload feature. In the background, it will create a pipefile with file and destination for uploader to consume and proceed. +`SE_VIDEO_UPLOAD_ENABLED=true` enables upload in the legacy shell-based mode (`SE_VIDEO_EVENT_DRIVEN=false`). In event-driven mode (the default), this variable is **deprecated** — upload is enabled automatically when `SE_UPLOAD_DESTINATION_PREFIX` is set to a non-empty value. `SE_VIDEO_INTERNAL_UPLOAD=true` (by default) will use RCLONE installed in the container for upload. If you want to use another sidecar container for upload, set it to `false`. @@ -846,6 +846,70 @@ When using in Dynamic Grid, those variables should be combined with the prefix ` | `SE_UPLOAD_CONFIG_FILE_NAME` | `upload.conf` | Config file for remote host instead of set via env variable prefix SE_RCLONE_* | | `SE_UPLOAD_CONFIG_DIRECTORY` | `/opt/bin` | Directory of config file (change it when conf file in another directory is mounted) | +### Retain recordings for failed sessions only + +In event-driven mode (`SE_VIDEO_EVENT_DRIVEN=true`, the default), the video service subscribes to the Grid's ZeroMQ event bus and reacts to session lifecycle events in real time. This enables a **retain-on-failure** strategy: record every session, but automatically discard the video when the session passes and only keep (and upload) recordings from sessions that fail. + +Enable it globally with the environment variable: + +```yaml +SE_RETAIN_ON_FAILURE=true +``` + +A session is treated as **failed** when either of the following is true: + +1. The test code fires a session event whose `eventType` contains a substring from `SE_FAILURE_SESSION_EVENTS` (default: `:failed,:failure,:error,:aborted`). +2. The session closes with an abnormal reason — `TIMEOUT`, `NODE_REMOVED`, or `NODE_RESTARTED` — instead of the normal `QUIT_COMMAND`. + +| Environment variable | Default | Description | +|---------------------------|----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| +| `SE_RETAIN_ON_FAILURE` | `false` | Discard recordings of sessions that pass. Only recordings from failed sessions are retained on disk and queued for upload. | +| `SE_FAILURE_SESSION_EVENTS` | `:failed,:failure,:error,:aborted` | Comma-separated substrings. Any session event whose `eventType` contains one of these (case-insensitive) marks the session as failed. | + +The `se:retainOnFailure` session **capability** overrides the global container env var for a specific session. For example, to retain the recording of a single session regardless of the global setting: + +```python +options.set_capability('se:retainOnFailure', True) +``` + +| `se:retainOnFailure` cap | `SE_RETAIN_ON_FAILURE` env | Effective behaviour | +|--------------------------|----------------------------|----------------------------------| +| `true` | `false` (default) | Retain on failure for this session | +| `false` | `true` | Always retain for this session | +| absent | `true` | Retain on failure (global default) | +| absent | `false` (default) | Always retain (global default) | + +#### Firing session events from test code + +The [Session Event API](https://www.selenium.dev/blog/2026/selenium-grid-4-41-deep-dive/) lets test code push named events directly to the Grid. The video service listens for these events on the ZeroMQ bus and uses them to determine session failure. + +Call `driver.fire_session_event(eventType, payload)` from your test. Any `eventType` that contains a configured failure substring (e.g. `"test:failed"` contains `":failed"`) marks the session as failed. + +```python +from selenium.webdriver.chrome.options import Options as ChromeOptions +from selenium import webdriver + +options = ChromeOptions() +options.set_capability('se:name', 'checkout_flow') +options.set_capability('se:retainOnFailure', True) # discard video if this session passes + +driver = webdriver.Remote(options=options, command_executor="http://localhost:4444") + +try: + driver.get("https://selenium.dev") + # ... test steps ... +except Exception as exc: + # "test:failed" contains ":failed" — matches the default SE_FAILURE_SESSION_EVENTS + driver.fire_session_event("test:failed", {"error": str(exc)}) + raise +finally: + driver.quit() +``` + +> **Note:** If the test catches an exception and still calls `driver.quit()` normally, the session close reason is `QUIT_COMMAND` (not abnormal). In that case, firing a failure event **before** `quit()` is the only way to mark the session as failed and prevent the recording from being discarded. + +So, you can control the **retain-on-failure** strategy fully from test code via session capabilities and fire session event. + ## Video recordings manager We utilize [File Browser](https://filebrowser.org/) as a video manager. It is a web-based file manager that allows you to manage files and folders in the storage. diff --git a/Video/Dockerfile b/Video/Dockerfile index 4be1ac5d72..9141a5f13d 100644 --- a/Video/Dockerfile +++ b/Video/Dockerfile @@ -52,7 +52,7 @@ ENV DISPLAY_NUM="99" \ SE_VIDEO_UPLOAD_ENABLED="false" \ SE_VIDEO_INTERNAL_UPLOAD="true" \ SE_UPLOAD_DESTINATION_PREFIX="" \ - SE_UPLOAD_FAILURE_SESSION_ONLY="false" \ - SE_UPLOAD_FAILURE_SESSION_EVENTS=":failed,:failure" + SE_RETAIN_ON_FAILURE="false" \ + SE_FAILURE_SESSION_EVENTS=":failed,:failure,:error,:aborted" EXPOSE 9000 diff --git a/Video/video_ready.py b/Video/video_ready.py index 00f13700ab..c4606ed22e 100755 --- a/Video/video_ready.py +++ b/Video/video_ready.py @@ -12,13 +12,25 @@ class Handler(BaseHTTPRequestHandler): def do_GET(self): - if ( - environ.get('SE_VIDEO_UPLOAD_ENABLED', 'false').lower() != 'true' - and environ.get('SE_VIDEO_FILE_NAME', 'video.mp4').lower() != 'auto' - ): - video_ready = "ffmpeg" in (p.name().lower() for p in psutil.process_iter()) - else: + event_driven = environ.get('SE_VIDEO_EVENT_DRIVEN', 'false').lower() == 'true' + record_video_enabled = environ.get('SE_RECORD_VIDEO', 'true').lower() == 'true' + + # Legacy shell mode compatibility: + # when global recording is disabled, report ready immediately. + if not event_driven and not record_video_enabled: video_ready = True + else: + # Event-driven mode enables upload by non-empty destination. + # Legacy shell mode keeps original SE_VIDEO_UPLOAD_ENABLED behaviour. + if event_driven: + upload_enabled = bool(environ.get('SE_UPLOAD_DESTINATION_PREFIX', '').strip()) + else: + upload_enabled = environ.get('SE_VIDEO_UPLOAD_ENABLED', 'false').lower() == 'true' + + if not upload_enabled and environ.get('SE_VIDEO_FILE_NAME', 'video.mp4').lower() != 'auto': + video_ready = "ffmpeg" in (p.name().lower() for p in psutil.process_iter()) + else: + video_ready = True response_code = 200 if video_ready else 404 response_text = "ready" if video_ready else "not ready" self.send_response(response_code) diff --git a/Video/video_service.py b/Video/video_service.py index ecdbad9b56..822e6b3004 100755 --- a/Video/video_service.py +++ b/Video/video_service.py @@ -23,10 +23,11 @@ SE_NODE_PORT: Node port for /status endpoint (default: 5555) SE_SERVER_PROTOCOL: Protocol for Node /status endpoint (default: http) SE_ROUTER_USERNAME, SE_ROUTER_PASSWORD: Optional Basic Auth credentials for Grid endpoints - SE_UPLOAD_FAILURE_SESSION_ONLY: Only upload videos for failed sessions (default: false) + SE_RETAIN_ON_FAILURE: Discard recordings for sessions that pass (default: false) + SE_FAILURE_SESSION_EVENTS: Comma-separated event substrings that mark a session as failed VIDEO_FOLDER: Directory to store video files SE_VIDEO_FILE_NAME: Fixed video file name ("auto" keeps per-session naming) - SE_VIDEO_UPLOAD_ENABLED: Enable video upload (default: false) + SE_UPLOAD_DESTINATION_PREFIX: Remote upload destination prefix; upload is enabled when non-empty SE_SCREEN_WIDTH, SE_SCREEN_HEIGHT: Screen dimensions SE_FRAME_RATE: Video frame rate (default: 15) """ @@ -39,9 +40,7 @@ import re import signal import ssl -import subprocess import sys -from contextlib import asynccontextmanager from dataclasses import dataclass, field from datetime import datetime from enum import Enum, auto @@ -85,8 +84,6 @@ class UploadTask: session_id: str video_file: str destination: str - should_upload: bool - reason: str # Why upload decision was made @dataclass @@ -105,6 +102,7 @@ class SessionState: has_failure_event: bool = False failure_events: List[str] = field(default_factory=list) test_name: str = "" + retain_on_failure: bool = False @property def is_failed(self) -> bool: @@ -153,9 +151,9 @@ def __init__(self): self.record_audio = os.environ.get("SE_RECORD_AUDIO", "false").lower() == "true" self.audio_source = os.environ.get("SE_AUDIO_SOURCE", "") - # Upload configuration - self.upload_enabled = os.environ.get("SE_VIDEO_UPLOAD_ENABLED", "false").lower() == "true" - self.upload_destination = os.environ.get("SE_UPLOAD_DESTINATION_PREFIX", "") + # Upload configuration (enabled when destination prefix is configured) + self.upload_destination = os.environ.get("SE_UPLOAD_DESTINATION_PREFIX", "").strip() + self.upload_enabled = bool(self.upload_destination) self.rclone_config = os.environ.get( "SE_RCLONE_CONFIG", os.environ.get("RCLONE_CONFIG", "/opt/selenium/upload.conf") ) @@ -164,13 +162,13 @@ def __init__(self): self.retain_local = os.environ.get("SE_UPLOAD_RETAIN_LOCAL_FILE", "false").lower() == "true" self.upload_batch_size = int(os.environ.get("SE_VIDEO_UPLOAD_BATCH_CHECK", "10")) self.upload_timeout = int(os.environ.get("SE_VIDEO_UPLOAD_TIMEOUT", "300")) - self.upload_failure_only = os.environ.get("SE_UPLOAD_FAILURE_SESSION_ONLY", "false").lower() == "true" - default_failure_events = [":failure", ":failed"] - custom_failure_events = os.environ.get("SE_UPLOAD_FAILURE_SESSION_EVENTS", "").lower() + self.retain_on_failure_enabled = os.environ.get("SE_RETAIN_ON_FAILURE", "false").lower() == "true" + default_failure_events = [":failure", ":failed", ":error", ":aborted"] + custom_failure_events = os.environ.get("SE_FAILURE_SESSION_EVENTS", "").lower() custom_failure_events_list = [] if custom_failure_events: custom_failure_events_list = [event.strip() for event in custom_failure_events.split(",") if event.strip()] - self.upload_failure_events = list(dict.fromkeys(default_failure_events + custom_failure_events_list)) + self.failure_events = list(dict.fromkeys(default_failure_events + custom_failure_events_list)) # Capability names self.video_cap_name = os.environ.get("VIDEO_CAP_NAME", "se:recordVideo") @@ -198,6 +196,7 @@ def __init__(self): self.se_node_port = os.environ.get("SE_NODE_PORT", default_node_port) self.node_status_verify_ssl = False self.node_poll_interval = int(os.environ.get("SE_VIDEO_POLL_INTERVAL", "2")) + self.file_ready_max_attempts = int(os.environ.get("SE_VIDEO_FILE_READY_WAIT_ATTEMPTS", "10")) # Drain configuration self.max_sessions = int(os.environ.get("SE_DRAIN_AFTER_SESSION_COUNT", "0")) @@ -298,7 +297,7 @@ def get_video_filename(self, session_id: str, capabilities: dict) -> tuple[bool, def is_failure_event_type(self, event_type: str) -> bool: """Check if event type indicates a failure.""" event_lower = event_type.lower() - return any(event in event_lower for event in self.upload_failure_events) + return any(event in event_lower for event in self.failure_events) def is_own_node_event(self, data: dict) -> bool: """Check if an event belongs to this Node. @@ -474,6 +473,7 @@ async def start_recording(self, session: SessionState) -> bool: session.ffmpeg_process = await asyncio.create_subprocess_exec( *cmd, env=env, + stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.PIPE, ) @@ -495,6 +495,45 @@ async def start_recording(self, session: SessionState) -> bool: session.status = SessionStatus.CREATED return False + async def wait_for_file_integrity(self, video_path: Path) -> bool: + """Wait for a recorded video to become readable by ffmpeg.""" + if not video_path.exists(): + logger.warning(f"Video file not found after recording stopped: {video_path}") + return False + + for attempt in range(1, self.file_ready_max_attempts + 1): + proc = await asyncio.create_subprocess_exec( + "ffmpeg", + "-v", + "error", + "-i", + str(video_path), + "-f", + "null", + "-", + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.PIPE, + ) + _, stderr_bytes = await proc.communicate() + if proc.returncode == 0: + return True + + stderr_text = stderr_bytes.decode(errors="replace").strip() + if attempt >= self.file_ready_max_attempts: + logger.warning( + f"Recorded video is not readable after {attempt} attempts: {video_path}; " + f"ffmpeg probe stderr={stderr_text}" + ) + return False + + logger.info( + f"Waiting for recorded video to become readable: {video_path} " + f"(attempt {attempt}/{self.file_ready_max_attempts})" + ) + await asyncio.sleep(self.node_poll_interval) + + return False + async def stop_recording(self, session: SessionState) -> bool: """Stop ffmpeg recording for a session.""" # Claim the process atomically before the first await. Asyncio is @@ -516,13 +555,31 @@ async def stop_recording(self, session: SessionState) -> bool: session.end_time = datetime.now() try: - proc.terminate() try: + graceful_stop_sent = False + if proc.stdin is not None and not proc.stdin.is_closing(): + try: + proc.stdin.write(b"q\n") + await proc.stdin.drain() + proc.stdin.close() + graceful_stop_sent = True + except (BrokenPipeError, ConnectionResetError): + pass + _, stderr_bytes = await asyncio.wait_for(proc.communicate(), timeout=10.0) except asyncio.TimeoutError: - logger.warning(f"ffmpeg did not stop gracefully for {session.session_id}, killing") - proc.kill() - _, stderr_bytes = await proc.communicate() + if graceful_stop_sent: + logger.warning(f"ffmpeg did not stop after quit command for {session.session_id}, terminating") + else: + logger.warning(f"ffmpeg stdin unavailable for {session.session_id}, terminating") + + proc.terminate() + try: + _, stderr_bytes = await asyncio.wait_for(proc.communicate(), timeout=5.0) + except asyncio.TimeoutError: + logger.warning(f"ffmpeg did not stop gracefully for {session.session_id}, killing") + proc.kill() + _, stderr_bytes = await proc.communicate() rc = proc.returncode if stderr_bytes: @@ -536,6 +593,12 @@ async def stop_recording(self, session: SessionState) -> bool: session.status = SessionStatus.CLOSED return False + if session.video_file: + video_path = Path(self.video_folder) / session.video_file + if not await self.wait_for_file_integrity(video_path): + session.status = SessionStatus.CLOSED + return False + self.recorded_count += 1 session.status = SessionStatus.CLOSED duration = session.duration_seconds @@ -557,44 +620,26 @@ async def queue_upload(self, session: SessionState) -> None: if not self.upload_enabled or not self.upload_destination: return + if not session.video_file: + return + video_path = f"{self.video_folder}/{session.video_file}" if not Path(video_path).exists(): logger.warning(f"Video file not found: {video_path}") return - # Determine if we should upload - should_upload = True - reason = "normal upload" - - if self.upload_failure_only: - if session.is_failed: - should_upload = True - if session.has_failure_event: - reason = f"failure event: {', '.join(session.failure_events)}" - else: - reason = f"abnormal close: {session.close_reason.value}" - else: - should_upload = False - reason = "session ended normally (SE_UPLOAD_FAILURE_SESSION_ONLY=true)" - task = UploadTask( session_id=session.session_id, video_file=video_path, destination=self.upload_destination, - should_upload=should_upload, - reason=reason, ) await self.upload_queue.put(task) - logger.debug(f"Queued upload task: {session.session_id}, should_upload={should_upload}") + logger.debug(f"Queued upload task: {session.session_id}") async def process_upload(self, task: UploadTask) -> None: """Process a single upload task.""" - if not task.should_upload: - logger.info(f"Skipping upload: {task.video_file} - {task.reason}") - return - - logger.info(f"Uploading: {task.video_file} -> {task.destination} ({task.reason})") + logger.info(f"Uploading: {task.video_file} -> {task.destination}") cmd = [ "rclone", @@ -705,17 +750,27 @@ async def handle_session_created(self, data: dict) -> None: capabilities = data.get("capabilities", {}) record_video, video_filename = self.get_video_filename(session_id, capabilities) + retain_on_failure_cap = capabilities.get("se:retainOnFailure", None) + if retain_on_failure_cap is None: + retain_on_failure = self.retain_on_failure_enabled + else: + retain_on_failure = str(retain_on_failure_cap).lower() == "true" + async with self.sessions_lock: session = SessionState( session_id=session_id, capabilities=capabilities, video_file=video_filename, record_video=record_video, + retain_on_failure=retain_on_failure, test_name=capabilities.get(self.test_name_cap, ""), ) self.sessions[session_id] = session - logger.info(f"Session created: {session_id}, record={record_video}, file={video_filename}") + logger.info( + f"Session created: {session_id}, record={record_video}, " + f"retain_on_failure={retain_on_failure}, file={video_filename}" + ) if record_video: await self.start_recording(session) @@ -753,7 +808,18 @@ async def handle_session_closed(self, data: dict) -> None: if session.ffmpeg_process is not None: stopped = await self.stop_recording(session) if stopped: - await self.queue_upload(session) + discard = session.retain_on_failure and not session.is_failed + if discard: + if session.video_file: + video_path = Path(self.video_folder) / session.video_file + if video_path.exists(): + try: + video_path.unlink() + logger.info(f"Video discarded for successful session {session_id} (retain-on-failure)") + except Exception as exc: + logger.warning(f"Failed to delete video file {video_path}: {exc}") + else: + await self.queue_upload(session) else: logger.warning(f"Recording stop failed for {session_id}, skipping upload") @@ -960,7 +1026,9 @@ async def cleanup(self) -> None: logger.info(f"Stopping recording: {session.session_id}") stopped = await self.stop_recording(session) if stopped: - await self.queue_upload(session) + discard = session.retain_on_failure and not session.is_failed + if not discard: + await self.queue_upload(session) else: logger.warning(f"Recording stop failed for {session.session_id}, skipping upload") @@ -983,8 +1051,8 @@ async def run(self) -> None: logger.info(f" Video size: {self.video_size}") logger.info(f" Upload enabled: {self.upload_enabled}") logger.info(f" Upload destination: {self.upload_destination}") - logger.info(f" Upload failure only: {self.upload_failure_only}") - logger.info(f" Upload failure events: {self.upload_failure_events}") + logger.info(f" Retain on failure: {self.retain_on_failure_enabled}") + logger.info(f" Failure events: {self.failure_events}") logger.info(f" Max sessions (drain): {self.max_sessions if self.max_sessions > 0 else 'unlimited'}") # Validate video folder diff --git a/scripts/generate_list_env_vars/description.yaml b/scripts/generate_list_env_vars/description.yaml index 3b58c7ede6..e8b6e9b2c4 100644 --- a/scripts/generate_list_env_vars/description.yaml +++ b/scripts/generate_list_env_vars/description.yaml @@ -18,7 +18,8 @@ description: Set the preset for FFmpeg in video recording cli: '' - name: SE_VIDEO_UPLOAD_ENABLED - description: Enable video upload + description: Deprecated in event-driven mode (`video_service.py`); upload enablement + is now derived from non-empty `SE_UPLOAD_DESTINATION_PREFIX` cli: '' - name: SE_VIDEO_INTERNAL_UPLOAD description: Enable video upload using Rclone in the same recorder container @@ -526,16 +527,6 @@ description: A separate TOML config file name for Dynamic Grid in Kubernetes, which avoid conflict with browser config if shared mouted volume cli: '' -- name: SE_UPLOAD_FAILURE_SESSION_EVENTS - description: By default, a failure session event type contains ":failed" or ":failure" - fires that will trigger the upload failure session only. User can define more - event types which handled in your test framework, separated by comma. - cli: '' -- name: SE_UPLOAD_FAILURE_SESSION_ONLY - description: When true, only recording of sessions that are not exited normally - (session timed out, or custom events were fired by the client match with failure - events defined) - cli: '' - name: SE_VIDEO_EVENT_DRIVEN description: Backend of video recorder and uploader will subscribe to Grid Event Bus for session event lifecycle for processing instead of traditional polling @@ -552,3 +543,14 @@ description: Enable this flag for setting max session take effect in Dynamic Node (both Docker and Kubernetes) cli: '' +- name: SE_FAILURE_SESSION_EVENTS + description: By default, a failure session event type contains ":failed", ":failure", + ":error" or ":aborted" substrings that trigger the retain-on-failure sub-sequence. + User can define more event types which handled in your test framework, separated + by comma. + cli: '' +- name: SE_RETAIN_ON_FAILURE + description: When true, recordings for sessions that pass are discarded immediately. + Only sessions that fail (via failure events or abnormal close) retain their recordings + and are queued for upload. + cli: '' diff --git a/scripts/generate_list_env_vars/value.yaml b/scripts/generate_list_env_vars/value.yaml index 583813e72f..9f360bd162 100644 --- a/scripts/generate_list_env_vars/value.yaml +++ b/scripts/generate_list_env_vars/value.yaml @@ -50,6 +50,8 @@ default: '' - name: SE_EXTRA_LIBS default: '' +- name: SE_FAILURE_SESSION_EVENTS + default: :failed,:failure,:error,:aborted - name: SE_FFMPEG_THREADS default: '' - name: SE_FRAME_RATE @@ -202,6 +204,8 @@ default: 'false' - name: SE_RELAX_CHECKS default: 'true' +- name: SE_RETAIN_ON_FAILURE + default: 'false' - name: SE_ROUTER_HOST default: '' - name: SE_ROUTER_PASSWORD @@ -278,10 +282,6 @@ default: '' - name: SE_UPLOAD_DESTINATION_PREFIX default: '' -- name: SE_UPLOAD_FAILURE_SESSION_EVENTS - default: :failed,:failure -- name: SE_UPLOAD_FAILURE_SESSION_ONLY - default: 'false' - name: SE_UPLOAD_OPTS default: '' - name: SE_UPLOAD_PIPE_FILE_NAME @@ -297,7 +297,7 @@ - name: SE_VIDEO_EVENT_DRIVEN default: 'true' - name: SE_VIDEO_FILE_NAME - default: auto + default: video.mp4 - name: SE_VIDEO_FILE_NAME_SUFFIX default: 'true' - name: SE_VIDEO_FILE_NAME_TRIM_REGEX diff --git a/tests/SeleniumTests/__init__.py b/tests/SeleniumTests/__init__.py index b931d3c8bb..050229895f 100644 --- a/tests/SeleniumTests/__init__.py +++ b/tests/SeleniumTests/__init__.py @@ -31,6 +31,7 @@ TEST_PLATFORMS = os.environ.get('TEST_PLATFORMS', 'linux/amd64') TEST_FIREFOX_INSTALL_LANG_PACKAGE = os.environ.get('TEST_FIREFOX_INSTALL_LANG_PACKAGE', 'false').lower() == 'true' TEST_ADD_CAPS_RECORD_VIDEO = os.environ.get('TEST_ADD_CAPS_RECORD_VIDEO', 'true').lower() == 'true' +TEST_RETAIN_ON_FAILURE = os.environ.get('TEST_RETAIN_ON_FAILURE', 'false').lower() == 'true' TEST_CUSTOM_SPECIFIC_NAME = os.environ.get('TEST_CUSTOM_SPECIFIC_NAME', 'false').lower() == 'true' TEST_MULTIPLE_VERSIONS = os.environ.get('TEST_MULTIPLE_VERSIONS', 'false').lower() == 'true' TEST_MULTIPLE_PLATFORMS = os.environ.get('TEST_MULTIPLE_PLATFORMS', 'false').lower() == 'true' @@ -139,6 +140,11 @@ def tearDown(self): try: if TEST_DELAY_AFTER_TEST: time.sleep(TEST_DELAY_AFTER_TEST) + if TEST_RETAIN_ON_FAILURE: + self.driver.fire_session_event( + "test:failed", + {"test": f"{self._testMethodName} ({self.__class__.__name__})"}, + ) self.driver.quit() except Exception as e: print(f"::error::Exception: {str(e)}") @@ -155,6 +161,8 @@ def setUp(self): options.add_argument('disable-features=DownloadBubble,DownloadBubbleV2') if TEST_ADD_CAPS_RECORD_VIDEO: options.set_capability('se:recordVideo', True) + if TEST_RETAIN_ON_FAILURE: + options.set_capability('se:retainOnFailure', True) if TEST_CUSTOM_SPECIFIC_NAME: options.set_capability('myApp:version', 'beta') options.set_capability('myApp:publish', 'internal') @@ -215,6 +223,8 @@ def setUp(self): options.add_argument('disable-features=DownloadBubble,DownloadBubbleV2') if TEST_ADD_CAPS_RECORD_VIDEO: options.set_capability('se:recordVideo', True) + if TEST_RETAIN_ON_FAILURE: + options.set_capability('se:retainOnFailure', True) if TEST_CUSTOM_SPECIFIC_NAME: options.set_capability('myApp:version', 'beta') options.set_capability('myApp:publish', 'internal') @@ -269,6 +279,8 @@ def setUp(self): options.profile = profile if TEST_ADD_CAPS_RECORD_VIDEO: options.set_capability('se:recordVideo', True) + if TEST_RETAIN_ON_FAILURE: + options.set_capability('se:retainOnFailure', True) if TEST_CUSTOM_SPECIFIC_NAME: options.set_capability('myApp:version', 'beta') options.set_capability('myApp:publish', 'internal') diff --git a/tests/docker-compose-v3-event-driven-arm64.yml b/tests/docker-compose-v3-event-driven-arm64.yml index 6e3e890a67..1cc811fb3f 100644 --- a/tests/docker-compose-v3-event-driven-arm64.yml +++ b/tests/docker-compose-v3-event-driven-arm64.yml @@ -37,8 +37,7 @@ services: environment: - SE_EVENT_BUS_HOST=selenium-hub - SE_RECORD_VIDEO=true - - SE_VIDEO_UPLOAD_ENABLED=true - - SE_UPLOAD_FAILURE_SESSION_ONLY=true + - SE_RETAIN_ON_FAILURE=true # Remote name and destination path to upload - SE_UPLOAD_DESTINATION_PREFIX=myftp://ftp/seluser # All configs required for RCLONE to upload to remote name myftp @@ -64,8 +63,7 @@ services: environment: - SE_EVENT_BUS_HOST=selenium-hub - SE_RECORD_VIDEO=true - - SE_VIDEO_UPLOAD_ENABLED=true - - SE_UPLOAD_FAILURE_SESSION_ONLY=true + - SE_RETAIN_ON_FAILURE=true # Remote name and destination path to upload - SE_UPLOAD_DESTINATION_PREFIX=myftp://ftp/seluser # All configs required for RCLONE to upload to remote name myftp