Skip to content

Commit 62c4d1a

Browse files
Refactor RAI error message validation in BIABPage; improve logging and handling of ambiguous states
1 parent 78d2196 commit 62c4d1a

1 file changed

Lines changed: 43 additions & 33 deletions

File tree

tests/e2e-test/pages/HomePage.py

Lines changed: 43 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -864,13 +864,13 @@ def input_rai_prompt_and_send(self, prompt_text):
864864
logger.info("✓ Send button clicked")
865865

866866
def validate_rai_error_message(self):
867-
"""Validate that the RAI 'Unable to create plan' error message is visible."""
867+
"""Validate that RAI blocked the prompt by checking for error messages."""
868868
logger.info("Validating RAI error response...")
869869

870870
# Wait a bit for system to process the request
871871
self.page.wait_for_timeout(3000)
872872

873-
# Check for various possible error messages or states
873+
# Check for various possible error messages that indicate RAI blocking
874874
possible_error_locators = [
875875
self.UNABLE_TO_CREATE_PLAN,
876876
"//span[contains(text(), 'Unable')]",
@@ -880,47 +880,57 @@ def validate_rai_error_message(self):
880880
"//p[contains(text(), 'Unable')]"
881881
]
882882

883-
error_found = False
883+
error_message_found = False
884884
for locator in possible_error_locators:
885885
try:
886886
if self.page.locator(locator).first.is_visible(timeout=5000):
887887
logger.info(f"✓ RAI error message found with locator: {locator}")
888-
error_found = True
888+
error_message_found = True
889889
break
890890
except Exception:
891891
continue
892892

893-
if not error_found:
894-
# Try to confirm plan creation started (to rule out silent acceptance)
895-
# Wait briefly to see if plan creation becomes visible
896-
try:
897-
# If plan creation becomes visible, the input was accepted (not blocked by RAI)
898-
if self.page.locator(self.CREATING_PLAN).is_visible(timeout=3000):
899-
logger.error("✗ Plan creation started - RAI did not block the prompt as expected")
900-
error_found = False # This is actually a failure case
901-
else:
902-
# Plan creation didn't start within timeout - likely rejected
903-
logger.info("✓ Plan creation did not start - input appears to have been rejected")
904-
error_found = True
905-
except Exception as e:
906-
# If we can't determine, treat as ambiguous but log it
907-
logger.warning("⚠ Could not verify CREATING_PLAN state: %s - assuming rejection", e)
908-
error_found = True
893+
# If we found an explicit error message, RAI successfully blocked
894+
if error_message_found:
895+
logger.info("✓ RAI successfully blocked the prompt with an error message")
896+
return
909897

910-
if not error_found:
911-
logger.error("✗ No RAI error or rejection state detected; prompt appears to have been accepted unexpectedly")
912-
# Take a screenshot for investigation before failing the test
913-
try:
914-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
915-
screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "tests", "screenshots")
916-
os.makedirs(screenshots_dir, exist_ok=True)
917-
screenshot_path = os.path.join(screenshots_dir, f"rai_validation_failed_{timestamp}.png")
918-
self.page.screenshot(path=screenshot_path)
919-
logger.info(f"Screenshot captured for investigation: {screenshot_path}")
920-
except Exception as e:
921-
logger.warning("Failed to capture screenshot when RAI validation failed: %s", e)
898+
# No explicit error message found - check if plan creation started (would indicate RAI failed)
899+
logger.info("No explicit error message found - checking if plan creation started...")
900+
try:
901+
# Wait briefly to see if plan creation becomes visible
902+
# If it does, RAI failed to block the prompt
903+
if self.page.locator(self.CREATING_PLAN).is_visible(timeout=3000):
904+
logger.error("✗ Plan creation started - RAI did not block the prompt as expected")
905+
# Take a screenshot before failing
906+
try:
907+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
908+
screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "tests", "screenshots")
909+
os.makedirs(screenshots_dir, exist_ok=True)
910+
screenshot_path = os.path.join(screenshots_dir, f"rai_validation_failed_{timestamp}.png")
911+
self.page.screenshot(path=screenshot_path)
912+
logger.info(f"Screenshot captured: {screenshot_path}")
913+
except Exception as e:
914+
logger.warning("Failed to capture screenshot: %s", e)
915+
raise AssertionError(
916+
"RAI validation failed: Plan creation started, indicating the prompt was not blocked by RAI"
917+
)
918+
else:
919+
# Plan creation didn't become visible - this could mean:
920+
# 1. RAI blocked it (good)
921+
# 2. Plan creation started and finished before we checked (bad - false positive)
922+
# Without an explicit error message, we can't be certain, so fail the test
923+
logger.error("✗ No explicit error message and no visible plan creation - ambiguous state")
924+
raise AssertionError(
925+
"RAI validation failed: No explicit error message found. Cannot confirm RAI blocked the prompt."
926+
)
927+
except AssertionError:
928+
# Re-raise assertion errors
929+
raise
930+
except Exception as e:
931+
logger.error("✗ Exception while checking CREATING_PLAN: %s", e)
922932
raise AssertionError(
923-
"Expected RAI to block the prompt, but no error message or rejection state was detected."
933+
f"RAI validation failed: Could not verify plan creation state: {e}"
924934
)
925935

926936
def validate_rai_clarification_error_message(self):

0 commit comments

Comments
 (0)