Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions scripts/check_banned_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@
"skylight-cli click --element-index is unstable"),
(re.compile(r'subprocess\.Popen.*Chrome.*(?!remote-allow-origins=\\"\*\\")'),
'Chrome MUST be launched with --remote-allow-origins="*" (with quotes!)'),
# SR-187: datetime.utcnow() is deprecated since Python 3.12 (DeprecationWarning)
# and will be removed in a future release. It returns a NAIVE datetime — comparing
# naive against UTC-aware DB timestamps silently mis-orders. Use
# `datetime.now(timezone.utc)` instead. See issue #186 for migration log.
(re.compile(r'\bdatetime\.utcnow\s*\('),
'datetime.utcnow() returns NAIVE dt; use datetime.now(timezone.utc) (SR-187)'),
]

ROOT = Path(__file__).resolve().parent.parent
Expand Down
83 changes: 83 additions & 0 deletions scripts/tests/test_check_banned_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,89 @@ def test_real_webauto_call_IS_flagged(self) -> None:
self.assertEqual(cbp.scan_file(f), [])


class UtcnowBanTests(unittest.TestCase):
    """SR-187 (issue #186): `datetime.utcnow()` is banned.

    - POSITIVE: a real call must be flagged (so CI blocks regressions).
    - NEGATIVE: the same token in a docstring/comment must NOT be flagged
      (this file itself documents `datetime.utcnow()` in comments), and
      identifiers that merely contain the banned token must not fire.
    """

    def _write(self, tmp_path: Path, name: str, content: str) -> Path:
        """Create ``tmp_path / name`` holding ``content`` and return its path."""
        p = tmp_path / name
        # Explicit encoding so the fixture does not depend on the locale.
        p.write_text(content, encoding="utf-8")
        return p

    def _scan(self, name: str, content: str) -> list:
        """Write ``content`` to a throw-away file and return ``cbp.scan_file`` hits.

        The scan runs while the temporary directory still exists; only the
        resulting hit list leaves the ``with`` block.
        """
        import tempfile
        with tempfile.TemporaryDirectory() as td:
            return cbp.scan_file(self._write(Path(td), name, content))

    def test_real_utcnow_call_IS_flagged(self) -> None:
        hits = self._scan("bad_dt.py", (
            'from datetime import datetime\n'
            'now = datetime.utcnow()\n'
        ))
        self.assertEqual(len(hits), 1,
                         f"a real datetime.utcnow() call must be flagged; got {hits}")
        line_no, reason, _snippet = hits[0]
        self.assertEqual(line_no, 2)
        self.assertIn("SR-187", reason)

    def test_utcnow_chained_call_IS_flagged(self) -> None:
        # The real production sites used `datetime.utcnow().isoformat()`.
        # The `\b…(` anchor must still match because `\(` matches the
        # opening paren of `utcnow()`, not the chained `.isoformat`.
        hits = self._scan("bad_dt_chain.py", (
            'from datetime import datetime\n'
            'ts = datetime.utcnow().isoformat() + "Z"\n'
        ))
        self.assertEqual(len(hits), 1,
                         f"chained datetime.utcnow().isoformat() must be flagged; got {hits}")

    def test_utcnow_in_docstring_is_NOT_flagged(self) -> None:
        # The pattern lives in EXECUTABLE code; docstrings are masked. This
        # mirrors the SR-60 contract for every other banned pattern.
        hits = self._scan("doc_dt.py", (
            '"""Migration note: replaced datetime.utcnow() with\n'
            'datetime.now(timezone.utc) per SR-187.\n'
            '"""\n'
            'value = 42\n'
        ))
        self.assertEqual(hits, [],
                         "datetime.utcnow() in docstring must NOT be flagged")

    def test_utcnow_in_comment_is_NOT_flagged(self) -> None:
        hits = self._scan("comment_dt.py", (
            '# SR-187: replaced datetime.utcnow() with timezone.utc form\n'
            'from datetime import datetime, timezone\n'
            'now = datetime.now(timezone.utc)\n'
        ))
        self.assertEqual(hits, [],
                         "datetime.utcnow() in comment must NOT be flagged")

    def test_substring_match_is_NOT_flagged(self) -> None:
        # An unrelated `xutcnow(` token never contains `datetime.utcnow(`,
        # so the rule must stay silent on it.
        hits = self._scan("noisy.py", (
            'def xutcnow():\n'
            '    return 1\n'
            'xutcnow()\n'
        ))
        self.assertEqual(hits, [],
                         "bare `xutcnow()` must NOT be flagged")

    def test_prefixed_identifier_is_NOT_flagged(self) -> None:
        # `\b` boundary: in `not_datetime.utcnow(` the character before
        # `datetime` is `_` (a word character), so there is no word boundary
        # and the rule does not match. A custom object named like this is a
        # known, accepted false-negative; this pins that behaviour.
        hits = self._scan("prefixed.py", (
            'not_datetime = object()\n'
            'stamp = not_datetime.utcnow()\n'
        ))
        self.assertEqual(hits, [],
                         "`not_datetime.utcnow()` must NOT be flagged (word boundary)")


class SelfScanTests(unittest.TestCase):
"""The script's OWN documentation includes BANNED tokens (this is the
whole point of SR-60). Scanning the live `check_banned_patterns.py`
Expand Down
30 changes: 17 additions & 13 deletions survey-cli/commands/answer_survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@
import json
import os
import subprocess
import time
import hashlib
from datetime import datetime
from typing import Dict
from datetime import datetime, timezone
from typing import Dict, Any, List, Optional

# ═════════════════════════════════════════════════════════════════════════════
# IMPORTS — vorhandene tools nutzen
Expand All @@ -60,7 +61,8 @@
import sys
sys.path.insert(0, sys_path)

from tools.tool_snapshot import EXTRACTOR_JS
from tools.tool_snapshot import EXTRACTOR_JS, snapshot, find_submit, find_unfilled
from survey.completion_detector import CompletionDetector
from survey.session_validator import validate_session

CHROME_PORT = 9999
Expand Down Expand Up @@ -216,8 +218,7 @@ async def solve_captcha(ws) -> str:
Model: meta/llama-3.2-90b-vision-instruct (NVIDIA NIM)
API: https://integrate.api.nvidia.com/v1/chat/completions
"""
import base64
import urllib.request
import os, base64, urllib.request

# Get page text to detect captcha type
page_text = await cdp_execute_js(ws, 100, "document.body.innerText.substring(0, 500)")
Expand Down Expand Up @@ -372,6 +373,7 @@ async def solve_drag_drop(ws) -> str:
Uses CDP Input.dispatchMouseEvent (Approach B) — REAL browser-level mouse events
trigger Angular CDK's pointer event handlers. Verified working 2026-05-10.
"""
import time

# Extract target number
number = await cdp_execute_js(ws, 200, """
Expand Down Expand Up @@ -476,7 +478,7 @@ async def solve_drag_drop(ws) -> str:
""")
await asyncio.sleep(3)
return f"DRAG_SOLVED:{number}->btn_enabled"
return "DRAG_FAILED:btn_disabled"
return f"DRAG_FAILED:btn_disabled"
except:
return f"DRAG_VERIFY_ERROR:{verify}"

Expand Down Expand Up @@ -529,7 +531,7 @@ def detect_page_type(snap: Dict) -> str:
"unknown" → Nicht erkannt
"""
body = (snap.get("bodyText", "") or "").lower()
(snap.get("url", "") or "").lower()
url = (snap.get("url", "") or "").lower()

# Screen-Out Keywords
for kw in ["umfrage passt nicht", "leider", "nicht geeignet", "vorzeitig beendet",
Expand Down Expand Up @@ -763,14 +765,14 @@ async def answer_survey(survey_ws_url: str, max_pages: int = MAX_PAGES) -> Dict:
}
"""
print(f"\n{'='*60}")
print(" ANSWER_SURVEY — Manual Testing Mode")
print(f" ANSWER_SURVEY — Manual Testing Mode")
print(f"{'='*60}")
print(f" Survey Tab WS: {survey_ws_url[:60]}...")

# Pre-Flight: Session validieren
if not validate_session(CHROME_PORT):
return {"status": "error", "reason": "Session invalid — cookies expired?"}
print(" [PREFLIGHT] Session: OK")
print(f" [PREFLIGHT] Session: OK")

# Import websockets here
import websockets
Expand Down Expand Up @@ -830,7 +832,7 @@ async def answer_survey(survey_ws_url: str, max_pages: int = MAX_PAGES) -> Dict:
break

# 4. Submit (Next / Continue / Submit)
print(" [SUBMIT] Clicking submit button...")
print(f" [SUBMIT] Clicking submit button...")
submit_result = await click_submit(ws)
print(f" [SUBMIT] {submit_result}")
results["actions"].append(f"SUBMIT:{submit_result}")
Expand Down Expand Up @@ -865,7 +867,9 @@ def _update_registry(command_id: str, success: bool, result: Dict):
except:
registry = {"version": "1.0.0", "commands": []}

now = datetime.utcnow().isoformat() + "Z"
# SR-187: UTC-aware (naive utcnow() is deprecated since Py 3.12; removal is planned).
# Keep historical "Z" suffix for command_registry.json wire-format stability.
now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

# Find or create command entry
found = False
Expand Down Expand Up @@ -906,7 +910,7 @@ def _update_registry(command_id: str, success: bool, result: Dict):

# ═════════════════════════════════════════════════════════════════════════════
# CLI — MANUELLE TESTING ONLY!
# ════════════════════════════════════════════════════════════════════════════
# ═════════════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
if len(sys.argv) < 2:
Expand Down Expand Up @@ -953,4 +957,4 @@ def _update_registry(command_id: str, success: bool, result: Dict):
elif result["status"] == "max_pages":
print(" → Max Seiten erreicht (30). Timeout.")
else:
print(f" → Unbekannter Status: {result.get('status')}")
print(f" → Unbekannter Status: {result.get('status')}")
11 changes: 8 additions & 3 deletions survey-cli/survey/captcha/fallback_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Optional

Expand Down Expand Up @@ -186,15 +186,20 @@ def _log_human_handoff(
Pfad zur Log-Datei
"""
logs_dir = _ensure_logs_dir()
date_str = datetime.utcnow().strftime("%Y%m%d")
# SR-187: UTC-aware datetimes (naive utcnow() is deprecated since Py 3.12 and
# will be removed in a future release; comparing naive against tz-aware silently mis-orders).
now_utc = datetime.now(timezone.utc)
date_str = now_utc.strftime("%Y%m%d")
log_path = logs_dir / f"captcha-failures-{date_str}.jsonl"

# Capture screenshot
screenshot_b64 = _capture_screenshot_b64(cdp)

# Build log entry
entry = {
"timestamp": datetime.utcnow().isoformat() + "Z",
# SR-187: isoformat() on tz-aware dt emits "+00:00"; we keep the
# historical "Z" suffix for jsonl-consumer compatibility.
"timestamp": now_utc.isoformat().replace("+00:00", "Z"),
"detected_type": detection.captcha_type,
"page_url": page_url,
"frame_id": detection.frame_id,
Expand Down
5 changes: 3 additions & 2 deletions survey-cli/survey/daemon/answer_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import re
import sqlite3
from dataclasses import dataclass, field
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -1002,7 +1002,8 @@ def _store_answer(self, question: Question, answer: Answer) -> None:
question.text[:500],
json.dumps(answer.value),
self._hash_persona(),
datetime.utcnow().isoformat(),
# SR-187: UTC-aware (naive utcnow() is deprecated in Py 3.12).
datetime.now(timezone.utc).isoformat(),
),
)
conn.commit()
Expand Down
11 changes: 7 additions & 4 deletions survey-cli/survey/daemon/survey_agent_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import json
import logging
import sqlite3
from datetime import datetime
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import TypedDict
Expand Down Expand Up @@ -199,7 +199,8 @@ async def _navigate(self, state: AgentState) -> AgentState:
"""Navigate to survey URL."""
logger.info(f"Navigating to: {state['survey_url']}")
state["status"] = SurveyStatus.NAVIGATING.value
state["started_at"] = datetime.utcnow().isoformat()
# SR-187: UTC-aware (naive utcnow() is deprecated in Py 3.12).
state["started_at"] = datetime.now(timezone.utc).isoformat()

if not self._browser:
self._browser = BrowserDriver(headless=self.headless)
Expand Down Expand Up @@ -266,7 +267,8 @@ async def _check_status(self, state: AgentState) -> AgentState:
for pattern in complete_patterns:
if pattern in html:
state["status"] = SurveyStatus.COMPLETED.value
state["completed_at"] = datetime.utcnow().isoformat()
# SR-187: UTC-aware (naive utcnow() is deprecated in Py 3.12).
state["completed_at"] = datetime.now(timezone.utc).isoformat()
return state

# Check for captcha
Expand Down Expand Up @@ -582,7 +584,8 @@ async def _complete(self, state: AgentState) -> AgentState:
"""Handle successful completion."""
logger.info("Survey completed successfully")
state["status"] = SurveyStatus.COMPLETED.value
state["completed_at"] = datetime.utcnow().isoformat()
# SR-187: UTC-aware (naive utcnow() is deprecated in Py 3.12).
state["completed_at"] = datetime.now(timezone.utc).isoformat()

self._save_state(state)

Expand Down