Skip to content

Commit 25ff373

Browse files
author
Mouseback
committed
Configure frontend build gating, type safety, component smoke tests, and local LLM runtime integration
1 parent dd2a84e commit 25ff373

43 files changed

Lines changed: 3316 additions & 196 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/ci.yml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,14 @@ jobs:
8585
working-directory: studio/frontend
8686
run: npm run lint
8787

88+
- name: Run type-check
89+
working-directory: studio/frontend
90+
run: npm run type-check
91+
92+
- name: Run test
93+
working-directory: studio/frontend
94+
run: npm run test
95+
8896
- name: Run build
8997
working-directory: studio/frontend
9098
run: npm run build

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,9 @@ dependencies = [
3636
"uvicorn",
3737
"python-dotenv",
3838
"aiohttp",
39-
"pillow"
39+
"pillow",
40+
"anthropic>=0.39.0",
41+
"httpx>=0.27.0"
4042
]
4143

4244
[project.scripts]

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,3 +22,5 @@ uvicorn
2222
python-dotenv
2323
aiohttp
2424
pillow
25+
anthropic>=0.39.0
26+
httpx>=0.27.0

scrapewizard/cli/commands/setup.py

Lines changed: 139 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -26,37 +26,148 @@ def setup(
2626
default=current_config.get("provider", "openai")
2727
).execute()
2828

29-
if not api_key:
30-
# Check if we already have one
31-
existing_key = current_config.get("api_key", "")
32-
key_masked = f"{existing_key[:4]}...{existing_key[-4:]}" if len(existing_key) > 8 else "********" if existing_key else ""
29+
if provider == "local":
30+
import shutil
31+
from scrapewizard.llm.local_runtime import LocalRuntime
3332

34-
api_key = inquirer.text(
35-
message=f"Enter API Key (Current: {key_masked}):",
36-
default=existing_key,
37-
validate=lambda result: len(result) > 0 or "API Key cannot be empty"
38-
).execute()
39-
40-
if not model:
41-
default_models = {
42-
"openai": "gpt-4-turbo",
43-
"anthropic": "claude-3-5-sonnet-20240620",
44-
"openrouter": "google/gemini-pro",
45-
"local": "llama3"
33+
ollama_installed = shutil.which("ollama") is not None
34+
if not ollama_installed:
35+
print("⚠️ [yellow]Warning: 'ollama' executable not found on system PATH. Please ensure Ollama is installed.[/yellow]")
36+
37+
local_base_url = current_config.get("local_base_url", "http://localhost:11434")
38+
if not model:
39+
local_base_url = inquirer.text(
40+
message="Enter Ollama Base URL:",
41+
default=local_base_url
42+
).execute()
43+
44+
runtime = LocalRuntime(base_url=local_base_url)
45+
daemon_status = runtime.check_daemon()
46+
47+
if not daemon_status.running and not model:
48+
print("❌ [red]Error: Ollama daemon is not running at configured URL.[/red]")
49+
if not inquirer.confirm(message="Ollama daemon is down. Proceed anyway?", default=False).execute():
50+
log("Setup aborted.")
51+
return
52+
53+
# Detect hardware
54+
hw = runtime.detect_hardware()
55+
if not model:
56+
print(f"🖥️ [cyan]Hardware detected:[/cyan] {hw['ram_gb']} GB RAM, GPU: {hw['gpu_name']}")
57+
print(f"📦 Suggested performance tier: [green]{hw['tier'].upper()}[/green]")
58+
59+
recommended = runtime.recommend_model(hw['tier'])
60+
61+
selected_model = model
62+
if not selected_model:
63+
installed = runtime.list_models()
64+
if installed:
65+
print("Installed models:")
66+
for m in installed:
67+
print(f" • {m}")
68+
else:
69+
print("No models found in Ollama.")
70+
71+
choices = installed.copy()
72+
if recommended not in choices:
73+
choices.append(recommended)
74+
choices.append("Other (enter custom name)")
75+
76+
selected_model = inquirer.select(
77+
message="Select Ollama model:",
78+
choices=choices,
79+
default=recommended if recommended in choices else (installed[0] if installed else choices[0])
80+
).execute()
81+
82+
if selected_model == "Other (enter custom name)":
83+
selected_model = inquirer.text(
84+
message="Enter custom model name:",
85+
default=recommended
86+
).execute()
87+
88+
if selected_model not in installed:
89+
if inquirer.confirm(message=f"Model '{selected_model}' is not downloaded. Pull it now?", default=True).execute():
90+
print(f"Downloading '{selected_model}' via Ollama. Please wait...")
91+
92+
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, DownloadColumn
93+
with Progress(
94+
SpinnerColumn(),
95+
TextColumn("[progress.description]{task.description}"),
96+
BarColumn(),
97+
DownloadColumn(),
98+
) as progress:
99+
task = progress.add_task(f"Pulling {selected_model}...", total=100)
100+
101+
def callback(data):
102+
status = data.get("status", "")
103+
total = data.get("total", 0)
104+
completed = data.get("completed", 0)
105+
if total > 0:
106+
progress.update(task, completed=completed, total=total, description=f"Pulling {selected_model}: {status}")
107+
else:
108+
progress.update(task, description=f"Pulling {selected_model}: {status}")
109+
110+
success = runtime.pull_model(selected_model, callback)
111+
if success:
112+
print(f"✅ Model '{selected_model}' pulled successfully.")
113+
else:
114+
print(f"❌ Failed to pull model '{selected_model}'.")
115+
116+
# Probe model latency
117+
if daemon_status.running and not model:
118+
print("Probing model response latency...")
119+
probe_res = runtime.probe(selected_model)
120+
if probe_res.success:
121+
print(f"✅ Connection successful! Probe latency: [green]{probe_res.latency}s[/green]")
122+
else:
123+
print(f"⚠️ Probe check failed: {probe_res.error}")
124+
125+
offline_only = current_config.get("offline_only", False)
126+
if not model:
127+
offline_only = inquirer.confirm(message="Enable offline-only mode (disable all cloud fallbacks)?", default=False).execute()
128+
129+
new_config = {
130+
"provider": "local",
131+
"model": selected_model,
132+
"local_base_url": local_base_url,
133+
"local_model": selected_model,
134+
"local_tier": hw['tier'],
135+
"offline_only": offline_only
46136
}
47-
model = inquirer.text(
48-
message="Enter Model Name:",
49-
default=current_config.get("model", default_models.get(provider, ""))
50-
).execute()
137+
ConfigManager.save_config(new_config)
138+
log("Configuration saved successfully.")
139+
140+
else:
141+
if not api_key:
142+
# Check if we already have one
143+
existing_key = current_config.get("api_key", "")
144+
key_masked = f"{existing_key[:4]}...{existing_key[-4:]}" if len(existing_key) > 8 else "********" if existing_key else ""
145+
146+
api_key = inquirer.text(
147+
message=f"Enter API Key (Current: {key_masked}):",
148+
default=existing_key,
149+
validate=lambda result: len(result) > 0 or "API Key cannot be empty"
150+
).execute()
51151

52-
# Save Config
53-
new_config = {
54-
"provider": provider,
55-
"api_key": api_key,
56-
"model": model
57-
}
58-
ConfigManager.save_config(new_config)
59-
log("Configuration saved successfully.")
152+
if not model:
153+
default_models = {
154+
"openai": "gpt-4-turbo",
155+
"anthropic": "claude-3-5-sonnet-20240620",
156+
"openrouter": "google/gemini-pro"
157+
}
158+
model = inquirer.text(
159+
message="Enter Model Name:",
160+
default=current_config.get("model", default_models.get(provider, ""))
161+
).execute()
162+
163+
# Save Config
164+
new_config = {
165+
"provider": provider,
166+
"api_key": api_key,
167+
"model": model
168+
}
169+
ConfigManager.save_config(new_config)
170+
log("Configuration saved successfully.")
60171

61172
# Proxy Setup
62173
if use_proxy or inquirer.confirm(message="Configure Proxy?", default=False).execute():

scrapewizard/cli/commands/utils.py

Lines changed: 58 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,10 +96,66 @@ def doctor() -> None:
9696
if config_ok:
9797
try:
9898
from scrapewizard.llm.client import LLMClient
99+
from scrapewizard.llm.local_runtime import LocalRuntime
100+
from scrapewizard.core.constants import LOCAL_LLM_PROBE_TIMEOUT
101+
99102
client = LLMClient()
100-
rprint(f"• LLM Client: [green]Initialized[/green] ({client.provider}/{client.model})")
103+
provider = client.provider
104+
105+
if provider == "local":
106+
runtime = LocalRuntime()
107+
daemon = runtime.check_daemon()
108+
109+
if daemon.running:
110+
rprint(f"• Local AI Runtime: [green]Ollama {daemon.version}[/green]")
111+
models = runtime.list_models()
112+
model_name = client.model
113+
114+
model_loaded = False
115+
for m in models:
116+
if m == model_name or m.startswith(model_name + ":") or model_name.startswith(m + ":"):
117+
model_loaded = True
118+
break
119+
120+
if model_loaded:
121+
rprint(f"• Local Model: [green]{model_name}[/green]")
122+
# Run probe
123+
probe_res = runtime.probe(model_name, timeout=LOCAL_LLM_PROBE_TIMEOUT)
124+
if probe_res.success:
125+
rprint(f"• LLM Local: [green]✅ Ollama running, model {model_name} loaded, probe: {probe_res.latency}s[/green]")
126+
else:
127+
rprint(f"• LLM Local: [red]❌ Probe failed: {probe_res.error}[/red]")
128+
else:
129+
rprint(f"• Local Model: [yellow]Not pulled ({model_name})[/yellow]")
130+
rprint(f"• LLM Local: [yellow]⚠️ Model not pulled. Run 'scrapewizard setup'[/yellow]")
131+
else:
132+
rprint(f"• Local AI Runtime: [red]Ollama not running[/red]")
133+
rprint(f"• LLM Local: [red]❌ Ollama daemon is down[/red]")
134+
135+
# Hardware Tier
136+
hw = runtime.detect_hardware()
137+
rprint(f"• Hardware Tier: [cyan]{hw['tier'].upper()}[/cyan] ({hw['ram_gb']} GB RAM, {hw['gpu_name']})")
138+
139+
else:
140+
# Cloud client probe check
141+
import time
142+
start_time = time.time()
143+
try:
144+
# Let's run a small test call to verify cloud connectivity
145+
response = client.call(
146+
system_prompt="you are a health check assistant. reply with ok",
147+
user_prompt="ping",
148+
json_mode=False
149+
)
150+
latency = round(time.time() - start_time, 2)
151+
if "ok" in response.lower():
152+
rprint(f"• LLM Cloud: [green]✅ {client.provider}/{client.model}, probe: {latency}s[/green]")
153+
else:
154+
rprint(f"• LLM Cloud: [yellow]⚠️ {client.provider}/{client.model} responded but output was: {response} ({latency}s)[/yellow]")
155+
except Exception as e:
156+
rprint(f"• LLM Cloud: [red]❌ Connection failed to {client.provider}/{client.model}: {e}[/red]")
101157
except Exception as e:
102-
rprint(f"• LLM Client: [red]Error ({e})[/red]")
158+
rprint(f"• LLM Client: [red]Error initializing client ({e})[/red]")
103159

104160
rprint("\n[bold green]System check complete.[/bold green]")
105161

scrapewizard/core/config.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,11 @@ class ConfigManager:
2121

2222
DEFAULT_CONFIG = {
2323
"provider": "openai",
24-
"model": "gpt-4-turbo"
24+
"model": "gpt-4-turbo",
25+
"local_base_url": "http://localhost:11434",
26+
"local_model": "qwen2.5-coder:3b",
27+
"local_tier": "balanced",
28+
"offline_only": False,
2529
}
2630

2731
@classmethod
@@ -51,7 +55,7 @@ def migrate_from_plaintext(cls):
5155
migrated = True
5256

5357
# Pattern 2: Provider-specific keys
54-
for provider in ["openai", "anthropic", "openrouter", "local"]:
58+
for provider in ["openai", "anthropic", "openrouter", "local", "local-embedded"]:
5559
if provider in data and isinstance(data[provider], dict) and "api_key" in data[provider]:
5660
cls.save_api_key(provider, data[provider]["api_key"])
5761
del data[provider]["api_key"]
@@ -136,6 +140,8 @@ def save_proxy(cls, proxy_config: Dict[str, Any]):
136140

137141
@classmethod
138142
def check_setup(cls) -> bool:
139-
"""Check if essential configuration (API key) is set."""
143+
"""Check if essential configuration (API key or local setup) is set."""
140144
config = cls.load_config()
145+
if config.get("provider") == "local":
146+
return True
141147
return bool(config.get("api_key"))

scrapewizard/core/constants.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,12 @@
66
PROBE_NAVIGATION_TIMEOUT = 30
77
SCAN_NAVIGATION_TIMEOUT = 45
88

9+
# Local LLM timeouts (seconds)
10+
LOCAL_LLM_COLD_START_TIMEOUT = 60 # First call loads model into RAM
11+
LOCAL_LLM_WARM_TIMEOUT = 30 # Subsequent calls
12+
LOCAL_LLM_PROBE_TIMEOUT = 15 # Doctor/setup probe
13+
14+
915
# LLM Thresholds
1016
LLM_CONFIDENCE_THRESHOLD = 0.5
1117
SCRAPING_POSSIBLE_MIN_CONFIDENCE = 0.4

scrapewizard/core/orchestrator.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -855,7 +855,10 @@ def do_final():
855855
print(f"🤖 [bold cyan]AI Usage Summary:[/bold cyan]")
856856
print(f" • Calls: {stats['calls']}")
857857
print(f" • Tokens: {stats['input_tokens'] + stats['output_tokens']} ({stats['input_tokens']} in, {stats['output_tokens']} out)")
858-
print(f" • Est. Cost: ${cost:.4f}\n")
858+
if isinstance(cost, str):
859+
print(f" • Est. Cost: {cost}\n")
860+
else:
861+
print(f" • Est. Cost: ${cost:.4f}\n")
859862

860863
print(f"Your data is ready:")
861864
print(f"{output_file}\n")
@@ -905,7 +908,8 @@ def _bundle_output(self) -> None:
905908
try:
906909
from scrapewizard.report.html_generator import ReportGenerator
907910
generator = ReportGenerator(self.project_dir)
908-
generator.generate()
911+
duration = time.time() - self.start_time
912+
generator.generate(duration_seconds=duration)
909913
report_src = self.project_dir / "report.html"
910914
if report_src.exists():
911915
shutil.copy2(report_src, output_dir / "report.html")

0 commit comments

Comments
 (0)