Skip to content

Commit 5c5968d

Browse files
committed
feat: add OpenRouter support for LLM provider and update documentation
1 parent 658ca8f commit 5c5968d

6 files changed

Lines changed: 116 additions & 12 deletions

File tree

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ HF_MODELS_PATH=./hf_models
1515
LLM_PROVIDER=openai
1616
OPENAI_API_KEY=
1717
OPENAI_MODEL=gpt-4o-mini
18+
19+
# ── OpenRouter (free alternative — https://openrouter.ai/keys) ──
20+
OPENROUTER_API_KEY=
21+
OPENROUTER_MODEL=meta-llama/llama-3.1-8b-instruct:free

.github/copilot-instructions.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ etl_microservices/
136136
137137
├── ai_agent/
138138
│ ├── __init__.py
139-
│ ├── llm_provider.py # Abstract LLMProvider + OpenAIProvider + LocalProvider
139+
│ ├── llm_provider.py # Abstract LLMProvider + OpenAIProvider + OpenRouterProvider + LocalProvider
140140
│ ├── pipeline_agent.py # NL → YAML pipeline generation + validation
141141
│ └── pipeline_compiler.py # Parallel pipeline execution via Preparator SDK (dispatch registry + topological layering)
142142
@@ -326,7 +326,7 @@ Every service's `create_app()` now includes:
326326

327327
| Module | Purpose |
328328
|---|---|
329-
| `ai_agent/llm_provider.py` | Abstract `LLMProvider` + `OpenAIProvider` (GPT-4o-mini default) + `LocalProvider` (calls text-completion-llm-service) |
329+
| `ai_agent/llm_provider.py` | Abstract `LLMProvider` + `OpenAIProvider` (GPT-4o-mini default) + `OpenRouterProvider` (200+ models, free tier) + `LocalProvider` (calls text-completion-llm-service) |
330330
| `ai_agent/pipeline_agent.py` | `PipelineAgent`: builds system prompt from `service_registry.json`, calls LLM to generate YAML, validates structure + services + params + dependencies. Standalone `validate_pipeline()` module-level function enables validation-only use without instantiating the agent (e.g., Streamlit UI). |
331331
| `ai_agent/pipeline_compiler.py` | `PipelineCompiler`: executes validated pipeline definitions via Preparator SDK with **parallel execution** of independent steps (topological layering via Kahn’s algorithm + `ThreadPoolExecutor`). Uses a **dispatch registry** (`_build_dispatch_registry()`) for extensibility—add new services via `register_service()` without if/elif chains. Returns `PipelineResult` with per-step metrics + `correlation_id`. Supports `join_datasets` (2 `depends_on` entries). Exposes `last_step_outputs` dict for UI data preview. |
332332
| `schemas/service_registry.json` | Complete metadata for all 11 services: name, type, description, endpoint, input/output formats, params with types/required/defaults/enums |
@@ -368,6 +368,7 @@ pipeline:
368368
### LLM Provider Configuration
369369
370370
- `LLM_PROVIDER=openai` → uses OpenAI API (requires `OPENAI_API_KEY`)
371+
- `LLM_PROVIDER=openrouter` → uses OpenRouter API gateway (requires `OPENROUTER_API_KEY`). Supports 200+ models including free ones (e.g., `meta-llama/llama-3.1-8b-instruct:free`). OpenAI-compatible API via `openai` Python package with custom `base_url`. Get a free key at https://openrouter.ai/keys.
371372
- `LLM_PROVIDER=local` → uses the local HuggingFace text-completion-llm-service
372373
- Factory: `create_llm_provider(provider=None)` reads env var if not specified
373374

@@ -443,7 +444,7 @@ Single bridge network `etl-network`. Services reference each other by container
443444
| Volume | Mount | Purpose |
444445
|---|---|---|
445446
| `etl-containers-shared-data` | `/app/data` | Shared datasets, metadata, XCom files |
446-
| `etl-data-airflow` | `/opt/airflow` | Airflow persistence |
447+
| `etl-airflow-data` | `/opt/airflow` | Airflow persistence |
447448
| `etl-postgres-data` | PostgreSQL data dir | Airflow metadata DB |
448449
| `etl-grafana-data` | Grafana data dir | Dashboards |
449450
| `etl-prometheus-data` | Prometheus data dir | TSDB |
@@ -459,9 +460,11 @@ Single bridge network `etl-network`. Services reference each other by container
459460
| `ETL_DATA_ROOT` | `/app/data` | Base directory for datasets/metadata path resolution |
460461
| `ALLOW_PRIVATE_API_URLS` | `false` | Allow private/local API targets in `extract-api` |
461462
| `HF_MODELS_PATH` | `./hf_models` | Local HuggingFace model cache |
462-
| `LLM_PROVIDER` | `openai` | AI agent provider (`openai` or `local`) |
463+
| `LLM_PROVIDER` | `openai` | AI agent provider (`openai`, `openrouter`, or `local`) |
463464
| `OPENAI_API_KEY` | — | OpenAI API key |
464465
| `OPENAI_MODEL` | `gpt-4o-mini` | OpenAI model name |
466+
| `OPENROUTER_API_KEY` | — | OpenRouter API key (free at https://openrouter.ai/keys) |
467+
| `OPENROUTER_MODEL` | `meta-llama/llama-3.1-8b-instruct:free` | OpenRouter model identifier |
465468

466469
---
467470

ai_agent/llm_provider.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""
22
LLM Provider Abstraction Layer.
33
4-
Supports two backends:
4+
Supports three backends:
55
1. OpenAI API (GPT-4o-mini by default; model configurable via OPENAI_MODEL)
6-
2. Local HuggingFace model via the existing text-completion-llm-service
6+
2. OpenRouter API gateway (200+ models, including free ones — https://openrouter.ai)
7+
3. Local HuggingFace model via the existing text-completion-llm-service
78
89
Usage:
910
provider = create_llm_provider() # reads LLM_PROVIDER env var
@@ -67,6 +68,66 @@ def name(self) -> str:
6768
return f"OpenAI ({self.model})"
6869

6970

71+
class OpenRouterProvider(LLMProvider):
    """OpenRouter API provider — OpenAI-compatible gateway to 200+ models (including free ones).

    OpenRouter (https://openrouter.ai) aggregates LLMs from multiple providers
    behind a single API key. It exposes an OpenAI-compatible ``/chat/completions``
    endpoint, so we reuse the ``openai`` Python package with a custom ``base_url``.

    Free models (no credits required):
        - meta-llama/llama-3.1-8b-instruct:free
        - google/gemma-2-9b-it:free
        - mistralai/mistral-7b-instruct:free
        - qwen/qwen-2.5-7b-instruct:free

    Set OPENROUTER_API_KEY in your environment (get one at https://openrouter.ai/keys).
    """

    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
    # Fallback model used when neither the ``model`` argument nor a non-empty
    # OPENROUTER_MODEL environment variable is provided.
    DEFAULT_MODEL = "meta-llama/llama-3.1-8b-instruct:free"

    def __init__(self, model: str = None, api_key: str = None):
        """Create an OpenAI-SDK client pointed at the OpenRouter endpoint.

        Args:
            model: OpenRouter model identifier. Falls back to the
                OPENROUTER_MODEL env var, then to ``DEFAULT_MODEL``.
            api_key: OpenRouter API key. Falls back to the OPENROUTER_API_KEY
                env var.

        Raises:
            ImportError: if the ``openai`` package is not installed.
            ValueError: if no API key is supplied or found in the environment.
        """
        # Imported lazily so the module can be loaded without ``openai``
        # installed when this provider is never instantiated.
        try:
            import openai
        except ImportError as exc:
            raise ImportError("openai package not installed. Run: pip install openai") from exc

        # ``or`` chaining (instead of a getenv default) also covers an
        # OPENROUTER_MODEL that is set but empty, which would otherwise yield
        # an empty model id.
        self.model = model or os.getenv("OPENROUTER_MODEL") or self.DEFAULT_MODEL

        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError(
                "OPENROUTER_API_KEY environment variable is required for OpenRouter provider. "
                "Get a free key at https://openrouter.ai/keys"
            )

        self.client = openai.OpenAI(
            api_key=api_key,
            base_url=self.OPENROUTER_BASE_URL,
            # Extra headers sent with every request to identify this application.
            default_headers={
                "HTTP-Referer": "https://github.com/VTvito/arrowflow",
                "X-Title": "ArrowFlow ETL Platform",
            },
        )
        logger.info("OpenRouter provider initialized with model: %s", self.model)

    def generate(self, prompt: str, system_prompt: str = "", temperature: float = 0.3, max_tokens: int = 2048) -> str:
        """Send a chat-completion request and return the text of the first choice.

        Args:
            prompt: User message content.
            system_prompt: Optional system message; omitted from the request when empty.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Returns:
            The first choice's message content, or an empty string when the
            API returns no content for that message.
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        # The SDK types ``message.content`` as optional; normalise a missing
        # value to "" so the declared ``str`` return type always holds.
        content = response.choices[0].message.content
        return content if content is not None else ""

    def name(self) -> str:
        """Return a human-readable provider label that includes the model id."""
        return f"OpenRouter ({self.model})"
129+
130+
70131
class LocalProvider(LLMProvider):
71132
"""Local HuggingFace provider via the text-completion-llm-service."""
72133

@@ -135,14 +196,17 @@ def create_llm_provider(provider: str = None, **kwargs) -> LLMProvider:
135196
Factory function to create an LLM provider.
136197
137198
Args:
138-
provider: "openai" or "local". Defaults to LLM_PROVIDER env var, then "openai".
199+
provider: "openai", "openrouter", or "local".
200+
Defaults to LLM_PROVIDER env var, then "openai".
139201
**kwargs: Additional arguments passed to the provider constructor.
140202
"""
141203
provider = provider or os.getenv("LLM_PROVIDER", "openai")
142204

143205
if provider == "openai":
144206
return OpenAIProvider(**kwargs)
207+
elif provider == "openrouter":
208+
return OpenRouterProvider(**kwargs)
145209
elif provider == "local":
146210
return LocalProvider(**kwargs)
147211
else:
148-
raise ValueError(f"Unknown LLM provider: '{provider}'. Supported: 'openai', 'local'")
212+
raise ValueError(f"Unknown LLM provider: '{provider}'. Supported: 'openai', 'openrouter', 'local'")

docker-compose.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ services:
3333
condition: service_started
3434
volumes:
3535
- etl-containers-shared-data:/app/data # Shared Volume between containers for data
36-
- etl-data-airflow:/opt/airflow # Volume for persistence of Airflow data
36+
- etl-airflow-data:/opt/airflow # Volume for persistence of Airflow data
3737
- ./airflow/dags:/opt/airflow/dags # Mount the local folder 'dags' in the container
3838
- ./preparator:/opt/airflow/preparator # Mount the local folder 'preparator' in the container
3939
ports:
@@ -280,6 +280,8 @@ services:
280280
- LLM_PROVIDER=${LLM_PROVIDER:-openai}
281281
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
282282
- OPENAI_MODEL=${OPENAI_MODEL:-gpt-4o-mini}
283+
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
284+
- OPENROUTER_MODEL=${OPENROUTER_MODEL:-meta-llama/llama-3.1-8b-instruct:free}
283285
volumes:
284286
- etl-containers-shared-data:/app/data
285287
ports:
@@ -300,6 +302,6 @@ networks:
300302
volumes:
301303
etl-containers-shared-data:
302304
etl-postgres-data:
303-
etl-data-airflow:
305+
etl-airflow-data:
304306
etl-grafana-data:
305307
etl-prometheus-data:

streamlit_app/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ FROM python:3.9-slim
22

33
WORKDIR /app
44

5+
RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
6+
57
# Install dependencies
68
COPY streamlit_app/requirements.txt /app/requirements.txt
79
RUN pip install --no-cache-dir -r requirements.txt

streamlit_app/app.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,44 @@ def render_sidebar():
8080
# LLM Provider selector
8181
provider = st.selectbox(
8282
"LLM Provider",
83-
["openai", "local"],
84-
help="OpenAI requires OPENAI_API_KEY env var. Local uses the HuggingFace service.",
83+
["openai", "openrouter", "local"],
84+
help=(
85+
"**OpenAI**: requires OPENAI_API_KEY (paid). \n"
86+
"**OpenRouter**: gateway to 200+ models, including free ones — "
87+
"get a key at https://openrouter.ai/keys. \n"
88+
"**Local**: uses the HuggingFace text-completion-llm-service (requires model download)."
89+
),
8590
)
8691

8792
if provider == "openai":
8893
api_key = st.text_input("OpenAI API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""))
8994
if api_key:
9095
os.environ["OPENAI_API_KEY"] = api_key
9196

97+
elif provider == "openrouter":
98+
or_key = st.text_input(
99+
"OpenRouter API Key", type="password",
100+
value=os.getenv("OPENROUTER_API_KEY", ""),
101+
help="Free key at https://openrouter.ai/keys",
102+
)
103+
if or_key:
104+
os.environ["OPENROUTER_API_KEY"] = or_key
105+
106+
or_model = st.selectbox(
107+
"Model",
108+
[
109+
"meta-llama/llama-3.1-8b-instruct:free",
110+
"google/gemma-2-9b-it:free",
111+
"mistralai/mistral-7b-instruct:free",
112+
"qwen/qwen-2.5-7b-instruct:free",
113+
"meta-llama/llama-3.3-70b-instruct",
114+
"anthropic/claude-3.5-sonnet",
115+
"openai/gpt-4o-mini",
116+
],
117+
help="Models ending in `:free` require no credits.",
118+
)
119+
os.environ["OPENROUTER_MODEL"] = or_model
120+
92121
st.divider()
93122

94123
# Chat messages

0 commit comments

Comments
 (0)