-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathollama.py
More file actions
175 lines (145 loc) · 5.42 KB
/
Copy pathollama.py
File metadata and controls
175 lines (145 loc) · 5.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Ollama Provider Adapter
========================
Wraps the Ollama API to implement the AIEngineProvider interface.
This enables running local LLMs via Ollama (Llama, DeepSeek, CodeLlama, etc.).
Ollama uses an OpenAI-compatible API, so this adapter leverages the openai
package pointed at the local Ollama server.
Environment Variables:
OLLAMA_MODEL: Model identifier (e.g., llama3, deepseek-r1:7b) (required)
OLLAMA_BASE_URL: API base URL (default: http://localhost:11434)
OLLAMA_API_KEY: Optional API key for authenticated instances
"""
from typing import Any
from core.providers.adapters.openai_compat import (
OpenAICompatibleProvider,
OpenAICompatibleSession,
)
from core.providers.base import SessionConfig
# Server root used when no base URL is configured. This is the bare host and
# port; OllamaSession adds the "/v1" prefix when it builds the client kwargs.
DEFAULT_OLLAMA_BASE_URL: str = "http://localhost:11434"

# Popular Ollama model tags. Assigned to ``OllamaProvider._supported_models``;
# the list is a convenience and is not exhaustive.
OLLAMA_MODELS: list[str] = [
    # Llama family
    "llama3.3",
    "llama3.1",
    # DeepSeek reasoning models
    "deepseek-r1",
    "deepseek-r1:7b",
    # Qwen family
    "qwen2.5",
    "qwen2.5-coder",
    # Other commonly pulled models
    "codellama",
    "mistral",
    "gemma2",
    "phi4",
]
class OllamaSession(OpenAICompatibleSession):
    """Agent session for the Ollama provider.

    Uses the OpenAI-compatible API that Ollama exposes at ``/v1/*``.
    """

    def __init__(
        self,
        session_id: str,
        model: str,
        base_url: str = DEFAULT_OLLAMA_BASE_URL,
        system_prompt: str = "",
        api_key: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
    ):
        """Initialise the session and remember the Ollama connection details.

        Args:
            session_id: Identifier forwarded to the base session.
            model: Ollama model tag forwarded to the base session.
            base_url: Root URL of the Ollama server. Trailing slashes are
                removed; a falsy value falls back to
                ``DEFAULT_OLLAMA_BASE_URL``.
            system_prompt: System prompt forwarded to the base session.
            api_key: Optional API key; ``None`` means "no key configured".
            temperature: Sampling temperature forwarded to the base session.
            max_tokens: Token limit forwarded to the base session.
        """
        super().__init__(
            session_id=session_id,
            provider_name="ollama",
            model=model,
            system_prompt=system_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        # Guard against a None/empty URL (e.g. an explicit null override from
        # the caller) instead of failing with AttributeError on ``.rstrip``.
        self._base_url = (base_url or DEFAULT_OLLAMA_BASE_URL).rstrip("/")
        self._api_key = api_key

    def provider_supports_native_tools(self, model: str | None) -> bool:
        """Delegate to :meth:`OllamaProvider.supports_native_tools` for the model.

        The runtime calls this before opening the native tool loop so an
        Ollama session running a non-tool-capable local model skips the
        loop without paying for an unsupported-tools error round trip.

        Args:
            model: Model tag to check; falls back to ``self.model`` when falsy.

        Returns:
            Whatever ``OllamaProvider.supports_native_tools`` reports.
        """
        return OllamaProvider.supports_native_tools(model or self.model)

    def _build_client_kwargs(self) -> dict[str, Any]:
        """Return the ``base_url`` / ``api_key`` kwargs for the OpenAI client.

        The OpenAI-compatible endpoints live under ``/v1``. Users frequently
        configure the URL with that suffix already present, so it is only
        appended when missing; otherwise requests would go to ``/v1/v1``.
        """
        endpoint = self._base_url
        if not endpoint.endswith("/v1"):
            endpoint = f"{endpoint}/v1"
        return {
            "base_url": endpoint,
            # Placeholder when no key is configured; Ollama ignores API key.
            "api_key": self._api_key or "ollama",
        }
# Substrings that identify Ollama-served models known to support the
# OpenAI-compatible ``tools`` parameter. Matched case-insensitively
# against the configured model identifier. Older or smaller models
# (llama2, codellama, phi-2, gemma:2b, mistral:7b without instruct
# tuning, etc.) generally return JSON in content rather than producing
# real tool_calls; for those we skip the native loop instead of paying
# for an exception round-trip.
_OLLAMA_NATIVE_TOOL_MODEL_TOKENS: tuple[str, ...] = (
    # Llama 3.x
    "llama3.1",
    "llama3.2",
    "llama3.3",
    # Qwen
    "qwen2.5",
    "qwen3",
    # Mistral (larger / newer variants only)
    "mistral-nemo",
    "mistral-large",
    # Cohere Command R
    "command-r",
    "command-r-plus",
    # Function-calling oriented fine-tunes
    "firefunction",
    "functionary",
    "hermes-3",
    # Phi-4 (both spellings seen in model tags)
    "phi-4",
    "phi4",
    # IBM Granite 3
    "granite3",
)
class OllamaProvider(OpenAICompatibleProvider):
    """Ollama provider implementation.

    Provides access to local LLMs running via Ollama.
    Uses Ollama's OpenAI-compatible API endpoint.
    """

    _provider_name = "ollama"
    _supported_models = OLLAMA_MODELS

    @classmethod
    def supports_native_tools(cls, model: str | None) -> bool:
        """Ollama-served models vary widely in tool-call support.

        Returns ``True`` only when the configured model identifier
        matches a known-good token from
        :data:`_OLLAMA_NATIVE_TOOL_MODEL_TOKENS`. Anything else (older
        llama2 lines, tiny phi/gemma variants, custom local builds)
        skips the native loop in favor of the JSON action loop. The
        check is intentionally string-based because Ollama lets users
        run arbitrary local model tags that no central registry covers.

        Args:
            model: Model tag to check; ``None``, empty and whitespace-only
                values are treated as "no tool support".

        Returns:
            ``True`` if any known token is a case-insensitive substring of
            the stripped model tag, otherwise ``False``.
        """
        haystack = (model or "").strip().lower()
        if not haystack:
            return False
        return any(token in haystack for token in _OLLAMA_NATIVE_TOOL_MODEL_TOKENS)

    def _get_api_key(self) -> str | None:
        """Return the API key from the provider config (may be unset)."""
        return self._config.ollama_api_key

    def _is_config_set(self) -> bool:
        """Return ``True`` when an Ollama model is configured."""
        return bool(self._config.ollama_model)

    def _config_env_var(self) -> str:
        """Return the name of the environment variable holding the model."""
        return "OLLAMA_MODEL"

    def _get_model(self, session_config: SessionConfig) -> str:
        """Resolve the model tag for a new session.

        Precedence, highest first: a non-empty ``extra["model"]``, then
        ``session_config.model``, then the configured ``ollama_model``,
        then the empty string.
        """
        model = session_config.model or self._config.ollama_model or ""
        if session_config.extra:
            # ``.get(key, default)`` only falls back when the key is absent,
            # so an explicit None/"" would wipe out the resolved model. Use
            # ``or`` to ignore empty overrides and always return a str.
            model = session_config.extra.get("model") or model
        return model

    def _create_session_instance(
        self, session_id: str, model: str, config: SessionConfig
    ) -> OllamaSession:
        """Build an :class:`OllamaSession` from provider and session config.

        The base URL comes from a non-empty ``config.extra["base_url"]`` if
        present, else the configured ``ollama_base_url``, else
        ``DEFAULT_OLLAMA_BASE_URL``. An empty API key is normalised to
        ``None``.
        """
        base_url = self._config.ollama_base_url or DEFAULT_OLLAMA_BASE_URL
        api_key = self._config.ollama_api_key or None
        if config.extra:
            # Ignore a None/empty override; passing None through would crash
            # the session when it normalises the URL.
            base_url = config.extra.get("base_url") or base_url
        return OllamaSession(
            session_id=session_id,
            model=model,
            base_url=base_url,
            system_prompt=config.system_prompt,
            api_key=api_key,
            temperature=config.temperature,
            max_tokens=config.max_tokens,
        )

    def __repr__(self) -> str:
        """Return a debug representation with the provider name and model."""
        return (
            f"OllamaProvider(name={self.name!r}, model={self._config.ollama_model!r})"
        )