-
Notifications
You must be signed in to change notification settings - Fork 310
Expand file tree
/
Copy pathmodel_variant.py
More file actions
186 lines (146 loc) · 6.98 KB
/
model_variant.py
File metadata and controls
186 lines (146 loc) · 6.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from __future__ import annotations
import logging
from typing import Callable, List, Optional
from ..imodel import IModel
from ..exception import FoundryLocalException
from .core_interop import CoreInterop, InteropRequest
from .model_data_types import ModelInfo
from .core_interop import get_cached_model_ids
from .model_load_manager import ModelLoadManager
from ..openai.audio_client import AudioClient
from ..openai.chat_client import ChatClient
from ..openai.embedding_client import EmbeddingClient
from ..openai.responses_client import ResponsesClient
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class ModelVariant(IModel):
    """A specific variant of a model (e.g. a particular device type, version, or quantization).

    Implements ``IModel`` and provides download, cache, load/unload, and
    client-creation operations for a single model variant.
    """

    def __init__(
        self,
        model_info: ModelInfo,
        model_load_manager: ModelLoadManager,
        core_interop: CoreInterop,
    ) -> None:
        """Initialize a ModelVariant.

        Args:
            model_info: Catalog metadata for this variant.
            model_load_manager: Manager for loading/unloading models.
            core_interop: Native interop layer for Foundry Local Core.
        """
        self._model_info = model_info
        self._model_load_manager = model_load_manager
        self._core_interop = core_interop

        # Copied out of model_info at construction time; the ``id`` and
        # ``alias`` properties return these stored values.
        self._id = model_info.id
        self._alias = model_info.alias

    # ------------------------------------------------------------------
    # Identity and catalog metadata
    # ------------------------------------------------------------------

    @property
    def id(self) -> str:
        """Unique model variant ID (e.g. ``name:version``)."""
        return self._id

    @property
    def alias(self) -> str:
        """Model alias shared across variants."""
        return self._alias

    @property
    def info(self) -> ModelInfo:
        """Full catalog metadata for this variant."""
        return self._model_info

    @property
    def context_length(self) -> Optional[int]:
        """Maximum context length (in tokens) supported by this variant, or ``None`` if unknown."""
        return self._model_info.context_length

    @property
    def variants(self) -> List[IModel]:
        """A one-element list containing this instance.

        A ModelVariant is a single variant, so its only variant is itself.
        """
        return [self]

    def select_variant(self, variant: IModel) -> None:
        """Always fail: variant selection is not supported on a ModelVariant.

        Call ``Catalog.get_model()`` to get an IModel with all variants available.

        Args:
            variant: Ignored; present only to satisfy the ``IModel`` signature.

        Raises:
            FoundryLocalException: Always.
        """
        raise FoundryLocalException(
            "select_variant is not supported on a ModelVariant. "
            f'Call Catalog.get_model("{self._alias}") to get an IModel with all variants available.'
        )

    @property
    def input_modalities(self) -> Optional[str]:
        """Comma-separated input modalities (e.g. ``"text,image"``), or ``None`` if unknown."""
        return self._model_info.input_modalities

    @property
    def output_modalities(self) -> Optional[str]:
        """Comma-separated output modalities (e.g. ``"text"``), or ``None`` if unknown."""
        return self._model_info.output_modalities

    @property
    def capabilities(self) -> Optional[str]:
        """Comma-separated capability tags (e.g. ``"chat,completion"``), or ``None`` if unknown."""
        return self._model_info.capabilities

    @property
    def supports_tool_calling(self) -> Optional[bool]:
        """Whether this variant supports tool/function calling, or ``None`` if unknown."""
        return self._model_info.supports_tool_calling

    # ------------------------------------------------------------------
    # Cache / load state
    # ------------------------------------------------------------------

    @property
    def is_cached(self) -> bool:
        """``True`` if this variant's ID is among the cached model IDs.

        The cached IDs are fetched via ``get_cached_model_ids`` on every access.
        """
        cached_model_ids = get_cached_model_ids(self._core_interop)
        return self.id in cached_model_ids

    @property
    def is_loaded(self) -> bool:
        """``True`` if this variant's ID is among the load manager's loaded models.

        The loaded IDs are fetched via ``ModelLoadManager.list_loaded`` on every access.
        """
        loaded_model_ids = self._model_load_manager.list_loaded()
        return self.id in loaded_model_ids

    # ------------------------------------------------------------------
    # Cache and lifecycle operations
    # ------------------------------------------------------------------

    def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
        """Download this variant to the local cache.

        Args:
            progress_callback: Optional callback receiving download progress as a
                percentage (0.0 to 100.0). When omitted, the download command is
                issued without a progress callback.

        Raises:
            FoundryLocalException: If the response from the core carries an error.
        """
        request = InteropRequest(params={"Model": self.id})

        if progress_callback is None:
            response = self._core_interop.execute_command("download_model", request)
        else:
            # The value handed to the interop callback is converted with float()
            # before being forwarded; presumably the core reports progress as a
            # numeric string -- TODO confirm against CoreInterop.
            response = self._core_interop.execute_command_with_callback(
                "download_model",
                request,
                lambda pct_str: progress_callback(float(pct_str)),
            )

        logger.info("Download response: %s", response)
        if response.error is not None:
            raise FoundryLocalException(f"Failed to download model: {response.error}")

    def get_path(self) -> str:
        """Get the local file-system path to this variant if cached.

        Returns:
            Path to the model directory (the ``data`` field of the core's response).

        Raises:
            FoundryLocalException: If the model path cannot be retrieved.
        """
        request = InteropRequest(params={"Model": self.id})
        response = self._core_interop.execute_command("get_model_path", request)
        if response.error is not None:
            raise FoundryLocalException(f"Failed to get model path: {response.error}")
        return response.data

    def load(self) -> None:
        """Load this variant into memory for inference (delegates to the load manager)."""
        self._model_load_manager.load(self.id)

    def remove_from_cache(self) -> None:
        """Remove this variant from the local model cache.

        Raises:
            FoundryLocalException: If the response from the core carries an error.
        """
        request = InteropRequest(params={"Model": self.id})
        response = self._core_interop.execute_command("remove_cached_model", request)
        if response.error is not None:
            raise FoundryLocalException(f"Failed to remove model from cache: {response.error}")

    def unload(self) -> None:
        """Unload this variant from memory (delegates to the load manager)."""
        self._model_load_manager.unload(self.id)

    # ------------------------------------------------------------------
    # Client factories
    # ------------------------------------------------------------------

    def get_chat_client(self) -> ChatClient:
        """Create an OpenAI-compatible ``ChatClient`` for this variant."""
        return ChatClient(self.id, self._core_interop)

    def get_audio_client(self) -> AudioClient:
        """Create an OpenAI-compatible ``AudioClient`` for this variant."""
        return AudioClient(self.id, self._core_interop)

    def get_embedding_client(self) -> EmbeddingClient:
        """Create an OpenAI-compatible ``EmbeddingClient`` for this variant."""
        return EmbeddingClient(self.id, self._core_interop)

    def create_responses_client(self, base_url: str) -> ResponsesClient:
        """Create a Responses API client for this variant.

        Unlike the other client factories, this one is constructed from a base
        URL rather than from the core interop layer.

        Args:
            base_url: Base URL of the running Foundry Local web service
                (e.g. ``manager.urls[0]``).
        """
        return ResponsesClient(base_url, self.id)