diff --git a/data/schema.graphql b/data/schema.graphql index 700055bf40..a9d2539c8a 100644 --- a/data/schema.graphql +++ b/data/schema.graphql @@ -260,7 +260,7 @@ type AdminGetSSHKeypairPayload } """ -Added in 26.4.3. Payload for admin bulk revision refresh mutation result. +Added in UNRELEASED. Payload for admin bulk revision refresh mutation result. """ type AdminRefreshDeploymentRevisionsPayload @join__type(graph: STRAWBERRY) @@ -1682,7 +1682,7 @@ type AutoScalingRule implements Node lastTriggeredAt: DateTime """ - Added in 26.4.3. The Prometheus query preset used for metric-based auto-scaling. + Added in UNRELEASED. The Prometheus query preset used for metric-based auto-scaling. """ queryPreset: QueryDefinition } @@ -4669,26 +4669,6 @@ input DeploymentFilter openToPublic: Boolean = null tags: StringFilter = null endpointUrl: StringFilter = null - - """Added in 26.4.3. Filter by domain name.""" - domainName: StringFilter = null - - """Added in 26.4.3. Filter by project ID.""" - projectId: UUIDFilter = null - - """Added in 26.4.3. Filter by resource group name.""" - resourceGroup: StringFilter = null - - """Added in 26.4.3. Filter by the user who created the deployment.""" - createdUserId: UUIDFilter = null - - """Added in 26.4.3. Filter by deployment creation datetime.""" - createdAt: DateTimeFilter = null - - """ - Added in 26.4.3. Filter by deployment destruction datetime. Supports IS NULL / IS NOT NULL. - """ - destroyedAt: NullableDateTimeFilter = null AND: [DeploymentFilter!] = null OR: [DeploymentFilter!] = null NOT: [DeploymentFilter!] = null @@ -4713,7 +4693,7 @@ type DeploymentHistory implements Node createdAt: DateTime! updatedAt: DateTime! - """Added in 26.4.3. The deployment this history record belongs to.""" + """Added in UNRELEASED. 
The deployment this history record belongs to.""" deployment: ModelDeployment } @@ -4788,11 +4768,7 @@ enum DeploymentOrderField { NAME @join__enumValue(graph: STRAWBERRY) CREATED_AT @join__enumValue(graph: STRAWBERRY) - DESTROYED_AT @join__enumValue(graph: STRAWBERRY) - DOMAIN @join__enumValue(graph: STRAWBERRY) - PROJECT @join__enumValue(graph: STRAWBERRY) - RESOURCE_GROUP @join__enumValue(graph: STRAWBERRY) - TAG @join__enumValue(graph: STRAWBERRY) + UPDATED_AT @join__enumValue(graph: STRAWBERRY) } """Added in 25.19.0. Deployment policy configuration.""" @@ -4868,7 +4844,7 @@ type DeploymentRevisionPreset implements Node """Timestamp of the last modification to this deployment preset.""" updatedAt: DateTime - """Added in 26.4.3. The runtime variant this preset is designed for.""" + """Added in UNRELEASED. The runtime variant this preset is designed for.""" runtimeVariant: RuntimeVariant """Added in 26.4.2. Resource slot allocations for this preset.""" @@ -6168,7 +6144,7 @@ type EntityRef implements Node entity: EntityNode """ - Added in 26.4.3. The resolved scope object in which the entity is registered. + Added in UNRELEASED. The resolved scope object in which the entity is registered. """ scope: EntityNode } @@ -7370,7 +7346,7 @@ type KernelV2 implements Node """Added in 26.2.0. The agent running this kernel.""" agent: AgentV2 - """Added in 26.4.3. The image used by this kernel.""" + """Added in UNRELEASED. The image used by this kernel.""" image: ImageV2 """Added in 26.2.0. The user who owns this kernel.""" @@ -7698,7 +7674,7 @@ type KeyPairGQL implements Node """UUID of the user who owns this keypair.""" userId: UUID! - """Added in 26.4.3. The user who owns this keypair.""" + """Added in UNRELEASED. The user who owns this keypair.""" user: UserV2 } @@ -8192,10 +8168,10 @@ type LoginHistoryV2 implements Node """Timestamp when the login attempt occurred.""" createdAt: DateTime! - """Added in 26.4.3. 
The user who attempted to log in.""" + """Added in UNRELEASED. The user who attempted to log in.""" user: UserV2 - """Added in 26.4.3. The domain at the time of the login attempt.""" + """Added in UNRELEASED. The domain at the time of the login attempt.""" domain: DomainV2 } @@ -8306,7 +8282,7 @@ type LoginSessionV2 implements Node """Timestamp when the session was invalidated.""" invalidatedAt: DateTime - """Added in 26.4.3. The user who owns this login session.""" + """Added in UNRELEASED. The user who owns this login session.""" user: UserV2 } @@ -8530,13 +8506,13 @@ type ModelCardV2 implements Node """ vfolder: VFolder - """Added in 26.4.3. The domain this model card belongs to.""" + """Added in UNRELEASED. The domain this model card belongs to.""" domain: DomainV2 - """Added in 26.4.3. The project this model card belongs to.""" + """Added in UNRELEASED. The project this model card belongs to.""" project: ProjectV2 - """Added in 26.4.3. The user who created this model card.""" + """Added in UNRELEASED. The user who created this model card.""" creator: UserV2 """ @@ -8730,17 +8706,17 @@ type ModelDeployment implements Node createdUserId: ID! """ - Added in 26.4.3. The current active revision of this deployment, resolved via DataLoader. + Added in UNRELEASED. The current active revision of this deployment, resolved via DataLoader. """ currentRevision: ModelRevision """ - Added in 26.4.3. The revision currently being deployed (in progress, not yet active), resolved via DataLoader. + Added in UNRELEASED. The revision currently being deployed (in progress, not yet active), resolved via DataLoader. """ deployingRevision: ModelRevision """ - Added in 26.4.3. The user who created this deployment, resolved via DataLoader. + Added in UNRELEASED. The user who created this deployment, resolved via DataLoader. """ creator: UserV2 @@ -8793,7 +8769,7 @@ type ModelDeploymentMetadata project: GroupNode! @deprecated(reason: "Use project_v2 instead.") """ - Added in 26.4.3. 
The project this deployment belongs to, resolved via DataLoader. + Added in UNRELEASED. The project this deployment belongs to, resolved via DataLoader. """ projectV2: ProjectV2 @@ -8801,7 +8777,7 @@ type ModelDeploymentMetadata domain: DomainNode! @deprecated(reason: "Use domain_v2 instead.") """ - Added in 26.4.3. The domain this deployment belongs to, resolved via DataLoader. + Added in UNRELEASED. The domain this deployment belongs to, resolved via DataLoader. """ domainV2: DomainV2 projectId: ID! @@ -9030,7 +9006,7 @@ type ModelReplica implements Node session: ComputeSessionNode! @deprecated(reason: "Use session_v2 instead.") """ - Added in 26.4.3. The compute session running this replica, resolved via DataLoader. + Added in UNRELEASED. The compute session running this replica, resolved via DataLoader. """ sessionV2: SessionV2 @@ -9102,7 +9078,7 @@ type ModelRevision implements Node image: ImageNode! @deprecated(reason: "Use image_v2 instead.") """ - Added in 26.4.3. The container image used by this revision, resolved via DataLoader. + Added in UNRELEASED. The container image used by this revision, resolved via DataLoader. """ imageV2: ImageV2 @@ -9139,21 +9115,6 @@ input ModelRevisionFilter { revisionNumber: IntFilter = null deploymentId: ID = null - - """Added in 26.4.3. Filter by container image ID.""" - imageId: UUIDFilter = null - - """Added in 26.4.3. Filter by model VFolder ID.""" - modelVfolderId: UUIDFilter = null - - """Added in 26.4.3. Filter by resource group name.""" - resourceGroup: StringFilter = null - - """Added in 26.4.3. Filter by cluster mode (SINGLE_NODE / MULTI_NODE).""" - clusterMode: StringFilter = null - - """Added in 26.4.3. Filter by revision creation datetime.""" - createdAt: DateTimeFilter = null AND: [ModelRevisionFilter!] = null OR: [ModelRevisionFilter!] = null NOT: [ModelRevisionFilter!] 
= null @@ -9173,9 +9134,6 @@ enum ModelRevisionOrderField { REVISION_NUMBER @join__enumValue(graph: STRAWBERRY) CREATED_AT @join__enumValue(graph: STRAWBERRY) - RESOURCE_GROUP @join__enumValue(graph: STRAWBERRY) - CLUSTER_MODE @join__enumValue(graph: STRAWBERRY) - RUNTIME_VARIANT @join__enumValue(graph: STRAWBERRY) } """ @@ -10241,7 +10199,7 @@ type Mutation addModelRevision(input: AddRevisionInput!, options: AddRevisionOptions = null): AddRevisionPayload! @join__field(graph: STRAWBERRY) """ - Added in 26.4.3. Rebuild and activate a fresh revision for every active deployment (superadmin). Used to repair deployments whose current revision has stale or missing model_definition after backing store migrations. + Added in UNRELEASED. Rebuild and activate a fresh revision for every active deployment (superadmin). Used to repair deployments whose current revision has stale or missing model_definition after backing store migrations. """ adminRefreshDeploymentRevisions: AdminRefreshDeploymentRevisionsPayload! @join__field(graph: STRAWBERRY) @@ -11126,7 +11084,7 @@ input NotificationRuleTypeFilter } """ -Added in 26.4.3. Filter for nullable datetime fields with IS NULL / IS NOT NULL support. +Added in UNRELEASED. Filter for nullable datetime fields with IS NULL / IS NOT NULL support. """ input NullableDateTimeFilter @join__type(graph: STRAWBERRY) @@ -13124,7 +13082,7 @@ type Query projectSessionsV2(scope: ProjectSessionV2Scope!, filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! @join__field(graph: STRAWBERRY) """ - Added in 26.4.3. Query a single session by ID. Returns an error if not found. + Added in UNRELEASED. Query a single session by ID. Returns an error if not found. 
""" sessionV2(id: UUID!): SessionV2 @join__field(graph: STRAWBERRY) @@ -13597,7 +13555,7 @@ type QueryDefinition implements Node """Last update timestamp.""" updatedAt: DateTime! - """Added in 26.4.3. Resolved category entity.""" + """Added in UNRELEASED. Resolved category entity.""" category: QueryPresetCategory } @@ -14429,7 +14387,7 @@ type ResourcePresetV2 implements Node """Resource group name. Null means global preset.""" resourceGroupName: String - """Added in 26.4.3. The resource group this preset belongs to.""" + """Added in UNRELEASED. The resource group this preset belongs to.""" resourceGroup: ResourceGroup } @@ -14639,7 +14597,7 @@ type RestoreArtifactsPayload } """ -Added in 26.4.3. Per-deployment result of an admin bulk revision refresh. +Added in UNRELEASED. Per-deployment result of an admin bulk revision refresh. """ type RevisionRefreshResult @join__type(graph: STRAWBERRY) @@ -14826,7 +14784,7 @@ type RoleAssignment implements Node """The assigned user.""" user: UserV2 - """Added in 26.4.3. The user who granted this role assignment.""" + """Added in UNRELEASED. The user who granted this role assignment.""" grantedByUser: UserV2 } @@ -15055,7 +15013,7 @@ type Route implements Node session: ID @deprecated(reason: "Use session_v2 instead.") """ - Added in 26.4.3. The compute session associated with this route, resolved via DataLoader. + Added in UNRELEASED. The compute session associated with this route, resolved via DataLoader. """ sessionV2: SessionV2 @@ -15131,10 +15089,10 @@ type RouteHistory implements Node createdAt: DateTime! updatedAt: DateTime! - """Added in 26.4.3. The route this history record belongs to.""" + """Added in UNRELEASED. The route this history record belongs to.""" route: Route - """Added in 26.4.3. The deployment this history record belongs to.""" + """Added in UNRELEASED. 
The deployment this history record belongs to.""" deployment: ModelDeployment } @@ -16078,7 +16036,7 @@ type SessionSchedulingHistory implements Node createdAt: DateTime! updatedAt: DateTime! - """Added in 26.4.3. The session this history record belongs to.""" + """Added in UNRELEASED. The session this history record belongs to.""" session: SessionV2 } @@ -16205,7 +16163,7 @@ type SessionV2 implements Node resourceGroup: ResourceGroup """ - Added in 26.4.3. The images used by this session. Multiple images are possible in multi-kernel (cluster) sessions. + Added in UNRELEASED. The images used by this session. Multiple images are possible in multi-kernel (cluster) sessions. """ images: ImageV2Connection! diff --git a/packages/backend.ai-webui-docs/src/en/images/endpoint_current_revision_modal.png b/packages/backend.ai-webui-docs/src/en/images/endpoint_current_revision_modal.png new file mode 100644 index 0000000000..0e232a0ee2 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/en/images/endpoint_current_revision_modal.png differ diff --git a/packages/backend.ai-webui-docs/src/en/images/endpoint_revision_info.png b/packages/backend.ai-webui-docs/src/en/images/endpoint_revision_info.png new file mode 100644 index 0000000000..66ce05e57d Binary files /dev/null and b/packages/backend.ai-webui-docs/src/en/images/endpoint_revision_info.png differ diff --git a/packages/backend.ai-webui-docs/src/en/images/endpoint_revision_mismatch.png b/packages/backend.ai-webui-docs/src/en/images/endpoint_revision_mismatch.png new file mode 100644 index 0000000000..c3ace13d30 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/en/images/endpoint_revision_mismatch.png differ diff --git a/packages/backend.ai-webui-docs/src/en/images/endpoint_service_ready_alert.png b/packages/backend.ai-webui-docs/src/en/images/endpoint_service_ready_alert.png new file mode 100644 index 0000000000..2eefdd35ff Binary files /dev/null and 
b/packages/backend.ai-webui-docs/src/en/images/endpoint_service_ready_alert.png differ diff --git a/packages/backend.ai-webui-docs/src/en/images/service_launcher3.png b/packages/backend.ai-webui-docs/src/en/images/service_launcher3.png index 62fd2fa3b1..6eeeecb0da 100644 Binary files a/packages/backend.ai-webui-docs/src/en/images/service_launcher3.png and b/packages/backend.ai-webui-docs/src/en/images/service_launcher3.png differ diff --git a/packages/backend.ai-webui-docs/src/en/images/service_launcher_runtime_params.png b/packages/backend.ai-webui-docs/src/en/images/service_launcher_runtime_params.png index 06e2386ced..2d6edcf0c3 100644 Binary files a/packages/backend.ai-webui-docs/src/en/images/service_launcher_runtime_params.png and b/packages/backend.ai-webui-docs/src/en/images/service_launcher_runtime_params.png differ diff --git a/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md index 34a8557e2c..8306778f9f 100644 --- a/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md @@ -458,6 +458,28 @@ The parameters are organized into categories: Unchanged parameters will use the runtime's default values. ::: +In addition to runtime parameters, the `vLLM` and `SGLang` runtime variants expose specific environment variables in the **Environment Variables** section of the service launcher: + +- **vLLM**: `BACKEND_MODEL_NAME`, `VLLM_QUANTIZATION`, `VLLM_TP_SIZE` (tensor parallelism), `VLLM_PP_SIZE` (pipeline parallelism), `VLLM_EXTRA_ARGS` (extra CLI arguments) +- **SGLang**: `BACKEND_MODEL_NAME`, `SGLANG_QUANTIZATION`, `SGLANG_TP_SIZE` (tensor parallelism), `SGLANG_PP_SIZE` (pipeline parallelism), `SGLANG_EXTRA_ARGS` (extra CLI arguments) + +:::note +These environment variables appear in the **Environment Variables** section of the launcher, +not in the Runtime Parameters section. 
They provide additional configuration options +specific to each runtime variant. +::: + +#### Runtime Variant Comparison + +The following table summarizes the key differences between the three main runtime variants: + +| Feature | Custom | vLLM | SGLang | +|---------|--------|------|--------| +| Runtime Parameters section | No | Yes | Yes | +| Enter Command / Use Config File toggle | Yes | No | No | +| Environment variable presets | Manual only | `VLLM_*` presets | `SGLANG_*` presets | +| Model definition values pre-populated on edit | Yes (from latest revision) | No | No | + #### Environment and Resources Set the number of replicas and select the environment and resource group. @@ -541,9 +563,67 @@ The Service Info card displays the following details: Click the `Edit` button on the Service Info card to navigate to the update launcher and modify the service settings. -:::warning -If the endpoint belongs to a different project than the currently selected one, -a project mismatch warning is displayed. Switch to the correct project to manage the endpoint. +The Endpoint Detail Page displays contextual alert banners at the top, depending on the current state of the service: + +- **Preparing your service**: Shown while the service is being deployed or transitioning between states. Indicates the service is not yet ready to handle requests. + +![](../images/endpoint_preparing_alert.png) + + +- **Service is ready**: Shown when the service is `HEALTHY`. Includes a **Start Chat** button as a shortcut to the LLM Chat Test interface. + +![](../images/endpoint_service_ready_alert.png) + +- **Not In Project**: Shown when the endpoint belongs to a different project than the currently selected one. The Edit button is disabled while this alert is active. Click the **Switch Project** button in the alert to switch to the correct project and manage the endpoint. 
+ + + +### Revision Info + +:::note +The Revision Info card is available when the server supports Model Card v2 +(Backend.AI version 26.4.0 and later). +::: + +The Revision Info card on the Endpoint Detail Page displays the configuration of the **latest revision** — the revision that is queued to be applied next. This may differ from the revision that is currently running on the service. + +![](../images/endpoint_revision_info.png) + +The card shows the following fields: + +- **Revision ID**: The identifier of the latest revision. +- **Model Name**: The name of the model as defined in the model definition. +- **Model Path**: The path where the model is mounted. +- **Start Command**: The command used to start the inference server. +- **Port**: The container port for the model service. +- **Health Check Path**: The HTTP endpoint path for health checks. +- **Initial Delay**: Seconds to wait before the first health check. +- **Max Retries**: Maximum consecutive health check failures allowed. + +#### Revision Mismatch State + +When a new revision has been queued but the service is still running on the previous revision, a **"The next revision is being applied."** alert is displayed on the Revision Info card. This indicates that the latest revision values shown in the card do not yet match the currently running configuration. + +![](../images/endpoint_revision_mismatch.png) + +Click the **View Current Revision** button to open a modal that shows the model definition of the revision that is **currently running**. This allows you to compare the upcoming revision (shown in the Revision Info card) with the active revision (shown in the modal). + +![](../images/endpoint_current_revision_modal.png) + +:::tip +To summarize: the **Revision Info card** always shows the **latest/upcoming** revision values, +while the **View Current Revision modal** shows the **currently running** revision values. 
+::: + +#### Edit Behavior With Revisions (Custom Variant Only) + +When you click the **Edit** button on the Service Info panel for a service using the `Custom` runtime variant, the service launcher form is pre-populated with the latest revision's model definition values as defaults. This makes it easy to adjust settings incrementally without re-entering all fields. + +:::note +This pre-population of model definition values applies only to the `Custom` runtime variant. +`vLLM` and `SGLang` variants do not use model definition fields at all — they expose a +**Runtime Parameters** section (`inference_runtime_config`) for framework-specific configuration. +Model definition and runtime parameters are distinct concepts stored separately in the revision. ::: ### Auto Scaling Rules @@ -666,10 +746,14 @@ The Routes Info card shows the routing status of the model service. You can filt - **Running / Finished**: Toggle between active and completed route nodes. - **Property filter**: Filter by health status and traffic status. -Click the `Sync Routes` button to synchronize the route information with the backend. Click on a route node to open the session detail drawer, where you can view individual session details. +If a route has encountered an error, clicking the error indicator on the route row opens a JSON viewer modal that displays the raw error data for that route. This is useful for diagnosing issues with individual route nodes. + +![](../images/route_error_json_viewer.png) + + ### Modifying a Service Click the `Edit` button on the endpoint detail page to modify a model service. The service launcher opens with previously entered fields already filled in. You can optionally modify only the fields you wish to change. After modifying the fields, click `Confirm` to apply the changes. 
diff --git a/packages/backend.ai-webui-docs/src/ja/images/endpoint_current_revision_modal.png b/packages/backend.ai-webui-docs/src/ja/images/endpoint_current_revision_modal.png new file mode 100644 index 0000000000..0e232a0ee2 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ja/images/endpoint_current_revision_modal.png differ diff --git a/packages/backend.ai-webui-docs/src/ja/images/endpoint_revision_info.png b/packages/backend.ai-webui-docs/src/ja/images/endpoint_revision_info.png new file mode 100644 index 0000000000..66ce05e57d Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ja/images/endpoint_revision_info.png differ diff --git a/packages/backend.ai-webui-docs/src/ja/images/endpoint_revision_mismatch.png b/packages/backend.ai-webui-docs/src/ja/images/endpoint_revision_mismatch.png new file mode 100644 index 0000000000..c3ace13d30 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ja/images/endpoint_revision_mismatch.png differ diff --git a/packages/backend.ai-webui-docs/src/ja/images/endpoint_service_ready_alert.png b/packages/backend.ai-webui-docs/src/ja/images/endpoint_service_ready_alert.png new file mode 100644 index 0000000000..2eefdd35ff Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ja/images/endpoint_service_ready_alert.png differ diff --git a/packages/backend.ai-webui-docs/src/ja/images/service_launcher3.png b/packages/backend.ai-webui-docs/src/ja/images/service_launcher3.png index 62fd2fa3b1..6eeeecb0da 100644 Binary files a/packages/backend.ai-webui-docs/src/ja/images/service_launcher3.png and b/packages/backend.ai-webui-docs/src/ja/images/service_launcher3.png differ diff --git a/packages/backend.ai-webui-docs/src/ja/images/service_launcher_runtime_params.png b/packages/backend.ai-webui-docs/src/ja/images/service_launcher_runtime_params.png index 06e2386ced..2d6edcf0c3 100644 Binary files a/packages/backend.ai-webui-docs/src/ja/images/service_launcher_runtime_params.png and 
b/packages/backend.ai-webui-docs/src/ja/images/service_launcher_runtime_params.png differ diff --git a/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md index 26240ef4ff..14ddfcc2db 100644 --- a/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md @@ -436,6 +436,16 @@ vllm serve /models/my-model --tp 2 変更されていないパラメータはランタイムのデフォルト値を使用します。 ::: +ランタイムパラメータに加えて、`vLLM` および `SGLang` ランタイムバリアントは、サービスランチャーの**環境変数**セクションで特定の環境変数を提供します: + +- **vLLM**: `BACKEND_MODEL_NAME`、`VLLM_QUANTIZATION`、`VLLM_TP_SIZE`(テンソル並列化)、`VLLM_PP_SIZE`(パイプライン並列化)、`VLLM_EXTRA_ARGS`(追加CLIアーギュメント) +- **SGLang**: `BACKEND_MODEL_NAME`、`SGLANG_QUANTIZATION`、`SGLANG_TP_SIZE`(テンソル並列化)、`SGLANG_PP_SIZE`(パイプライン並列化)、`SGLANG_EXTRA_ARGS`(追加CLIアーギュメント) + +:::note +これらの環境変数は、ランタイムパラメータセクションではなく、ランチャーの**環境変数**セクションに +表示されます。各ランタイムバリアント固有の追加構成オプションを提供します。 +::: + #### 環境とリソース レプリカ数を設定し、環境とリソースグループを選択します。 @@ -516,9 +526,67 @@ vllm serve /models/my-model --tp 2 サービス情報カードの`Edit`ボタンをクリックすると、更新ランチャーに移動してサービス設定を変更できます。 -:::warning -エンドポイントが現在選択されているプロジェクトとは異なるプロジェクトに属している場合、 -プロジェクト不一致の警告が表示されます。エンドポイントを管理するには、正しいプロジェクトに切り替えてください。 +エンドポイント詳細ページでは、サービスの現在の状態に応じて、ページ上部にコンテキストに応じたアラートバナーが表示されます: + +- **サービスを準備しています**: サービスがデプロイ中またはステータス遷移中に表示されます。サービスがまだリクエストを処理する準備ができていないことを示します。 + +![](../images/endpoint_preparing_alert.png) + + +- **サービスの準備が整いました**: サービスのステータスが `HEALTHY` の場合に表示されます。このバナーには、LLMチャットテストインターフェースへのショートカットを提供する **チャットを開始** ボタンが含まれています。 + +![](../images/endpoint_service_ready_alert.png) + +- **このモデルサービスは別のプロジェクトに属しています**: エンドポイントが現在選択されているプロジェクトとは異なるプロジェクトに属している場合に表示されます。このアラートが表示されている間は Edit ボタンが無効になります。アラート内の **プロジェクトを切り替える** ボタンをクリックして正しいプロジェクトに切り替えてください。 + + + +### リビジョン情報 + +:::note +リビジョン情報カードは、サーバーが Model Card v2 をサポートしている場合 +(Backend.AI バージョン 26.4.0 以降)に利用可能です。 +::: + +エンドポイント詳細ページのリビジョン情報カードは、**最新リビジョン** — 
次に適用される予定のリビジョンの構成を表示します。これは、現在サービスで実行されているリビジョンとは異なる場合があります。 + +![](../images/endpoint_revision_info.png) + +カードには以下のフィールドが表示されます: + +- **Revision ID**: 最新リビジョンの識別子です。 +- **Model Name**: モデル定義で定義されたモデル名です。 +- **Model Path**: モデルがマウントされているパスです。 +- **Start Command**: 推論サーバーの起動に使用されるコマンドです。 +- **Port**: モデルサービス用のコンテナポートです。 +- **Health Check Path**: ヘルスチェック用のHTTPエンドポイントパスです。 +- **Initial Delay**: 最初のヘルスチェックまでの待機秒数です。 +- **Max Retries**: 許容される連続ヘルスチェック失敗の最大回数です。 + +#### リビジョン不一致の状態 + +新しいリビジョンがキューに追加されたが、サービスがまだ前のリビジョンで実行されている場合、リビジョン情報カードに **「次のリビジョンを適用中です。」** アラートが表示されます。これは、カードに表示されている最新リビジョンの値が、現在実行中の構成とまだ一致していないことを示します。 + +![](../images/endpoint_revision_mismatch.png) + +**現在のリビジョンを表示** ボタンをクリックすると、**現在実行中の**リビジョンのモデル定義を表示するモーダルが開きます。これにより、今後のリビジョン(リビジョン情報カードに表示)と現在アクティブなリビジョン(モーダルに表示)を比較できます。 + +![](../images/endpoint_current_revision_modal.png) + +:::tip +まとめると:**リビジョン情報カード**は常に**最新/今後の**リビジョン値を表示し、 +**現在のリビジョンを表示モーダル**は**現在実行中の**リビジョン値を表示します。 +::: + +#### リビジョンを使用した編集動作(Customバリアント専用) + +`Custom` ランタイムバリアントを使用しているサービスで、サービス情報パネルの **Edit** ボタンをクリックすると、サービスランチャーフォームに最新リビジョンのモデル定義値がデフォルト値として事前入力されます。これにより、すべてのフィールドを再入力することなく、設定を段階的に調整できます。 + +:::note +このモデル定義値の事前入力動作は、`Custom` ランタイムバリアントにのみ適用されます。 +`vLLM` および `SGLang` バリアントはモデル定義フィールドを使用しません。代わりに、 +フレームワーク固有の設定用の **ランタイムパラメータ** セクション(`inference_runtime_config`)を提供します。 +モデル定義とランタイムパラメータは、リビジョン内に別々に保存される独立した概念です。 ::: ### 自動スケーリングルール @@ -642,10 +710,14 @@ Backend.AI バージョン 26.4.0 以降では、Prometheus メトリックソ - **Running / Finished**: アクティブなルートノードと完了したルートノードを切り替えます。 - **プロパティフィルター**: ヘルスステータスおよびトラフィックステータスでフィルタリングします。 -`Sync Routes`ボタンをクリックして、ルート情報をバックエンドと同期します。 ルートノードをクリックするとセッション詳細ドロワーが開き、個別のセッション詳細を表示できます。 +ルートにエラーが発生した場合、ルート行のエラーインジケーターをクリックすると、そのルートの生のエラーデータを表示するJSONビューアーモーダルが開きます。これは、個々のルートノードの問題を診断するのに役立ちます。 + +![](../images/route_error_json_viewer.png) + + ### サービスの変更 エンドポイント詳細ページの`Edit`ボタンをクリックして、モデルサービスを変更します。以前に入力したフィールドが入力された状態でサービスランチャーが開きます。変更したいフィールドのみを選択的に変更できます。フィールドを変更した後、`Confirm`をクリックして変更を適用します。 diff 
--git a/packages/backend.ai-webui-docs/src/ko/images/endpoint_current_revision_modal.png b/packages/backend.ai-webui-docs/src/ko/images/endpoint_current_revision_modal.png new file mode 100644 index 0000000000..0e232a0ee2 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ko/images/endpoint_current_revision_modal.png differ diff --git a/packages/backend.ai-webui-docs/src/ko/images/endpoint_revision_info.png b/packages/backend.ai-webui-docs/src/ko/images/endpoint_revision_info.png new file mode 100644 index 0000000000..66ce05e57d Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ko/images/endpoint_revision_info.png differ diff --git a/packages/backend.ai-webui-docs/src/ko/images/endpoint_revision_mismatch.png b/packages/backend.ai-webui-docs/src/ko/images/endpoint_revision_mismatch.png new file mode 100644 index 0000000000..c3ace13d30 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ko/images/endpoint_revision_mismatch.png differ diff --git a/packages/backend.ai-webui-docs/src/ko/images/endpoint_service_ready_alert.png b/packages/backend.ai-webui-docs/src/ko/images/endpoint_service_ready_alert.png new file mode 100644 index 0000000000..2eefdd35ff Binary files /dev/null and b/packages/backend.ai-webui-docs/src/ko/images/endpoint_service_ready_alert.png differ diff --git a/packages/backend.ai-webui-docs/src/ko/images/service_launcher3.png b/packages/backend.ai-webui-docs/src/ko/images/service_launcher3.png index 62fd2fa3b1..6eeeecb0da 100644 Binary files a/packages/backend.ai-webui-docs/src/ko/images/service_launcher3.png and b/packages/backend.ai-webui-docs/src/ko/images/service_launcher3.png differ diff --git a/packages/backend.ai-webui-docs/src/ko/images/service_launcher_runtime_params.png b/packages/backend.ai-webui-docs/src/ko/images/service_launcher_runtime_params.png index 06e2386ced..2d6edcf0c3 100644 Binary files a/packages/backend.ai-webui-docs/src/ko/images/service_launcher_runtime_params.png and 
b/packages/backend.ai-webui-docs/src/ko/images/service_launcher_runtime_params.png differ diff --git a/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md index 56f71cac1b..6601649e57 100644 --- a/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md @@ -437,6 +437,27 @@ vllm serve /models/my-model --tp 2 변경하지 않은 파라미터는 런타임 기본값이 사용됩니다. ::: +런타임 파라미터 외에도, `vLLM` 및 `SGLang` 런타임 변형은 서비스 런처의 **환경 변수** 섹션에서 특정 환경 변수를 제공합니다: + +- **vLLM**: `BACKEND_MODEL_NAME`, `VLLM_QUANTIZATION`, `VLLM_TP_SIZE` (텐서 병렬화), `VLLM_PP_SIZE` (파이프라인 병렬화), `VLLM_EXTRA_ARGS` (추가 CLI 인자) +- **SGLang**: `BACKEND_MODEL_NAME`, `SGLANG_QUANTIZATION`, `SGLANG_TP_SIZE` (텐서 병렬화), `SGLANG_PP_SIZE` (파이프라인 병렬화), `SGLANG_EXTRA_ARGS` (추가 CLI 인자) + +:::note +이러한 환경 변수는 런타임 파라미터 섹션이 아닌 런처의 **환경 변수** 섹션에 나타납니다. +각 런타임 변형에 특화된 추가 구성 옵션을 제공합니다. +::: + +#### 런타임 변형 비교 + +다음 표는 세 가지 주요 런타임 변형 간의 주요 차이점을 요약합니다: + +| 기능 | Custom | vLLM | SGLang | +|------|--------|------|--------| +| 런타임 파라미터 섹션 | 없음 | 있음 | 있음 | +| 명령어 입력 / 설정 파일 사용 전환 | 있음 | 없음 | 없음 | +| 환경 변수 프리셋 | 수동 입력만 | `VLLM_*` 프리셋 | `SGLANG_*` 프리셋 | +| 편집 시 모델 정의 값 사전 채우기 | 있음 (최신 리비전 기준) | 없음 | 없음 | + #### 환경 및 리소스 레플리카 수를 설정하고 환경 및 자원 그룹을 선택합니다. @@ -518,9 +539,67 @@ vllm serve /models/my-model --tp 2 서비스 정보 카드에서 `Edit` 버튼을 클릭하면 업데이트 런처로 이동하여 서비스 설정을 수정할 수 있습니다. -:::warning -엔드포인트가 현재 선택된 프로젝트와 다른 프로젝트에 속하는 경우, -프로젝트 불일치 경고가 표시됩니다. 엔드포인트를 관리하려면 올바른 프로젝트로 전환하세요. +엔드포인트 상세 페이지는 서비스의 현재 상태에 따라 페이지 상단에 상황별 알림 배너를 표시합니다: + +- **서비스를 준비하고 있습니다**: 서비스가 배포 중이거나 상태 전환 중일 때 표시됩니다. 서비스가 아직 요청을 처리할 준비가 되지 않았음을 나타냅니다. + +![](../images/endpoint_preparing_alert.png) + + +- **서비스가 준비되었습니다**: 서비스 상태가 `HEALTHY`일 때 표시됩니다. 이 배너에는 LLM 채팅 테스트 인터페이스로의 바로가기를 제공하는 **채팅 시작** 버튼이 포함됩니다. 
+ +![](../images/endpoint_service_ready_alert.png) + +- **이 모델 서비스는 다른 프로젝트에 속해 있습니다**: 엔드포인트가 현재 선택된 프로젝트와 다른 프로젝트에 속할 때 표시됩니다. 
이 알림이 표시되는 동안 Edit 버튼은 비활성화됩니다. 알림의 **프로젝트 전환** 버튼을 클릭하여 올바른 프로젝트로 전환하고 엔드포인트를 관리할 수 있습니다. + + + +### 리비전 정보 + +:::note +리비전 정보 카드는 서버가 Model Card v2를 지원하는 경우 +(Backend.AI 버전 26.4.0 이상)에 사용할 수 있습니다. +::: + +엔드포인트 상세 페이지의 리비전 정보 카드는 **최신 리비전** — 다음에 적용될 예정인 리비전의 구성을 표시합니다. 이는 현재 서비스에서 실행 중인 리비전과 다를 수 있습니다. + +![](../images/endpoint_revision_info.png) + +카드에는 다음 필드가 표시됩니다: + +- **Revision ID**: 최신 리비전의 식별자입니다. +- **Model Name**: 모델 정의에서 정의된 모델 이름입니다. +- **Model Path**: 모델이 마운트된 경로입니다. +- **Start Command**: 추론 서버를 시작하는 데 사용되는 명령어입니다. +- **Port**: 모델 서비스를 위한 컨테이너 포트입니다. +- **Health Check Path**: 상태 확인을 위한 HTTP 엔드포인트 경로입니다. +- **Initial Delay**: 첫 번째 상태 확인 전 대기 시간(초)입니다. +- **Max Retries**: 허용되는 최대 연속 상태 확인 실패 횟수입니다. + +#### 리비전 불일치 상태 + +새 리비전이 대기열에 추가되었지만 서비스가 여전히 이전 리비전에서 실행 중인 경우, 리비전 정보 카드에 **"다음 리비전을 적용 중입니다."** 알림이 표시됩니다. 이는 카드에 표시된 최신 리비전 값이 현재 실행 중인 구성과 아직 일치하지 않음을 나타냅니다. + +![](../images/endpoint_revision_mismatch.png) + +**현재 리비전 보기** 버튼을 클릭하면 **현재 실행 중인** 리비전의 모델 정의를 보여주는 모달이 열립니다. 이를 통해 예정된 리비전(리비전 정보 카드에 표시됨)과 현재 활성 리비전(모달에 표시됨)을 비교할 수 있습니다. + +![](../images/endpoint_current_revision_modal.png) + +:::tip +요약하면: **리비전 정보 카드**는 항상 **최신/예정된** 리비전 값을 표시하고, +**현재 리비전 보기 모달**은 **현재 실행 중인** 리비전 값을 표시합니다. +::: + +#### 리비전을 활용한 편집 동작 (Custom 변형 전용) + +`Custom` 런타임 변형을 사용하는 서비스에서 서비스 정보 패널의 **Edit** 버튼을 클릭하면, 서비스 런처 폼에 최신 리비전의 모델 정의 값이 기본값으로 미리 채워집니다. 이를 통해 모든 필드를 다시 입력하지 않고도 설정을 점진적으로 조정할 수 있습니다. + +:::note +이 모델 정의 값의 사전 채우기 동작은 `Custom` 런타임 변형에만 적용됩니다. +`vLLM` 및 `SGLang` 변형은 모델 정의 필드를 사용하지 않으며, 대신 프레임워크별 설정을 위한 +**런타임 파라미터** 섹션(`inference_runtime_config`)을 제공합니다. +모델 정의와 런타임 파라미터는 리비전에 별도로 저장되는 서로 다른 개념입니다. ::: ### 자동 스케일링 규칙 @@ -645,10 +724,14 @@ Backend.AI 버전 26.4.0 이상에서는 Prometheus 메트릭 소스, 세그먼 - **Running / Finished**: 활성 라우트 노드와 완료된 라우트 노드 간 전환합니다. - **속성 필터**: 건강 상태 및 트래픽 상태로 필터링합니다. -`Sync Routes` 버튼을 클릭하여 라우트 정보를 백엔드와 동기화합니다. 라우트 노드를 클릭하면 세션 상세 드로어가 열리며, 개별 세션 세부 정보를 볼 수 있습니다. +라우트에 오류가 발생한 경우, 라우트 행의 오류 표시기를 클릭하면 해당 라우트의 원시 오류 데이터를 표시하는 JSON 뷰어 모달이 열립니다. 
이는 개별 라우트 노드의 문제를 진단하는 데 유용합니다. + +![](../images/route_error_json_viewer.png) + + ### 서비스 수정 엔드포인트 상세 페이지에서 `Edit` 버튼을 클릭하여 모델 서비스를 수정합니다. 이전에 입력한 필드가 채워진 상태로 서비스 런처가 열립니다. 변경하려는 필드만 선택적으로 수정할 수 있습니다. 필드를 수정한 후 `Confirm`을 클릭하여 변경 사항을 적용합니다. diff --git a/packages/backend.ai-webui-docs/src/th/images/endpoint_current_revision_modal.png b/packages/backend.ai-webui-docs/src/th/images/endpoint_current_revision_modal.png new file mode 100644 index 0000000000..0e232a0ee2 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/th/images/endpoint_current_revision_modal.png differ diff --git a/packages/backend.ai-webui-docs/src/th/images/endpoint_revision_info.png b/packages/backend.ai-webui-docs/src/th/images/endpoint_revision_info.png new file mode 100644 index 0000000000..66ce05e57d Binary files /dev/null and b/packages/backend.ai-webui-docs/src/th/images/endpoint_revision_info.png differ diff --git a/packages/backend.ai-webui-docs/src/th/images/endpoint_revision_mismatch.png b/packages/backend.ai-webui-docs/src/th/images/endpoint_revision_mismatch.png new file mode 100644 index 0000000000..c3ace13d30 Binary files /dev/null and b/packages/backend.ai-webui-docs/src/th/images/endpoint_revision_mismatch.png differ diff --git a/packages/backend.ai-webui-docs/src/th/images/endpoint_service_ready_alert.png b/packages/backend.ai-webui-docs/src/th/images/endpoint_service_ready_alert.png new file mode 100644 index 0000000000..2eefdd35ff Binary files /dev/null and b/packages/backend.ai-webui-docs/src/th/images/endpoint_service_ready_alert.png differ diff --git a/packages/backend.ai-webui-docs/src/th/images/service_launcher3.png b/packages/backend.ai-webui-docs/src/th/images/service_launcher3.png index 62fd2fa3b1..6eeeecb0da 100644 Binary files a/packages/backend.ai-webui-docs/src/th/images/service_launcher3.png and b/packages/backend.ai-webui-docs/src/th/images/service_launcher3.png differ diff --git 
a/packages/backend.ai-webui-docs/src/th/images/service_launcher_runtime_params.png b/packages/backend.ai-webui-docs/src/th/images/service_launcher_runtime_params.png index 06e2386ced..2d6edcf0c3 100644 Binary files a/packages/backend.ai-webui-docs/src/th/images/service_launcher_runtime_params.png and b/packages/backend.ai-webui-docs/src/th/images/service_launcher_runtime_params.png differ diff --git a/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md index 253b040dbf..a8cd5ea67f 100644 --- a/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md @@ -423,6 +423,16 @@ vllm serve /models/my-model --tp 2 พารามิเตอร์ที่ไม่เปลี่ยนแปลงจะใช้ค่าเริ่มต้นของ runtime ::: +นอกจากพารามิเตอร์ runtime แล้ว ตัวแปร runtime `vLLM` และ `SGLang` ยังให้ตัวแปรสภาพแวดล้อมเฉพาะในส่วน **ตัวแปรสภาพแวดล้อม** ของตัวเปิดใช้บริการ: + +- **vLLM**: `BACKEND_MODEL_NAME`, `VLLM_QUANTIZATION`, `VLLM_TP_SIZE` (tensor parallelism), `VLLM_PP_SIZE` (pipeline parallelism), `VLLM_EXTRA_ARGS` (อาร์กิวเมนต์ CLI เพิ่มเติม) +- **SGLang**: `BACKEND_MODEL_NAME`, `SGLANG_QUANTIZATION`, `SGLANG_TP_SIZE` (tensor parallelism), `SGLANG_PP_SIZE` (pipeline parallelism), `SGLANG_EXTRA_ARGS` (อาร์กิวเมนต์ CLI เพิ่มเติม) + +:::note +ตัวแปรสภาพแวดล้อมเหล่านี้จะปรากฏในส่วน **ตัวแปรสภาพแวดล้อม** ของตัวเปิดใช้บริการ +ไม่ใช่ในส่วนพารามิเตอร์ Runtime โดยให้ตัวเลือกการกำหนดค่าเพิ่มเติมที่เฉพาะเจาะจงสำหรับแต่ละตัวแปร runtime +::: + #### สภาพแวดล้อมและทรัพยากร ตั้งค่าจำนวนเรพลิกาและเลือกสภาพแวดล้อมและกลุ่มทรัพยากร @@ -504,9 +514,67 @@ vllm serve /models/my-model --tp 2 คลิกปุ่ม `Edit` บนการ์ดข้อมูลบริการเพื่อไปยังตัวเปิดใช้อัปเดตและแก้ไขการตั้งค่าบริการ -:::warning -หาก endpoint เป็นของโปรเจกต์อื่นที่ไม่ใช่โปรเจกต์ที่เลือกในปัจจุบัน -จะแสดงคำเตือนการไม่ตรงกันของโปรเจกต์ สลับไปยังโปรเจกต์ที่ถูกต้องเพื่อจัดการ endpoint +หน้ารายละเอียด Endpoint 
แสดงแบนเนอร์แจ้งเตือนตามบริบทที่ด้านบนของหน้า ขึ้นอยู่กับสถานะปัจจุบันของบริการ: + +- **กำลังเตรียมบริการของคุณ**: แสดงเมื่อบริการกำลังถูก deploy หรืออยู่ระหว่างการเปลี่ยนสถานะ บ่งบอกว่าบริการยังไม่พร้อมรับคำขอ + +![](../images/endpoint_preparing_alert.png) + + +- **บริการพร้อมแล้ว**: แสดงเมื่อสถานะบริการเป็น `HEALTHY` แบนเนอร์นี้มีปุ่ม **เริ่มแชท** ที่ให้ทางลัดไปยังอินเทอร์เฟซ LLM Chat Test + +![](../images/endpoint_service_ready_alert.png) + +- **บริการโมเดลนี้อยู่ในโปรเจกต์อื่น**: แสดงเมื่อ endpoint เป็นของโปรเจกต์อื่นที่ไม่ใช่โปรเจกต์ที่เลือกในปัจจุบัน ปุ่ม Edit จะถูกปิดใช้งานขณะที่การแจ้งเตือนนี้แสดงอยู่ คลิกปุ่ม **สลับโปรเจกต์** ในการแจ้งเตือนเพื่อสลับไปยังโปรเจกต์ที่ถูกต้อง + + + +### ข้อมูลรีวิชัน + +:::note +การ์ดข้อมูลรีวิชันจะใช้งานได้เมื่อเซิร์ฟเวอร์รองรับ Model Card v2 +(Backend.AI เวอร์ชัน 26.4.0 ขึ้นไป) +::: + +การ์ดข้อมูลรีวิชันบนหน้ารายละเอียด Endpoint จะแสดงการกำหนดค่าของ **รีวิชันล่าสุด** — รีวิชันที่อยู่ในคิวเพื่อนำไปใช้ถัดไป ซึ่งอาจแตกต่างจากรีวิชันที่กำลังทำงานอยู่บนบริการในปัจจุบัน + +![](../images/endpoint_revision_info.png) + +การ์ดจะแสดงฟิลด์ต่อไปนี้: + +- **Revision ID**: ตัวระบุของรีวิชันล่าสุด +- **Model Name**: ชื่อโมเดลตามที่กำหนดในคำจำกัดความโมเดล +- **Model Path**: เส้นทางที่โมเดลถูกเมาต์ +- **Start Command**: คำสั่งที่ใช้เริ่มเซิร์ฟเวอร์ inference +- **Port**: พอร์ตคอนเทนเนอร์สำหรับบริการโมเดล +- **Health Check Path**: เส้นทาง HTTP endpoint สำหรับการตรวจสอบสุขภาพ +- **Initial Delay**: วินาทีที่ต้องรอก่อนการตรวจสอบสุขภาพครั้งแรก +- **Max Retries**: จำนวนสูงสุดของการตรวจสอบสุขภาพที่ล้มเหลวต่อเนื่องที่อนุญาต + +#### สถานะรีวิชันไม่ตรงกัน + +เมื่อมีรีวิชันใหม่ถูกเพิ่มในคิวแต่บริการยังคงทำงานบนรีวิชันก่อนหน้า การ์ดข้อมูลรีวิชันจะแสดงการแจ้งเตือน **"กำลังใช้งานรีวิชันถัดไป"** สิ่งนี้บ่งบอกว่าค่ารีวิชันล่าสุดที่แสดงในการ์ดยังไม่ตรงกับการกำหนดค่าที่กำลังทำงานอยู่ + +![](../images/endpoint_revision_mismatch.png) + +คลิกปุ่ม **ดูรีวิชันปัจจุบัน** เพื่อเปิดโมดอลที่แสดงคำจำกัดความโมเดลของรีวิชันที่ **กำลังทำงานอยู่ในปัจจุบัน** 
ทำให้คุณสามารถเปรียบเทียบรีวิชันที่กำลังจะมา (แสดงในการ์ดข้อมูลรีวิชัน) กับรีวิชันที่ใช้งานอยู่ (แสดงในโมดอล) + +![](../images/endpoint_current_revision_modal.png) + +:::tip +สรุป: **การ์ดข้อมูลรีวิชัน** จะแสดง **ค่ารีวิชันล่าสุด/ที่กำลังจะมา** เสมอ +ขณะที่ **โมดอลดูรีวิชันปัจจุบัน** จะแสดง **ค่ารีวิชันที่กำลังทำงานอยู่ในปัจจุบัน** +::: + +#### พฤติกรรมการแก้ไขพร้อมรีวิชัน (เฉพาะ Custom เท่านั้น) + +เมื่อคุณคลิกปุ่ม **Edit** บนแผงข้อมูลบริการสำหรับบริการที่ใช้ตัวแปร runtime `Custom` ฟอร์มตัวเปิดใช้บริการจะถูกกรอกล่วงหน้าด้วยค่าคำจำกัดความโมเดลของรีวิชันล่าสุดเป็นค่าเริ่มต้น ทำให้ง่ายต่อการปรับการตั้งค่าทีละน้อยโดยไม่ต้องกรอกฟิลด์ทั้งหมดใหม่ + +:::note +พฤติกรรมการกรอกล่วงหน้าของค่าคำจำกัดความโมเดลนี้ใช้ได้เฉพาะกับตัวแปร runtime `Custom` เท่านั้น +ตัวแปร `vLLM` และ `SGLang` ไม่ใช้ฟิลด์คำจำกัดความโมเดล แต่จะแสดงส่วน **พารามิเตอร์ Runtime** +(`inference_runtime_config`) สำหรับการกำหนดค่าเฉพาะเฟรมเวิร์ก +คำจำกัดความโมเดลและพารามิเตอร์ runtime เป็นแนวคิดที่แตกต่างกันและจัดเก็บแยกกันในรีวิชัน ::: ### กฎการปรับขนาดอัตโนมัติ @@ -630,10 +698,14 @@ vllm serve /models/my-model --tp 2 - **Running / Finished**: สลับระหว่างโหนดเส้นทางที่ใช้งานอยู่และที่เสร็จสิ้น - **ตัวกรองคุณสมบัติ**: กรองตามสถานะสุขภาพและสถานะการรับส่งข้อมูล -คลิกปุ่ม `Sync Routes` เพื่อซิงโครไนซ์ข้อมูลเส้นทางกับแบ็กเอนด์ คลิกโหนดเส้นทางเพื่อเปิดลิ้นชักรายละเอียดเซสชัน ซึ่งคุณสามารถดูรายละเอียดเซสชันแต่ละรายการ +หากเส้นทางพบข้อผิดพลาด การคลิกตัวบ่งชี้ข้อผิดพลาดบนแถวเส้นทางจะเปิดโมดอล JSON viewer ที่แสดงข้อมูลข้อผิดพลาดดิบสำหรับเส้นทางนั้น สิ่งนี้มีประโยชน์สำหรับการวินิจฉัยปัญหาของโหนดเส้นทางแต่ละรายการ + +![](../images/route_error_json_viewer.png) + + ### การแก้ไขบริการ คลิกปุ่ม `Edit` บนหน้ารายละเอียด endpoint เพื่อแก้ไขบริการโมเดล ตัวเปิดใช้บริการจะเปิดขึ้นพร้อมฟิลด์ที่ป้อนไว้ก่อนหน้านี้ คุณสามารถเลือกแก้ไขเฉพาะฟิลด์ที่คุณต้องการเปลี่ยนแปลง หลังจากแก้ไขฟิลด์แล้ว ให้คลิก `Confirm` เพื่อใช้การเปลี่ยนแปลง