Skip to content

Commit ef6302b

Browse files
seedspirit and claude authored
feat(BA-6644): Add domain id, resource group id in session row (#12452)
Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
1 parent a2c828b commit ef6302b

79 files changed

Lines changed: 1603 additions & 490 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

changes/12452.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add domain_id and resource_group_id columns to session rows and populate them at session creation

src/ai/backend/manager/data/session/creation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from uuid import UUID
1212

1313
from ai.backend.common.docker import ImageRef
14+
from ai.backend.common.identifier.domain import DomainID
15+
from ai.backend.common.identifier.resource_group import ResourceGroupID
1416
from ai.backend.common.types import AccessKey
1517

1618

@@ -62,7 +64,9 @@ class DeploymentContext:
6264
created_user: UserContext
6365
session_owner: UserContext
6466
container_user: ContainerUserContext
67+
domain_id: DomainID
6568
group_id: UUID
69+
resource_group_id: ResourceGroupID
6670
resource_policy: dict[str, Any]
6771
image: ImageContext
6872
resolved_presets: ResolvedPresetValues | None = None

src/ai/backend/manager/data/session/draft.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@
3737
import yarl
3838
from pydantic import ConfigDict, Field
3939

40-
from ai.backend.common.identifier.domain import DomainName
40+
from ai.backend.common.identifier.domain import DomainID, DomainName
4141
from ai.backend.common.identifier.image import ImageID
4242
from ai.backend.common.identifier.project import ProjectID
43-
from ai.backend.common.identifier.resource_group import ResourceGroupName
43+
from ai.backend.common.identifier.resource_group import ResourceGroupID, ResourceGroupName
4444
from ai.backend.common.identifier.session import SessionID
4545
from ai.backend.common.types import (
4646
AccessKey,
@@ -179,8 +179,10 @@ class SessionNetworkDraft(_DraftBaseModel):
179179
class SessionScopeDraft(_DraftBaseModel):
180180
"""Optional-heavy mirror of ``SessionScope``."""
181181

182+
domain_id: DomainID | None = None
182183
domain_name: DomainName | None = None
183184
project_id: ProjectID | None = None
185+
resource_group_id: ResourceGroupID | None = None
184186
resource_group_name: ResourceGroupName | None = None
185187

186188

src/ai/backend/manager/data/session/spec.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
from pydantic import ConfigDict, Field
2525

2626
from ai.backend.common.exception import BackendAIError
27-
from ai.backend.common.identifier.domain import DomainName
27+
from ai.backend.common.identifier.domain import DomainID, DomainName
2828
from ai.backend.common.identifier.project import ProjectID
29-
from ai.backend.common.identifier.resource_group import ResourceGroupName
29+
from ai.backend.common.identifier.resource_group import ResourceGroupID, ResourceGroupName
3030
from ai.backend.common.identifier.session import SessionID
3131
from ai.backend.common.types import (
3232
AccessKey,
@@ -69,8 +69,10 @@ class SessionIdentity(_SpecBaseModel):
6969
class SessionScope(_SpecBaseModel):
7070
"""Ownership / placement scope of the session."""
7171

72+
domain_id: DomainID
7273
domain_name: DomainName
7374
project_id: ProjectID
75+
resource_group_id: ResourceGroupID
7476
resource_group_name: ResourceGroupName
7577

7678

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""add session scope id columns
2+
3+
Revision ID: ada41cb881bb
4+
Revises: a379b72f1206
5+
Create Date: 2026-06-30
6+
7+
"""
8+
9+
import sqlalchemy as sa
10+
from alembic import op
11+
12+
from ai.backend.manager.models.base import GUID
13+
14+
# revision identifiers, used by Alembic.
15+
revision = "ada41cb881bb"
16+
down_revision = "a379b72f1206"
17+
# Part of: 26.7.0
18+
branch_labels = None
19+
depends_on = None
20+
21+
22+
def upgrade() -> None:
    """Add ``domain_id`` and ``resource_group_id`` to ``sessions``.

    Steps, in order:

    1. Add both columns as nullable so existing rows remain valid.
    2. Backfill them by joining ``sessions`` to ``domains`` /
       ``scaling_groups`` on the existing name columns.
    3. Tighten both columns to ``NOT NULL``.
    4. Create the lookup indexes.
    5. Create the foreign keys to ``domains.id`` / ``scaling_groups.id``.
    """
    op.add_column("sessions", sa.Column("domain_id", GUID(), nullable=True))
    op.add_column("sessions", sa.Column("resource_group_id", GUID(), nullable=True))

    # Backfill only rows that are still NULL, so re-running the statement
    # after a partial run does not rewrite already populated rows.
    op.execute(
        sa.text("""
            UPDATE sessions
            SET domain_id = domains.id
            FROM domains
            WHERE sessions.domain_name = domains.name
              AND sessions.domain_id IS NULL
        """)
    )
    op.execute(
        sa.text("""
            UPDATE sessions
            SET resource_group_id = scaling_groups.id
            FROM scaling_groups
            WHERE sessions.scaling_group_name = scaling_groups.name
              AND sessions.resource_group_id IS NULL
        """)
    )

    op.alter_column("sessions", "domain_id", nullable=False)
    op.alter_column("sessions", "resource_group_id", nullable=False)

    # Build the indexes after the bulk backfill: creating them first would
    # force both table-wide UPDATEs to maintain two extra indexes row by row.
    op.create_index("ix_sessions_domain_id", "sessions", ["domain_id"])
    op.create_index("ix_sessions_resource_group_id", "sessions", ["resource_group_id"])

    op.create_foreign_key(
        op.f("fk_sessions_domain_id_domains"),
        "sessions",
        "domains",
        ["domain_id"],
        ["id"],
    )
    op.create_foreign_key(
        op.f("fk_sessions_resource_group_id_scaling_groups"),
        "sessions",
        "scaling_groups",
        ["resource_group_id"],
        ["id"],
    )
62+
63+
64+
def downgrade() -> None:
    """Remove the ``domain_id`` / ``resource_group_id`` additions from ``sessions``.

    Objects are dropped in the reverse order of their creation: foreign keys
    first, then indexes, then the columns themselves. Within each step the
    resource-group object is dropped before the domain one.
    """
    foreign_keys = (
        "fk_sessions_resource_group_id_scaling_groups",
        "fk_sessions_domain_id_domains",
    )
    indexes = ("ix_sessions_resource_group_id", "ix_sessions_domain_id")
    columns = ("resource_group_id", "domain_id")

    for fk_name in foreign_keys:
        op.drop_constraint(op.f(fk_name), "sessions", type_="foreignkey")
    for index_name in indexes:
        op.drop_index(index_name, table_name="sessions")
    for column_name in columns:
        op.drop_column("sessions", column_name)

src/ai/backend/manager/models/domain/row.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,11 @@ class DomainRow(Base): # type: ignore[misc]
135135
"dotfiles", sa.LargeBinary(length=MAXIMUM_DOTFILE_SIZE), nullable=False, default=b"\x90"
136136
)
137137

138-
sessions: Mapped[list[SessionRow]] = relationship("SessionRow", back_populates="domain")
138+
sessions: Mapped[list[SessionRow]] = relationship(
139+
"SessionRow",
140+
back_populates="domain",
141+
foreign_keys="[SessionRow.domain_name]",
142+
)
139143
users: Mapped[list[UserRow]] = relationship("UserRow", back_populates="domain")
140144
groups: Mapped[list[GroupRow]] = relationship("GroupRow", back_populates="domain")
141145
sgroup_for_domains_rows: Mapped[list[ScalingGroupForDomainRow]] = relationship(

src/ai/backend/manager/models/scaling_group/row.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,11 @@ class ScalingGroupRow(Base): # type: ignore[misc]
330330
default=DefaultSessionOptions,
331331
)
332332

333-
sessions: Mapped[list[SessionRow]] = relationship("SessionRow", back_populates="scaling_group")
333+
sessions: Mapped[list[SessionRow]] = relationship(
334+
"SessionRow",
335+
back_populates="scaling_group",
336+
foreign_keys="[SessionRow.scaling_group_name]",
337+
)
334338
agents: Mapped[list[AgentRow]] = relationship("AgentRow", back_populates="scaling_group_row")
335339

336340
sgroup_for_domains_rows: Mapped[list[ScalingGroupForDomainRow]] = relationship(

src/ai/backend/manager/models/session/row.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939

4040
from ai.backend.common.defs.session import SESSION_PRIORITY_DEFAULT
4141
from ai.backend.common.exception import BackendAIError
42+
from ai.backend.common.identifier.domain import DomainID
43+
from ai.backend.common.identifier.resource_group import ResourceGroupID
4244
from ai.backend.common.types import (
4345
AccessKey,
4446
ClusterMode,
@@ -453,11 +455,20 @@ class SessionRow(Base): # type: ignore[misc]
453455
kernels: Mapped[list[KernelRow]] = relationship("KernelRow", back_populates="session")
454456

455457
# Resource ownership
458+
resource_group_id: Mapped[ResourceGroupID] = mapped_column(
459+
"resource_group_id",
460+
GUID(ResourceGroupID),
461+
sa.ForeignKey("scaling_groups.id"),
462+
index=True,
463+
nullable=False,
464+
)
456465
scaling_group_name: Mapped[str] = mapped_column(
457466
"scaling_group_name", sa.ForeignKey("scaling_groups.name"), index=True, nullable=False
458467
)
459468
scaling_group: Mapped[ScalingGroupRow] = relationship(
460-
"ScalingGroupRow", back_populates="sessions"
469+
"ScalingGroupRow",
470+
back_populates="sessions",
471+
foreign_keys=[scaling_group_name],
461472
)
462473
target_sgroup_names: Mapped[list[str] | None] = mapped_column(
463474
"target_sgroup_names",
@@ -469,7 +480,18 @@ class SessionRow(Base): # type: ignore[misc]
469480
domain_name: Mapped[str] = mapped_column(
470481
"domain_name", sa.String(length=64), sa.ForeignKey("domains.name"), nullable=False
471482
)
472-
domain: Mapped[DomainRow] = relationship("DomainRow", back_populates="sessions")
483+
domain_id: Mapped[DomainID] = mapped_column(
484+
"domain_id",
485+
GUID(DomainID),
486+
sa.ForeignKey("domains.id"),
487+
index=True,
488+
nullable=False,
489+
)
490+
domain: Mapped[DomainRow] = relationship(
491+
"DomainRow",
492+
back_populates="sessions",
493+
foreign_keys=[domain_name],
494+
)
473495
group_id: Mapped[UUID] = mapped_column(
474496
"group_id", GUID, sa.ForeignKey("groups.id"), nullable=False
475497
)

src/ai/backend/manager/registry.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1119,8 +1119,13 @@ def _build_execution_spec(
11191119
if not groups_by_role:
11201120
raise InvalidAPIParameters("No kernel groups resolved from the enqueue request.")
11211121

1122+
domain_name = DomainName(user_scope.domain_name)
1123+
domain_id = await self._scheduler_repository.get_domain_id_by_name(domain_name)
11221124
if scaling_group:
11231125
resource_group_name = ResourceGroupName(scaling_group)
1126+
resource_group_id = await self._scheduler_repository.get_resource_group_id_by_name(
1127+
resource_group_name
1128+
)
11241129
else:
11251130
resource_group_id = await self._scheduler_repository.pick_default_resource_group(
11261131
access_key=access_key,
@@ -1140,8 +1145,10 @@ def _build_execution_spec(
11401145
user_uuid=user_scope.user_uuid,
11411146
),
11421147
scope=SessionScopeDraft(
1143-
domain_name=DomainName(user_scope.domain_name),
1148+
domain_id=domain_id,
1149+
domain_name=domain_name,
11441150
project_id=ProjectID(user_scope.group_id),
1151+
resource_group_id=resource_group_id,
11451152
resource_group_name=resource_group_name,
11461153
),
11471154
classification=SessionClassificationDraft(

src/ai/backend/manager/repositories/deployment/db_source/db_source.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@
2929
from ai.backend.common.identifier.deployment import DeploymentID
3030
from ai.backend.common.identifier.deployment_preset import DeploymentPresetID
3131
from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID
32+
from ai.backend.common.identifier.domain import DomainID
3233
from ai.backend.common.identifier.image import ImageID
3334
from ai.backend.common.identifier.replica import ReplicaID
3435
from ai.backend.common.identifier.replica_group import ReplicaGroupID
35-
from ai.backend.common.identifier.resource_group import ResourceGroupName
36+
from ai.backend.common.identifier.resource_group import ResourceGroupID, ResourceGroupName
3637
from ai.backend.common.identifier.runtime_variant import RuntimeVariantID
3738
from ai.backend.common.identifier.vfolder import VFolderUUID
3839
from ai.backend.common.types import (
@@ -105,6 +106,7 @@
105106
UserNotFoundInDeployment,
106107
)
107108
from ai.backend.manager.errors.resource import (
109+
DomainNotFound,
108110
ProjectNotFound,
109111
RuntimeVariantNotFound,
110112
ScalingGroupNotFound,
@@ -124,6 +126,7 @@
124126
from ai.backend.manager.models.deployment_revision_preset.row import (
125127
DeploymentRevisionPresetRow,
126128
)
129+
from ai.backend.manager.models.domain import DomainRow
127130
from ai.backend.manager.models.endpoint import (
128131
EndpointAutoScalingRuleRow,
129132
EndpointRow,
@@ -2099,6 +2102,29 @@ async def _resolve_user_and_active_access_key(
20992102
raise UserNotFoundInDeployment(f"{user_uuid} not found")
21002103
raise NoActiveKeypairForDeployment(f"{user_uuid} has no active keypair")
21012104

2105+
async def _resolve_deployment_scope_ids(
    self,
    db_sess: SASession,
    deployment_info: DeploymentInfo,
) -> tuple[DomainID, ResourceGroupID]:
    """Look up the domain and resource-group ids named by a deployment.

    Args:
        db_sess: Database session used to run both lookups.
        deployment_info: Deployment whose ``metadata.domain`` and
            ``metadata.resource_group`` names are resolved.

    Returns:
        A ``(DomainID, ResourceGroupID)`` pair.

    Raises:
        DomainNotFound: No ``DomainRow`` has the deployment's domain name.
        ScalingGroupNotFound: No ``ScalingGroupRow`` has the deployment's
            resource-group name.
    """
    metadata = deployment_info.metadata

    # The domain is resolved first, so a missing domain is reported before
    # the resource-group query is ever issued.
    domain_stmt = sa.select(DomainRow.id).where(DomainRow.name == metadata.domain)
    found_domain_id = await db_sess.scalar(domain_stmt)
    if found_domain_id is None:
        raise DomainNotFound(metadata.domain)

    resource_group_stmt = sa.select(ScalingGroupRow.id).where(
        ScalingGroupRow.name == metadata.resource_group
    )
    found_resource_group_id = await db_sess.scalar(resource_group_stmt)
    if found_resource_group_id is None:
        raise ScalingGroupNotFound(f"Resource group {metadata.resource_group!r} not found")

    return DomainID(found_domain_id), ResourceGroupID(found_resource_group_id)
2127+
21022128
async def fetch_deployment_context(
21032129
self,
21042130
deployment_info: DeploymentInfo,
@@ -2147,6 +2173,10 @@ async def fetch_deployment_context(
21472173
group_id = user_info.group_id
21482174
resource_policy = user_info.resource_policy
21492175

2176+
domain_id, resource_group_id = await self._resolve_deployment_scope_ids(
2177+
db_sess, deployment_info
2178+
)
2179+
21502180
revision_query = (
21512181
sa.select(DeploymentRevisionRow)
21522182
.where(DeploymentRevisionRow.id == revision_id)
@@ -2212,7 +2242,9 @@ async def fetch_deployment_context(
22122242
main_gid=session_owner_user.container_main_gid,
22132243
supplementary_gids=session_owner_user.container_gids or [],
22142244
),
2245+
domain_id=domain_id,
22152246
group_id=group_id,
2247+
resource_group_id=resource_group_id,
22162248
resource_policy=dict(resource_policy),
22172249
image=ImageContext(
22182250
ref=image_row.image_ref,

0 commit comments

Comments
 (0)