Skip to content

Commit 245f2c4

Browse files
nlydick and Copilot committed
feat(ai-foundry): add foundry jobs and mlflow
Add the Foundry jobs TypeSpec surface, generated OpenAPI snapshots, and examples.

Model training creation now submits with POST /jobs and Operation-Id idempotency, while SDK customization keeps the convenience operation under beta models.

Expose job attempts, artifacts, metrics, and outputs under /jobs, including attempt-pinned views.

Add MLflow tracking and registry compatibility routes under /mlflow/api/2.0/mlflow.

Authored-by: GitHub Copilot for VS Code 0.45.1
Model: GitHub Copilot
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent bb7b5d8 commit 245f2c4

20 files changed

Lines changed: 16532 additions & 885 deletions

specification/ai-foundry/data-plane/Foundry/client.tsp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import "./src/connections/routes.tsp";
1212
import "./src/datasets/routes.tsp";
1313
import "./src/deployments/routes.tsp";
1414
import "./src/indexes/routes.tsp";
15+
import "./src/mlflow/routes.tsp";
1516

1617
// Beta operations (preview routes, requiring opt-in request header)
1718
import "./src/evaluation-taxonomies/routes.tsp";
@@ -21,6 +22,7 @@ import "./src/memory-stores/routes.tsp";
2122
import "./src/red-teams/routes.tsp";
2223
import "./src/schedules/routes.tsp";
2324
import "./src/toolsets/routes.tsp";
25+
import "./src/jobs/routes.tsp";
2426

2527
// Create a ".beta" subclient for the above beta operations
2628
import "./relocate-beta-operations.tsp";
@@ -236,6 +238,25 @@ using Azure.AI.Projects;
236238
// Less generic names
237239
@@clientName(Sku, "ModelDeploymentSku");
238240

241+
@@clientName(Azure.AI.Projects.Jobs.get, "get", "python");
242+
@@clientName(Azure.AI.Projects.Jobs.list, "list", "python");
243+
@@clientName(Azure.AI.Projects.Jobs.createTrainingJob,
244+
"create_training_job",
245+
"python"
246+
);
247+
@@clientName(Azure.AI.Projects.Jobs.createTrainingJob,
248+
"createTrainingJob",
249+
"javascript"
250+
);
251+
@@clientName(Azure.AI.Projects.Jobs.beginDelete,
252+
"begin_delete",
253+
"python"
254+
);
255+
@@clientName(Azure.AI.Projects.Jobs.beginCancel,
256+
"begin_cancel",
257+
"python"
258+
);
259+
239260
// --------------------------------------------------------------------------------
240261
// To support custom client-side handling of "opt-in" to preview features.
241262
// --------------------------------------------------------------------------------
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"title": "Jobs_BeginCancel_MaximumSet",
3+
"operationId": "Jobs_BeginCancel",
4+
"parameters": {
5+
"api-version": "v1",
6+
"name": "my_training_job_01",
7+
"x-ms-client-request-id": "cf35b680-dc80-4815-ab83-9364acc3bce6",
8+
"Foundry-Features": "Jobs=V1Preview"
9+
},
10+
"responses": {
11+
"202": {
12+
"headers": {
13+
"Location": "https://management.azure.com/operations/cancel-operation-id",
14+
"Retry-After": "10"
15+
}
16+
}
17+
}
18+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"title": "Jobs_BeginCancel_MinimumSet",
3+
"operationId": "Jobs_BeginCancel",
4+
"parameters": {
5+
"api-version": "v1",
6+
"name": "my_training_job_01"
7+
},
8+
"responses": {
9+
"200": {}
10+
}
11+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"title": "Jobs_BeginDelete_MaximumSet",
3+
"operationId": "Jobs_BeginDelete",
4+
"parameters": {
5+
"api-version": "v1",
6+
"name": "my_training_job_01",
7+
"x-ms-client-request-id": "cf35b680-dc80-4815-ab83-9364acc3bce6",
8+
"Foundry-Features": "Jobs=V1Preview"
9+
},
10+
"responses": {
11+
"202": {
12+
"headers": {
13+
"Location": "https://management.azure.com/operations/delete-operation-id",
14+
"Retry-After": "10",
15+
"x-ms-async-operation-timeout": "PT1H"
16+
}
17+
}
18+
}
19+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"title": "Jobs_BeginDelete_MinimumSet",
3+
"operationId": "Jobs_BeginDelete",
4+
"parameters": {
5+
"api-version": "v1",
6+
"name": "my_training_job_01"
7+
},
8+
"responses": {
9+
"204": {}
10+
}
11+
}
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
{
2+
"title": "Jobs_CreateTrainingJob_MaximumSet",
3+
"operationId": "Jobs_CreateTrainingJob",
4+
"parameters": {
5+
"api-version": "v1",
6+
"Operation-Id": "create-my-training-job-01",
7+
"Foundry-Features": "Jobs=V1Preview",
8+
"job": {
9+
"name": "my_training_job_01",
10+
"properties": {
11+
"jobType": "Command",
12+
"command": "python train.py --data ${{inputs.training_data}} --output ${{outputs.model}}",
13+
"environmentImageReference": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04:latest",
14+
"displayName": "My Training Job",
15+
"description": "A sample command job for training",
16+
"tags": {
17+
"framework": "pytorch"
18+
},
19+
"properties": {
20+
"experimentName": "my-experiment"
21+
},
22+
"codeId": "azureai:my-training-code:1",
23+
"computeId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.MachineLearningServices/workspaces/my-ws/computes/my-cluster",
24+
"inputs": {
25+
"training_data": {
26+
"jobInputType": "uri_folder",
27+
"uri": "azureai://datastores/mystore/paths/data/train",
28+
"mode": "ReadOnlyMount"
29+
},
30+
"learning_rate": {
31+
"jobInputType": "literal",
32+
"value": "0.001"
33+
}
34+
},
35+
"outputs": {
36+
"model": {
37+
"jobOutputType": "uri_folder",
38+
"mode": "ReadWriteMount",
39+
"assetName": "my-trained-model",
40+
"assetVersion": "1",
41+
"uri": "azureai://datastores/workspaceblobstore/paths/outputs/model",
42+
"baseModelId": "azureai:qwen-base-model:1",
43+
"description": "Trained model output"
44+
}
45+
},
46+
"environmentVariables": {
47+
"AZUREML_DATASET_FILE": "/mnt/data/train.csv"
48+
},
49+
"distribution": {
50+
"distributionType": "PyTorch",
51+
"processCountPerInstance": 4
52+
},
53+
"resources": {
54+
"instanceCount": 2,
55+
"instanceType": "Standard_NC6s_v3",
56+
"shmSize": "2g",
57+
"dockerArgs": "--privileged"
58+
},
59+
"limits": {
60+
"jobLimitsType": "Command",
61+
"timeout": "PT2H30M"
62+
},
63+
"services": {
64+
"Studio": {
65+
"jobServiceType": "Studio",
66+
"port": 8080,
67+
"endpoint": "https://studio.example.com"
68+
},
69+
"Tracking": {
70+
"jobServiceType": "Tracking"
71+
}
72+
},
73+
"queueSettings": {
74+
"jobTier": "Standard"
75+
},
76+
"userAssignedIdentityId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/my-identity",
77+
"isArchived": false
78+
}
79+
}
80+
},
81+
"responses": {
82+
"201": {
83+
"body": {
84+
"id": "my_training_job_01",
85+
"name": "my_training_job_01",
86+
"type": "Microsoft.MachineLearningServices/workspaces/jobs",
87+
"properties": {
88+
"jobType": "Command",
89+
"command": "python train.py --data ${{inputs.training_data}} --output ${{outputs.model}}",
90+
"environmentImageReference": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04:latest",
91+
"displayName": "My Training Job",
92+
"description": "A sample command job for training",
93+
"tags": {
94+
"framework": "pytorch"
95+
},
96+
"properties": {
97+
"experimentName": "my-experiment"
98+
},
99+
"codeId": "azureai:my-training-code:1",
100+
"computeId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.MachineLearningServices/workspaces/my-ws/computes/my-cluster",
101+
"inputs": {
102+
"training_data": {
103+
"jobInputType": "uri_folder",
104+
"uri": "azureai://datastores/mystore/paths/data/train",
105+
"mode": "ReadOnlyMount"
106+
},
107+
"learning_rate": {
108+
"jobInputType": "literal",
109+
"value": "0.001"
110+
}
111+
},
112+
"outputs": {
113+
"model": {
114+
"jobOutputType": "uri_folder",
115+
"mode": "ReadWriteMount",
116+
"assetName": "my-trained-model",
117+
"assetVersion": "1",
118+
"uri": "azureai://datastores/workspaceblobstore/paths/outputs/model",
119+
"baseModelId": "azureai:qwen-base-model:1",
120+
"description": "Trained model output"
121+
}
122+
},
123+
"environmentVariables": {
124+
"AZUREML_DATASET_FILE": "/mnt/data/train.csv"
125+
},
126+
"distribution": {
127+
"distributionType": "PyTorch",
128+
"processCountPerInstance": 4
129+
},
130+
"resources": {
131+
"instanceCount": 2,
132+
"instanceType": "Standard_NC6s_v3",
133+
"shmSize": "2g",
134+
"dockerArgs": "--privileged"
135+
},
136+
"limits": {
137+
"jobLimitsType": "Command",
138+
"timeout": "PT2H30M"
139+
},
140+
"services": {
141+
"Studio": {
142+
"jobServiceType": "Studio",
143+
"port": 8080,
144+
"endpoint": "https://studio.example.com"
145+
},
146+
"Tracking": {
147+
"jobServiceType": "Tracking"
148+
}
149+
},
150+
"queueSettings": {
151+
"jobTier": "Standard"
152+
},
153+
"userAssignedIdentityId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/my-identity",
154+
"status": "NotStarted",
155+
"isArchived": false
156+
},
157+
"systemData": {
158+
"createdAt": "2020-01-01T12:34:56.999Z",
159+
"createdBy": "user@example.com",
160+
"createdByType": "User",
161+
"lastModifiedAt": "2020-01-01T12:34:56.999Z",
162+
"lastModifiedBy": "user@example.com",
163+
"lastModifiedByType": "User"
164+
}
165+
}
166+
}
167+
}
168+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"title": "Jobs_CreateTrainingJob_MinimumSet",
3+
"operationId": "Jobs_CreateTrainingJob",
4+
"parameters": {
5+
"api-version": "v1",
6+
"Operation-Id": "create-my-training-job-01",
7+
"Foundry-Features": "Jobs=V1Preview",
8+
"job": {
9+
"properties": {
10+
"jobType": "Command",
11+
"command": "python train.py",
12+
"environmentImageReference": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04:latest",
13+
"computeId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.MachineLearningServices/workspaces/my-ws/computes/my-cluster"
14+
}
15+
}
16+
},
17+
"responses": {
18+
"201": {
19+
"body": {
20+
"name": "my_training_job_01",
21+
"properties": {
22+
"jobType": "Command",
23+
"command": "python train.py",
24+
"environmentImageReference": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04:latest",
25+
"computeId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.MachineLearningServices/workspaces/my-ws/computes/my-cluster"
26+
}
27+
}
28+
}
29+
}
30+
}

0 commit comments

Comments
 (0)