Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
245f2c4
feat(ai-foundry): add foundry jobs and mlflow
nlydick Apr 30, 2026
aff30b5
fix(ai-foundry): sync training job surface
nlydick May 5, 2026
bf5259c
feat(foundry): model job run compute details
nlydick May 5, 2026
9618895
chore(foundry): update generated openapi output
nlydick May 5, 2026
ef45f09
feat(foundry): model training job recipes
nlydick May 5, 2026
471d10a
fix(foundry): model job lro status polling
nlydick May 5, 2026
205c1c1
feat(typespec): add training job services route
nlydick May 5, 2026
746ff22
feat(typespec): add training job artifact prefix info
nlydick May 5, 2026
b76ec8f
fix(typespec): correct training jobs create contract
nlydick May 5, 2026
dd744be
fix(typespec): unblock foundry jobs validation
nlydick May 5, 2026
a61e399
fix(typespec): mark foundry readmes autorest
nlydick May 5, 2026
4d8c5e2
feat(typespec): align project mlflow routes
nlydick May 5, 2026
f130d26
fix(foundry): use snake case training job routes
nlydick May 5, 2026
38f2580
feat(foundry): add managed training and ray contracts
nlydick May 6, 2026
35a0f18
feat(foundry): expose ray client remote access
nlydick May 6, 2026
61007a9
docs(foundry): clarify managed training columns
nlydick May 6, 2026
99b34e1
feat(foundry): add ray session cluster APIs
nlydick May 6, 2026
ca831aa
fix(foundry): rename training job runs to attempts
nlydick May 6, 2026
82fdbe5
fix(foundry): route latest training job attempts
nlydick May 6, 2026
b173460
fix(foundry): sync training job attempt routes
nlydick May 6, 2026
27596a3
fix(foundry): repair training job SDK projection
nlydick May 6, 2026
de15402
feat(foundry): use typed training job create
nlydick May 6, 2026
d8b223b
fix(foundry): align training jobs surface
nlydick May 7, 2026
17e9020
feat(foundry): add interactive Ray SDK job model
nlydick May 7, 2026
0491dda
fix(foundry): place training jobs under beta client
nlydick May 7, 2026
0643fa8
fix(foundry): relocate all training job SDK operations
nlydick May 7, 2026
dcc6b2a
fix(foundry): move mlflow SDK surface under beta
nlydick May 7, 2026
b46afc9
feat(foundry): flatten training jobs contract
nlydick May 8, 2026
1997193
fix(foundry): move model training operations to beta
nlydick May 8, 2026
b3349fd
fix(foundry): collapse training job sdk overloads
nlydick May 8, 2026
c45f9a7
feat(foundry): enrich training jobs response data
nlydick May 8, 2026
9857a44
feat(foundry): add latest attempt include
nlydick May 8, 2026
f0d74ea
fix(foundry): align training job path parameter name
nlydick May 8, 2026
d6c78f8
fix(ai-foundry): narrow training job paging parameters
nlydick May 8, 2026
d334d51
feat(ai-foundry): add training job include expansions
nlydick May 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions specification/ai-foundry/data-plane/Foundry/client.csharp.tsp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import "./src/connections/routes.tsp";
import "./src/datasets/routes.tsp";
import "./src/indexes/routes.tsp";
import "./src/deployments/routes.tsp";
import "./src/training_jobs/routes.tsp";
import "./src/models/routes.tsp";
import "./src/red-teams/routes.tsp";
import "./src/evaluation-rules/routes.tsp";
import "./src/evaluation-taxonomies/routes.tsp";
Expand Down Expand Up @@ -80,9 +82,12 @@ namespace Azure.AI.Projects;

// Less generic names for C# subclients
@@clientName(Connections, "AIProjectConnectionsOperations", "csharp");
@@clientName(Clusters, "AIProjectClustersOperations", "csharp");
@@clientName(Datasets, "AIProjectDatasetsOperations", "csharp");
@@clientName(Deployments, "AIProjectDeploymentsOperations", "csharp");
@@clientName(Indexes, "AIProjectIndexesOperations", "csharp");
@@clientName(Models, "AIProjectModelsOperations", "csharp");
@@clientName(TrainingJobs, "AIProjectTrainingJobsOperations", "csharp");

// Need to explicitly make all get and list methods include the object name for C#
@@clientName(Connections.get, "getConnection", "csharp");
Expand All @@ -99,6 +104,11 @@ namespace Azure.AI.Projects;
@@clientName(Indexes.getVersion, "getIndex", "csharp");
@@clientName(Indexes.listLatest, "listIndexes", "csharp");
@@clientName(Indexes.listVersions, "listIndexVersions", "csharp");
@@clientName(TrainingJobs.create, "createTrainingJob", "csharp");
@@clientName(TrainingJobs.get, "getTrainingJob", "csharp");
@@clientName(TrainingJobs.list, "listTrainingJobs", "csharp");
@@clientName(TrainingJobs.beginDelete, "beginDeleteTrainingJob", "csharp");
@@clientName(TrainingJobs.beginCancel, "beginCancelTrainingJob", "csharp");

// Make these two internal, since all SDKs hand-write a single public method with boolean "includeCredentials"
// input parameter that calls either one of these two.
Expand Down
63 changes: 53 additions & 10 deletions specification/ai-foundry/data-plane/Foundry/client.tsp
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,19 @@ import "./src/datasets/routes.tsp";
import "./src/deployments/routes.tsp";
import "./src/indexes/routes.tsp";

// Beta SDK operations without preview feature headers
import "./src/mlflow/routes.tsp";

// Beta operations (preview routes, requiring opt-in request header)
import "./src/evaluation-taxonomies/routes.tsp";
import "./src/evaluators/routes.tsp";
import "./src/insights/routes.tsp";
import "./src/memory-stores/routes.tsp";
import "./src/models/routes.tsp";
import "./src/red-teams/routes.tsp";
import "./src/schedules/routes.tsp";
import "./src/toolsets/routes.tsp";
import "./src/training_jobs/routes.tsp";

// Create a ".beta" subclient for the above beta operations
import "./relocate-beta-operations.tsp";
Expand All @@ -41,7 +46,7 @@ using Azure.AI.Projects;
@@clientNamespace(OpenAI, "azure.ai.projects", "python");

// --------------------------------------------------------------------------------
// Agents subclient
// Agents sub-client
// --------------------------------------------------------------------------------

// Use short form method names
Expand Down Expand Up @@ -139,7 +144,7 @@ using Azure.AI.Projects;
@@clientName(MemoryStoreObject, "MemoryStore", "javascript");

// --------------------------------------------------------------------------------
// Responses subclient
// Responses sub-client
// --------------------------------------------------------------------------------

// Allow assigning a `dict[str, Any]` object directly to the "schema" property of TextResponseFormatJsonSchema
Expand All @@ -155,7 +160,7 @@ using Azure.AI.Projects;
);

// --------------------------------------------------------------------------------
// Evaluators subclient
// Evaluators sub-client
// --------------------------------------------------------------------------------
@@clientName(Evaluators.listLatestVersions, "list");
@@clientName(Evaluators.startPendingUpload, "pendingUpload");
Expand All @@ -165,13 +170,27 @@ using Azure.AI.Projects;
@@alternateType(EvaluatorVersion.modified_at, utcDateTime, "python");

// --------------------------------------------------------------------------------
// Insights subclient
// Insights sub-client
// --------------------------------------------------------------------------------
@@clientName(Azure.AI.Projects.Insight.id, "insight_id");
@@clientName(InsightsGetParams.id, "insight_id");

// --------------------------------------------------------------------------------
// Datasets sub‐client
// Models sub-client
// --------------------------------------------------------------------------------

@@clientName(Models.createTrainingJob, "training_job", "python");
@@clientName(Models.createTrainingJob, "trainingJob", "javascript");
@@scope(Models.createSftTrainingJob, "!(python,javascript)");
@@scope(Models.createPreferenceTrainingJob, "!(python,javascript)");
@@scope(Models.createKtoTrainingJob, "!(python,javascript)");
@@scope(Models.createRewardModelTrainingJob, "!(python,javascript)");
@@scope(Models.createGrpoTrainingJob, "!(python,javascript)");
@@scope(Models.createPolicyGradientTrainingJob, "!(python,javascript)");
@@scope(Models.createPpoTrainingJob, "!(python,javascript)");

// --------------------------------------------------------------------------------
// Datasets sub-client
// --------------------------------------------------------------------------------

// Shorter method names for SDK datasets operations
Expand All @@ -186,7 +205,7 @@ using Azure.AI.Projects;
@@clientName(SasCredential, "BlobReferenceSasCredential"); // Not to be confused with class "SASCredentials"

// --------------------------------------------------------------------------------
// Indexes subclient
// Indexes sub-client
// --------------------------------------------------------------------------------

// Shorter method names for SDK Index operations
Expand All @@ -196,7 +215,7 @@ using Azure.AI.Projects;
@@clientName(Indexes.createOrUpdateVersion, "createOrUpdate");

// --------------------------------------------------------------------------------
// Schedules subclient
// Schedules sub-client
// --------------------------------------------------------------------------------

@@clientName(SchedulesCreateOrUpdateParams.resource, "schedule");
Expand All @@ -211,7 +230,7 @@ using Azure.AI.Projects;
@@alternateType(ScheduleRun.triggerTime, utcDateTime, "python");

// --------------------------------------------------------------------------------
// Toolsets subclient
// Toolsets sub-client
// --------------------------------------------------------------------------------

@@clientName(Toolsets.createToolset, "create");
Expand All @@ -221,7 +240,7 @@ using Azure.AI.Projects;
@@clientName(Toolsets.deleteToolset, "delete");

// --------------------------------------------------------------------------------
// Connections subclient
// Connections sub-client
// --------------------------------------------------------------------------------

// Make these two internal, since all SDKs hand-write a single public method with boolean "includeCredentials"
Expand All @@ -230,12 +249,36 @@ using Azure.AI.Projects;
@@access(Connections.getWithCredentials, Access.internal);

// --------------------------------------------------------------------------------
// Deployment subclient
// Deployments sub-client
// --------------------------------------------------------------------------------

// Less generic names
@@clientName(Sku, "ModelDeploymentSku");

@@clientName(Azure.AI.Projects.TrainingJobs.get, "get", "python");
@@clientName(Azure.AI.Projects.TrainingJobs.list, "list", "python");
@@clientName(Azure.AI.Projects.TrainingJobs.create, "create", "python");
@@clientName(Azure.AI.Projects.TrainingJobs.create, "create", "javascript");
@@alternateType(JobCreate, InteractiveRayJob);
@@usage(InteractiveRayJob, Usage.input);
@@access(InteractiveRayJob, Access.public);
@@usage(InteractiveRayJobProperties, Usage.input);
@@access(InteractiveRayJobProperties, Access.public);
@@usage(InteractiveRayJobTrainingConfiguration, Usage.input);
@@access(InteractiveRayJobTrainingConfiguration, Access.public);
@@usage(InteractiveRayJobFramework, Usage.input);
@@access(InteractiveRayJobFramework, Access.public);
@@clientName(Azure.AI.Projects.Beta.TrainingJobs, "training_jobs", "python");
@@clientName(Azure.AI.Projects.Beta.TrainingJobs, "trainingJobs", "javascript");
@@clientName(Azure.AI.Projects.TrainingJobs.beginDelete,
"begin_delete",
"python"
);
@@clientName(Azure.AI.Projects.TrainingJobs.beginCancel,
"begin_cancel",
"python"
);

// --------------------------------------------------------------------------------
// To support custom client-side handling of "opt-in" to preview features.
// --------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"title": "Models_CreateTrainingJobGrpo_MaximumSet",
"operationId": "Models_createTrainingJob",
"parameters": {
"api-version": "v1",
"Operation-Id": "create-my-grpo-training-job-01",
"Foundry-Features": "TrainingJobs=V1Preview",
"job": {
"type": "grpo",
"name": "qwen-reasoning-grpo",
"version": "1",
"description": "GRPO model trained for reasoning tasks",
"tags": {
"algorithm": "grpo"
},
"model": "azureai://registries/azureml/models/qwen-base/versions/1",
"dataset": {
"train": "azureai://accounts/my-account/projects/my-project/data/train-dataset/versions/1",
"eval": "azureai://accounts/my-account/projects/my-project/data/eval-dataset/versions/1",
"columns": {
"prompt": "prompt",
"reference": "reference"
},
"data_format": {
"chatTemplate": "chatml"
}
},
"runtime": {
"framework": "verl",
"hyperparameters": {
"learning_rate": 0.00001,
"num_generations": 8,
"max_completion_length": 512
},
"judge": {
"kind": "inline_function",
"inline_function": {
"def": "def score(samples, **kwargs):\n return [1.0 for _ in samples]",
"runtime": {
"pip": [
"numpy>=1.26"
]
}
}
},
"lora": {
"rank": 16,
"alpha": 32,
"dropout": 0.05,
"targetModules": "auto",
"quantize": true
},
"rollout": {
"engine": "vllm",
"tensorParallelSize": 2,
"gpuMemoryUtilization": 0.9
},
"eval": {
"benchmark": "gsm8k",
"everyNSteps": 100
}
},
"compute": {
"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.CognitiveServices/accounts/my-account/computes/my-cluster",
"resources": {
"instanceCount": 4,
"instanceType": "Standard_NC96ads_A100_v4"
},
"gpu_count": 32
}
}
},
"responses": {
"202": {
"headers": {
"Operation-Location": "https://my-account.services.ai.azure.com/api/projects/my-project/models/operations/create-my-grpo-training-job-01/result?api-version=v1"
},
"body": {
"operation_id": "create-my-grpo-training-job-01",
"status": "queued"
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"title": "Models_CreateTrainingJobKto_MaximumSet",
"operationId": "Models_createTrainingJob",
"parameters": {
"api-version": "v1",
"Operation-Id": "create-my-kto-training-job-01",
"Foundry-Features": "TrainingJobs=V1Preview",
"job": {
"type": "kto",
"name": "qwen-feedback-kto",
"version": "1",
"description": "KTO model trained from binary desirability feedback",
"tags": {
"algorithm": "kto"
},
"model": "azureai://registries/azureml/models/qwen-base/versions/1",
"dataset": {
"train": "azureai://accounts/my-account/projects/my-project/data/kto-train/versions/1",
"eval": "azureai://accounts/my-account/projects/my-project/data/kto-eval/versions/1",
"columns": {
"prompt": "prompt",
"response": "response",
"label": "is_desirable"
},
"data_format": {
"chatTemplate": "chatml"
}
},
"runtime": {
"framework": "trl",
"desirable_weight": 1,
"undesirable_weight": 1.33,
"hyperparameters": {
"learning_rate": 0.00005,
"beta": 0.1
},
"lora": {
"rank": 16,
"alpha": 32,
"targetModules": "auto"
},
"packaging": {
"mode": "merged_model"
}
},
"compute": {
"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.CognitiveServices/accounts/my-account/computes/my-cluster",
"resources": {
"instanceCount": 2,
"instanceType": "Standard_NC96ads_A100_v4"
},
"gpu_count": 16
}
}
},
"responses": {
"202": {
"headers": {
"Operation-Location": "https://my-account.services.ai.azure.com/api/projects/my-project/models/operations/create-my-kto-training-job-01/result?api-version=v1"
},
"body": {
"operation_id": "create-my-kto-training-job-01",
"status": "queued"
}
}
}
}
Loading
Loading