From b82a03d8097257cd87ed77a119d430d300eafb4d Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Wed, 4 Mar 2026 13:40:29 -0500 Subject: [PATCH 1/3] Update model-deployment-tips.md --- ai-quick-actions/model-deployment-tips.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ai-quick-actions/model-deployment-tips.md b/ai-quick-actions/model-deployment-tips.md index e9d5d7a6..4634cfc9 100644 --- a/ai-quick-actions/model-deployment-tips.md +++ b/ai-quick-actions/model-deployment-tips.md @@ -40,7 +40,11 @@ form to quickly deploy the model: ![Deploy Model](web_assets/deploy-model.png) -### Compute Shape +### Infrastructure + +AQUA supports two types of infrastructure resources for deploying a single model: Compute Shape and Compute Target (Managed Compute Cluster). When deploying a model, you can either specify a compute shape or choose a compute target. + +#### Compute Shape The compute shape selection is critical, the list available is selected to be suitable for the chosen model. @@ -55,6 +59,21 @@ For a full list of shapes and their definitions see the [compute shape docs](htt The relationship between model parameter size and GPU memory is roughly 2x parameter count in GB, so for example a model that has 7B parameters will need a minimum of 14 GB for inference. At runtime the memory is used for both holding the weights, along with the concurrent contexts for the user's requests. +#### Compute Target + +A Data Science Compute Target manages the underlying compute, networking, and Kubernetes infrastructure, ensuring security and compliance. Each Compute Target is supported by dedicated compute capacity within a multi-tenant Kubernetes environment. + +To deploy a model on a managed compute cluster, you must first create a Compute Target and reference it in AQUA during model deployment creation. 
When selecting a Compute Target as the infrastructure, you must also specify the resource configuration, including the number of GPUs, OCPUs, and memory (in GB). + +```bash +--compute_target_details '{"compute_target_id":"ocid1.datasciencecomputetargetint.oc1.iad.", "gpu_count":2, "ocpus": "15", "memory_in_gbs": 240}' +``` + +For more details regarding compute target creation and required policy, refer to [Data Science Compute Target](). + +**Note:** Currently AQUA only supports deploying service managed models on Compute Target. + + #### Quantization Support To deploy large language models efficiently on CPU-based compute shapes, AQUA provides quantization support. Quantization reduces the precision of model weights (e.g., from 16-bit to 4-bit), significantly lowering memory and compute requirements while maintaining good accuracy. This enables faster and more cost-effective model inference without requiring a GPU. Learn more about [how to configure and deploy models with quantization](quantization-tips.md). 
From 70556f6366d467f54d24a24008c3f0297a330741 Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Wed, 4 Mar 2026 13:40:58 -0500 Subject: [PATCH 2/3] Update README.md --- ai-quick-actions/policies/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ai-quick-actions/policies/README.md b/ai-quick-actions/policies/README.md index 20e742f6..b76afc5d 100644 --- a/ai-quick-actions/policies/README.md +++ b/ai-quick-actions/policies/README.md @@ -98,6 +98,8 @@ After the stack is created and its Stack details page opens, click Plan from the Allow dynamic-group aqua-dynamic-group to inspect compartments in tenancy + Allow dynamic-group aqua-dynamic-group to manage data-science-compute-targets in compartment + Allow dynamic-group aqua-dynamic-group to manage object-family in compartment where any {target.bucket.name=''} Allow dynamic-group to read repos in compartment where any {request.operation='ReadDockerRepositoryMetadata',request.operation='ReadDockerRepositoryManifest',request.operation='PullDockerLayer'} From fddf64b8f314a0d9ff1e6384fa027096bf2f2a78 Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Wed, 4 Mar 2026 13:41:33 -0500 Subject: [PATCH 3/3] Update iam.tf --- ai-quick-actions/policies/terraform/iam.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ai-quick-actions/policies/terraform/iam.tf b/ai-quick-actions/policies/terraform/iam.tf index 41ea98f2..a3984881 100644 --- a/ai-quick-actions/policies/terraform/iam.tf +++ b/ai-quick-actions/policies/terraform/iam.tf @@ -48,6 +48,7 @@ locals { "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to manage data-science-modelversionsets in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to read buckets in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to read 
objectstorage-namespaces in ${local.compartment_policy_string}", + "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to manage data-science-compute-targets in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to inspect compartments in tenancy", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to read repos in ${local.compartment_policy_string} where any {request.operation='ReadDockerRepositoryMetadata',request.operation='ReadDockerRepositoryManifest',request.operation='PullDockerLayer'}" ]:[] @@ -66,6 +67,7 @@ locals { "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to manage data-science-modelversionsets in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to read buckets in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to read objectstorage-namespaces in ${local.compartment_policy_string}", + "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to manage data-science-compute-targets in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to inspect compartments in ${local.compartment_policy_string}", "Allow dynamic-group id ${oci_identity_dynamic_group.aqua-dynamic-group[0].id} to read repos in ${local.compartment_policy_string} where any {request.operation='ReadDockerRepositoryMetadata',request.operation='ReadDockerRepositoryManifest',request.operation='PullDockerLayer'}" ]:[]