aws
diff --git a/‎sagemaker-serve/src/sagemaker/serve/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎sagemaker-serve/src/sagemaker/serve/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎sagemaker-serve/src/sagemaker/serve/ai_inference_recommender/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎sagemaker-serve/src/sagemaker/serve/ai_inference_recommender/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎sagemaker-serve/src/sagemaker/serve/ai_inference_recommender/_model_builder_methods.py‎
Lines changed: 50 additions & 20 deletions b/‎sagemaker-serve/src/sagemaker/serve/ai_inference_recommender/_model_builder_methods.py‎
Lines changed: 50 additions & 20 deletions
diff --git a/‎sagemaker-serve/src/sagemaker/serve/ai_inference_recommender/_recommendation_view.py‎
Lines changed: 148 additions & 9 deletions b/‎sagemaker-serve/src/sagemaker/serve/ai_inference_recommender/_recommendation_view.py‎
Lines changed: 148 additions & 9 deletions
@@ -37,6 +37,7 @@
     Workload,
     FeatureGatedError,
     WorkloadValidationError,
+    start_benchmark,
 )
 
 __all__ = [
@@ -50,4 +51,5 @@
     "Workload",
     "FeatureGatedError",
     "WorkloadValidationError",
+    "start_benchmark",
 ]
@@ -28,6 +28,9 @@
 )
 from sagemaker.serve.ai_inference_recommender.secrets import Secret
 from sagemaker.serve.ai_inference_recommender.workload import Workload
+from sagemaker.serve.ai_inference_recommender._model_builder_methods import (
+    start_benchmark,
+)
 
 
 __all__ = [
@@ -40,4 +43,5 @@
     "Secret",
     "Workload",
     "WorkloadValidationError",
+    "start_benchmark",
 ]
@@ -10,12 +10,12 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-"""Internal helpers backing ModelBuilder.start_benchmark and the recommendation branch of ModelBuilder.optimize."""
+"""Internal helpers backing the public start_benchmark function and ModelBuilder.generate_deployment_recommendations."""
 from __future__ import absolute_import
 
 import time
 import uuid
-from typing import List, Optional, Union
+from typing import Any, List, Optional, Union
 
 from sagemaker.core.helper.session_helper import Session, get_execution_role
 from sagemaker.core.resources import (
@@ -24,6 +24,8 @@
     AIWorkloadConfig,
     Endpoint,
 )
+from sagemaker.core.telemetry.constants import Feature
+from sagemaker.core.telemetry.telemetry_logging import _telemetry_emitter
 from sagemaker.core.shapes.shapes import (
     AIBenchmarkEndpoint,
     AIBenchmarkInferenceComponent,
@@ -51,10 +53,12 @@
 from sagemaker.serve.ai_inference_recommender.workload import Workload
 
 
+@_telemetry_emitter(
+    feature=Feature.MODEL_CUSTOMIZATION, func_name="ai_inference_recommender.start_benchmark"
+)
 def start_benchmark(
-    builder,  # ModelBuilder; not annotated to avoid a circular import.
     endpoint: Union[Endpoint, str],
-    workload: Union[Workload, str],
+    workload: Optional[Union[Workload, str]] = None,
     *,
     output_path: Optional[str] = None,
     role: Optional[str] = None,
@@ -64,14 +68,17 @@ def start_benchmark(
     name: Optional[str] = None,
     workload_config_name: Optional[str] = None,
     wait: bool = True,
+    **workload_kwargs: Any,
 ) -> AIBenchmarkJob:
     """Start an AI benchmark job against a SageMaker endpoint.
 
     Args:
         endpoint: An ``Endpoint`` resource, or the name/ARN of an existing
             endpoint to benchmark.
-        workload: Either a ``Workload`` (auto-creates a workload config) or
-            the name/ARN of an existing ``AIWorkloadConfig``.
+        workload: Optional. A ``Workload`` instance, or the name/ARN of an
+            existing ``AIWorkloadConfig``. Omit this and pass workload
+            keyword arguments inline (``tokenizer=``, ``concurrency=``,
+            etc.) to construct a synthetic workload on the fly.
         output_path: ``s3://`` URI for benchmark output. Defaults to the
             session's default bucket.
         role: IAM execution role ARN. Defaults to the SageMaker execution
@@ -83,13 +90,28 @@ def start_benchmark(
         name: Optional benchmark job name. Auto-generated if omitted.
         workload_config_name: Optional name for the auto-created workload
             config. Auto-generated if omitted.
-        wait: If True, block until the job reaches a terminal state.
+        wait: If True (default), block until the job reaches a terminal
+            state.
+        **workload_kwargs: Inline workload parameters. Only used when
+            ``workload`` is omitted; forwarded to ``Workload.synthetic``.
 
     Returns:
-        The created ``AIBenchmarkJob`` resource. After it reaches a terminal
-        state, pass it to ``BenchmarkResult.from_job(job)`` to retrieve the
-        parsed metrics.
+        The created :class:`BenchmarkJob`. Once terminal, call
+        ``job.show_result()`` to download and parse the metrics.
     """
+    if workload is None:
+        if not workload_kwargs:
+            raise ValueError(
+                "start_benchmark requires either a workload= argument or "
+                "inline workload keyword arguments (e.g. tokenizer=...)."
+            )
+        workload = Workload.synthetic(**workload_kwargs)
+    elif workload_kwargs:
+        raise ValueError(
+            "start_benchmark accepts either workload= or inline workload "
+            "keyword arguments, not both."
+        )
+
     sagemaker_session = Session()
     role_arn = role or get_execution_role(sagemaker_session=sagemaker_session)
     output_location = output_path or _default_output_path(sagemaker_session, "benchmarks")
@@ -124,8 +146,11 @@ def start_benchmark(
         network_config=network_config,
         tags=tags,
     )
-    if builder is not None:
-        builder._benchmark_job = job
+    # Surface the BenchmarkJob subclass (which adds show_result) on the
+    # returned instance.
+    from sagemaker.serve.ai_inference_recommender.jobs import BenchmarkJob
+
+    job.__class__ = BenchmarkJob
     if wait:
         job.wait()
     return job
@@ -140,7 +165,7 @@ def run_recommendation_job(
     role_arn: Optional[str] = None,
     instance_types: Optional[List[str]] = None,
     capacity_reservation_arns: Optional[List[str]] = None,
-    optimize_model: bool = True,
+    advanced_optimization: bool = True,
     framework: Optional[str] = None,
     model_package_group: Optional[str] = None,
     tags: Optional[List[Tag]] = None,
@@ -150,9 +175,8 @@ def run_recommendation_job(
 ) -> AIRecommendationJob:
     """Submit an ``AIRecommendationJob`` for the model configured on this builder.
 
-    Backs the recommendation branch of :meth:`ModelBuilder.optimize`. Not
-    intended to be called directly; pass ``workload`` and ``performance_target``
-    to :meth:`ModelBuilder.optimize` instead.
+    Backs :meth:`ModelBuilder.generate_deployment_recommendations`. Not intended
+    to be called directly.
 
     Args:
         workload: Either a ``Workload`` (auto-creates a workload config) or
@@ -165,8 +189,9 @@ def run_recommendation_job(
             role from the ambient session.
         instance_types: Up to 3 instance types to evaluate.
         capacity_reservation_arns: Optional list of ML reservation ARNs.
-        optimize_model: If True (default), allow the service to apply model
-            optimizations such as speculative decoding and kernel tuning.
+        advanced_optimization: If True (default), allow the service to apply
+            model optimizations such as speculative decoding and kernel
+            tuning.
         framework: Inference framework. ``"LMI"`` or ``"VLLM"``.
         model_package_group: Optional model package group identifier in
             which to register the optimized model.
@@ -189,7 +214,7 @@ def run_recommendation_job(
     if not s3_uri:
         raise ValueError(
             "ModelBuilder must be configured with an S3 model_path before "
-            "calling optimize() with a workload. Call build() first."
+            "calling generate_deployment_recommendations. Call build() first."
         )
 
     if instance_types and len(instance_types) > MAX_INSTANCE_TYPES:
@@ -235,10 +260,15 @@ def run_recommendation_job(
         ),
         role_arn=resolved_role_arn,
         inference_specification=inference_spec,
-        optimize_model=optimize_model,
+        optimize_model=advanced_optimization,
         compute_spec=compute_spec,
         tags=tags,
     )
+    # Surface the RecommendationJob subclass (which adds show_result) on the
+    # returned instance.
+    from sagemaker.serve.ai_inference_recommender.jobs import RecommendationJob
+
+    job.__class__ = RecommendationJob
     if wait:
         job.wait()
     return job
 
@@ -12,39 +12,178 @@
 # language governing permissions and limitations under the License.
 """Pretty-printing wrapper over an AIRecommendation row.
 
-Exists only because the auto-generated ``AIRecommendation`` shape has
-Pydantic's default repr (dumps every field). Wrapping each row in this
-class swaps the repr without owning the data — attribute access forwards
-to the raw shape transparently.
+Wraps each row to replace the default repr without owning the data;
+attribute access forwards to the underlying shape transparently.
 """
 from __future__ import absolute_import
 
+from collections import defaultdict
+from typing import Any, Dict, List, Optional
+
 from sagemaker.serve.ai_inference_recommender.result import (
     _fmt_number,
     _format_table,
     _indent,
 )
 
 
+class _ExpectedPerformanceMetric:
+    """All reported statistics for one metric in ``expected_performance``.
+
+    The recommendation row lists a metric once per ``stat`` (avg, p50, p90,
+    p99, ...). This object keeps those values keyed by stat name so callers
+    can write ``rec.expected_performance.request_throughput.avg`` or
+    ``.p99`` directly.
+    """
+
+    __slots__ = ("_stats", "unit")
+
+    def __init__(self, stats: Dict[str, float], unit: Optional[str]):
+        # This class defines no __setattr__ hook, so plain assignment fills
+        # the slots exactly like object.__setattr__ would.
+        self._stats = stats
+        self.unit = unit
+
+    @property
+    def avg(self) -> Optional[float]:
+        """The ``avg`` stat, or ``None`` when it is not present."""
+        return self._stats.get("avg")
+
+    @property
+    def p50(self) -> Optional[float]:
+        """The ``p50`` stat, or ``None`` when it is not present."""
+        return self._stats.get("p50")
+
+    @property
+    def p90(self) -> Optional[float]:
+        """The ``p90`` stat, or ``None`` when it is not present."""
+        return self._stats.get("p90")
+
+    @property
+    def p99(self) -> Optional[float]:
+        """The ``p99`` stat, or ``None`` when it is not present."""
+        return self._stats.get("p99")
+
+    @property
+    def stats(self) -> Dict[str, float]:
+        """A shallow copy of the stat-name to value mapping."""
+        return dict(self._stats)
+
+    def __repr__(self) -> str:
+        rendered = [
+            f"{stat}={_fmt_number(value)}" for stat, value in self._stats.items()
+        ]
+        # The unit, when known, is appended once after all stats.
+        suffix = f" {self.unit}" if self.unit else ""
+        return "<" + ", ".join(rendered) + suffix + ">"
+
+
+class _ExpectedPerformanceView:
+    """Typed + dict-style accessor over a recommendation's expected_performance.
+
+    Service shape is ``List[AIRecommendationPerformanceMetric]`` with one row
+    per (metric, stat). This view groups rows by metric name so customers
+    can do ``view.request_throughput.avg`` (snake_case attribute),
+    ``view.get("RequestThroughput").p99`` or ``view["RequestThroughput"]``
+    (raw service name).
+    """
+
+    __slots__ = ("_by_metric",)
+
+    def __init__(self, raw_rows: Optional[List[Any]]):
+        """Group ``raw_rows`` by metric name.
+
+        Each row is read through ``getattr`` for ``metric``, ``stat``,
+        ``value`` and ``unit``. Rows without a metric name, or whose value
+        ``_to_float`` cannot convert, are skipped. A row without a stat is
+        stored under the stat name ``"value"``. The first non-empty unit
+        seen for a metric is kept.
+
+        Args:
+            raw_rows: The rows to group; ``None`` is treated as empty.
+        """
+        by_metric: Dict[str, Dict[str, Any]] = defaultdict(
+            lambda: {"unit": None, "stats": {}}
+        )
+        for row in raw_rows or []:
+            metric = getattr(row, "metric", None)
+            if not metric:
+                continue
+            stat = getattr(row, "stat", None) or "value"
+            value = _to_float(getattr(row, "value", None))
+            if value is None:
+                continue
+            entry = by_metric[metric]
+            entry["stats"][stat] = value
+            unit = getattr(row, "unit", None)
+            if unit and not entry["unit"]:
+                entry["unit"] = unit
+
+        compiled: Dict[str, _ExpectedPerformanceMetric] = {
+            name: _ExpectedPerformanceMetric(entry["stats"], entry["unit"])
+            for name, entry in by_metric.items()
+        }
+        object.__setattr__(self, "_by_metric", compiled)
+
+    def get(self, name: str) -> Optional[_ExpectedPerformanceMetric]:
+        """Look up a metric by raw service name (e.g. ``"RequestThroughput"``)."""
+        return self._by_metric.get(name)
+
+    def __getitem__(self, name: str) -> _ExpectedPerformanceMetric:
+        """Return the metric stored under ``name``.
+
+        Accepts the raw service name or its snake_case alias, mirroring
+        ``__contains__``. Together with ``keys()`` this also makes
+        ``dict(view)`` work.
+
+        Raises:
+            KeyError: If no metric matches ``name``.
+        """
+        metric = self._by_metric.get(name)
+        if metric is None:
+            metric = self._by_metric.get(_snake_to_camel(name))
+        if metric is None:
+            raise KeyError(name)
+        return metric
+
+    def __getattr__(self, name: str) -> _ExpectedPerformanceMetric:
+        """Resolve ``view.<snake_case_metric>`` to the grouped metric.
+
+        Raises:
+            AttributeError: If ``name`` starts with an underscore or matches
+                no metric.
+        """
+        # Only reached when normal lookup fails. Underscore-prefixed names are
+        # never metrics; rejecting them first keeps protocol probes such as
+        # ``__setstate__`` (copy / pickle) from reading ``_by_metric`` while
+        # the slot is still unset, which would re-enter this method and end
+        # in RecursionError instead of AttributeError.
+        if name.startswith("_"):
+            raise AttributeError(
+                f"{type(self).__name__!r} object has no attribute {name!r}"
+            )
+        # snake_case attribute access. Translate to CamelCase service name.
+        service_name = _snake_to_camel(name)
+        metric = self._by_metric.get(service_name) or self._by_metric.get(name)
+        if metric is None:
+            raise AttributeError(
+                f"No expected_performance metric named {name!r}. "
+                f"Available: {sorted(self._by_metric)}"
+            )
+        return metric
+
+    def __contains__(self, name: str) -> bool:
+        """Return True if ``name`` matches a metric, raw or snake_case."""
+        return name in self._by_metric or _snake_to_camel(name) in self._by_metric
+
+    def __iter__(self):
+        """Iterate over the raw metric names."""
+        return iter(self._by_metric)
+
+    def keys(self):
+        """Raw metric names."""
+        return self._by_metric.keys()
+
+    def items(self):
+        """``(raw metric name, metric)`` pairs."""
+        return self._by_metric.items()
+
+    def values(self):
+        """The grouped metric objects."""
+        return self._by_metric.values()
+
+    def __len__(self) -> int:
+        """Number of distinct metrics."""
+        return len(self._by_metric)
+
+    def __repr__(self) -> str:
+        return "{" + ", ".join(
+            f"{name}: {metric!r}" for name, metric in self._by_metric.items()
+        ) + "}"
+
+
+def _to_float(value):
+    """Convert ``value`` to ``float``.
+
+    Returns ``None`` when ``value`` is ``None`` or when ``float()`` rejects
+    it with ``TypeError`` or ``ValueError``.
+    """
+    if value is None:
+        return None
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def _snake_to_camel(name: str) -> str:
+    """Join the ``_``-separated parts of ``name``, each passed through ``str.capitalize``."""
+    parts = name.split("_")
+    return "".join(map(str.capitalize, parts))
+
+
 class _RecommendationView:
     """Read-only view over a single recommendation row."""
 
-    __slots__ = ("_raw", "_index")
+    __slots__ = ("_raw", "_index", "_expected_performance")
 
     def __init__(self, raw, index: int = 0):
         # Use object.__setattr__ to avoid triggering __getattr__ during init.
         object.__setattr__(self, "_raw", raw)
         object.__setattr__(self, "_index", index)
+        object.__setattr__(
+            self,
+            "_expected_performance",
+            _ExpectedPerformanceView(getattr(raw, "expected_performance", None)),
+        )
 
     @property
     def raw(self):
-        """The underlying auto-generated ``AIRecommendation`` shape."""
+        """The underlying ``AIRecommendation`` shape."""
         return self._raw
 
+    @property
+    def expected_performance(self) -> _ExpectedPerformanceView:
+        """Typed + dict-style accessor for the recommendation's expected metrics."""
+        return self._expected_performance
+
     def __getattr__(self, name):
-        # Fall through to the underlying shape so ``view.model_details``,
-        # ``view.deployment_configuration``, and ``view.expected_performance``
-        # work as if the customer held the raw row.
+        # Fall through to the underlying shape so ``view.model_details`` and
+        # ``view.deployment_configuration`` work as if the customer held the
+        # raw row. ``expected_performance`` is intercepted by the property above.
         return getattr(self._raw, name)
 
     def __repr__(self) -> str:
Original file line number	Diff line number	Diff line change
`@@ -37,6 +37,7 @@`
`37`	`37`	`Workload,`
`38`	`38`	`FeatureGatedError,`
`39`	`39`	`WorkloadValidationError,`
	`40`	`+ start_benchmark,`
`40`	`41`	`)`
`41`	`42`
`42`	`43`	`__all__ = [`
`@@ -50,4 +51,5 @@`
`50`	`51`	`"Workload",`
`51`	`52`	`"FeatureGatedError",`
`52`	`53`	`"WorkloadValidationError",`
	`54`	`+ "start_benchmark",`
`53`	`55`	`]`
Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,9 @@`
`28`	`28`	`)`
`29`	`29`	`from sagemaker.serve.ai_inference_recommender.secrets import Secret`
`30`	`30`	`from sagemaker.serve.ai_inference_recommender.workload import Workload`
	`31`	`+from sagemaker.serve.ai_inference_recommender._model_builder_methods import (`
	`32`	`+ start_benchmark,`
	`33`	`+)`
`31`	`34`
`32`	`35`
`33`	`36`	`__all__ = [`
`@@ -40,4 +43,5 @@`
`40`	`43`	`"Secret",`
`41`	`44`	`"Workload",`
`42`	`45`	`"WorkloadValidationError",`
	`46`	`+ "start_benchmark",`
`43`	`47`	`]`