Skip to content

Commit dd58598

Browse files
committed
feature: Feat: Feature Store in Sagemaker SDK v3 (5496)
1 parent 6497a94 commit dd58598

5 files changed

Lines changed: 355 additions & 2 deletions

File tree

sagemaker-mlops/src/sagemaker/mlops/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,21 @@
1111
Key components:
1212
- workflow: Pipeline and step orchestration
1313
- model_builder: Model building and orchestration
14+
- feature_store: Feature Store management (FeatureGroup, FeatureStore, ingestion, etc.)
1415
1516
Example usage:
1617
from sagemaker.mlops import ModelBuilder
1718
from sagemaker.mlops.workflow import Pipeline, TrainingStep
19+
20+
# Feature Store
21+
from sagemaker.mlops.feature_store import (
22+
FeatureGroup,
23+
FeatureStore,
24+
FeatureMetadata,
25+
create_dataset,
26+
ingest_dataframe,
27+
create_athena_query,
28+
)
1829
"""
1930
from __future__ import absolute_import
2031

@@ -27,7 +38,12 @@
2738
# from sagemaker.mlops import workflow
2839
# from sagemaker.mlops.workflow import Pipeline, TrainingStep, etc.
2940

41+
# Feature Store submodule is available via:
42+
# from sagemaker.mlops import feature_store
43+
# from sagemaker.mlops.feature_store import FeatureGroup, FeatureStore, create_dataset, etc.
44+
3045
__all__ = [
3146
"ModelBuilder",
3247
"workflow", # Submodule
48+
"feature_store", # Submodule
3349
]

sagemaker-mlops/src/sagemaker/mlops/feature_store/MIGRATION_GUIDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ These V2 features are **not wrapped** because core provides them directly:
471471
from sagemaker.mlops.feature_store import (
472472
# Resources (from core)
473473
FeatureGroup,
474-
FeatureStore,
474+
FeatureStore, # For search() and batch operations
475475
FeatureMetadata,
476476

477477
# Shapes (from core)

sagemaker-mlops/src/sagemaker/mlops/feature_store/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""SageMaker FeatureStore V3 - powered by sagemaker-core."""
44

55
# Resources from core
6-
from sagemaker.core.resources import FeatureGroup, FeatureMetadata
6+
from sagemaker.core.resources import FeatureGroup, FeatureMetadata, FeatureStore
77

88
# Shapes from core (Pydantic - no to_dict() needed)
99
from sagemaker.core.shapes import (
@@ -51,6 +51,7 @@
5151
from sagemaker.mlops.feature_store.feature_utils import (
5252
as_hive_ddl,
5353
create_athena_query,
54+
create_dataset,
5455
get_session_from_role,
5556
ingest_dataframe,
5657
load_feature_definitions_from_dataframe,
@@ -74,6 +75,7 @@
7475
# Resources
7576
"FeatureGroup",
7677
"FeatureMetadata",
78+
"FeatureStore",
7779
# Shapes
7880
"DataCatalogConfig",
7981
"FeatureParameter",
@@ -110,6 +112,7 @@
110112
# Utility functions
111113
"as_hive_ddl",
112114
"create_athena_query",
115+
"create_dataset",
113116
"get_session_from_role",
114117
"ingest_dataframe",
115118
"load_feature_definitions_from_dataframe",

sagemaker-mlops/src/sagemaker/mlops/feature_store/feature_utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,64 @@ def as_hive_ddl(
421421
return ddl
422422

423423

424+
@_telemetry_emitter(Feature.FEATURE_STORE, "create_dataset")
def create_dataset(
    base,
    output_path: str,
    session: Session,
    record_identifier_feature_name: str = None,
    event_time_identifier_feature_name: str = None,
    included_feature_names=None,
    kms_key_id: str = None,
):
    """Return a DatasetBuilder rooted at a FeatureGroup or a pandas DataFrame.

    Thin convenience wrapper: every argument is handed, unchanged and by
    keyword, to ``DatasetBuilder.create``. No validation or transformation
    happens in this function itself.

    Args:
        base (Union[FeatureGroup, DataFrame]): The FeatureGroup or DataFrame
            that serves as the base of the dataset.
        output_path (str): S3 URI under which query results are stored.
        session (Session): SageMaker session used for boto calls.
        record_identifier_feature_name (str): Name of the feature acting as
            the record identifier. Required when ``base`` is a DataFrame
            (default: None).
        event_time_identifier_feature_name (str): Name of the feature acting
            as the event time identifier. Required when ``base`` is a
            DataFrame (default: None).
        included_feature_names (List[str]): Features to keep in the output.
            When left unset, all features are included (default: None).
        kms_key_id (str): KMS key used for encryption (default: None).

    Returns:
        DatasetBuilder: The builder produced by ``DatasetBuilder.create`` for
        the supplied arguments.

    Raises:
        ValueError: Documented for the case where ``base`` is a DataFrame and
            ``record_identifier_feature_name`` or
            ``event_time_identifier_feature_name`` is missing. This wrapper
            performs no such check itself, so the error would originate in
            ``DatasetBuilder.create``.

    Example:
        ``session`` and ``other_fg`` below are placeholders for objects the
        caller already holds.

        >>> from sagemaker.mlops.feature_store import create_dataset, FeatureGroup
        >>> fg = FeatureGroup.get(feature_group_name="my-fg")
        >>> builder = create_dataset(
        ...     base=fg,
        ...     output_path="s3://bucket/output",
        ...     session=session,
        ... )
        >>> builder.with_feature_group(other_fg, target_feature_name_in_base="id")
        >>> df, query = builder.to_dataframe()
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably this avoids a circular import between
    # feature_utils and dataset_builder -- confirm before hoisting it.
    from sagemaker.mlops.feature_store.dataset_builder import DatasetBuilder

    # Collect the pass-through arguments under the exact keyword names that
    # DatasetBuilder.create is called with.
    builder_options = {
        "base": base,
        "output_path": output_path,
        "session": session,
        "record_identifier_feature_name": record_identifier_feature_name,
        "event_time_identifier_feature_name": event_time_identifier_feature_name,
        "included_feature_names": included_feature_names,
        "kms_key_id": kms_key_id,
    }
    return DatasetBuilder.create(**builder_options)
480+
481+
424482
@_telemetry_emitter(Feature.FEATURE_STORE, "ingest_dataframe")
425483
def ingest_dataframe(
426484
feature_group_name: str,

0 commit comments

Comments
 (0)