@@ -421,6 +421,64 @@ def as_hive_ddl(
421421 return ddl
422422
423423
@_telemetry_emitter(Feature.FEATURE_STORE, "create_dataset")
def create_dataset(
    base,
    output_path: str,
    session: Session,
    record_identifier_feature_name: str = None,
    event_time_identifier_feature_name: str = None,
    included_feature_names=None,
    kms_key_id: str = None,
):
    """Build a DatasetBuilder from a base FeatureGroup or pandas DataFrame.

    Convenience entry point: every argument is forwarded unchanged to
    ``DatasetBuilder.create`` and the resulting builder is returned. No
    validation happens in this function itself.

    Args:
        base (Union[FeatureGroup, DataFrame]): The FeatureGroup or DataFrame
            that serves as the base of the dataset.
        output_path (str): S3 URI under which query results are stored.
        session (Session): SageMaker session used for boto calls.
        record_identifier_feature_name (str): Name of the feature acting as
            the record identifier. Required when ``base`` is a DataFrame
            (default: None).
        event_time_identifier_feature_name (str): Name of the feature acting
            as the event time identifier. Required when ``base`` is a
            DataFrame (default: None).
        included_feature_names (List[str]): Features to keep in the output.
            When not set, all features are included (default: None).
        kms_key_id (str): KMS key used for encryption (default: None).

    Returns:
        DatasetBuilder: The builder produced by ``DatasetBuilder.create`` for
            the given arguments.

    Raises:
        ValueError: If ``base`` is a DataFrame and either
            ``record_identifier_feature_name`` or
            ``event_time_identifier_feature_name`` is missing. This function
            does no checking of its own, so the error would propagate from
            ``DatasetBuilder.create``.

    Example:
        >>> from sagemaker.mlops.feature_store import create_dataset, FeatureGroup
        >>> fg = FeatureGroup.get(feature_group_name="my-fg")
        >>> builder = create_dataset(
        ...     base=fg,
        ...     output_path="s3://bucket/output",
        ...     session=session,
        ... )
        >>> builder.with_feature_group(other_fg, target_feature_name_in_base="id")
        >>> df, query = builder.to_dataframe()
    """
    # Kept as a call-time import, exactly as in the original.
    # NOTE(review): presumably this avoids a circular import with
    # dataset_builder -- confirm before hoisting it to module level.
    from sagemaker.mlops.feature_store.dataset_builder import DatasetBuilder

    # Gather the pass-through options once, then hand them over by keyword.
    builder_options = dict(
        base=base,
        output_path=output_path,
        session=session,
        record_identifier_feature_name=record_identifier_feature_name,
        event_time_identifier_feature_name=event_time_identifier_feature_name,
        included_feature_names=included_feature_names,
        kms_key_id=kms_key_id,
    )
    return DatasetBuilder.create(**builder_options)
480+
481+
424482@_telemetry_emitter (Feature .FEATURE_STORE , "ingest_dataframe" )
425483def ingest_dataframe (
426484 feature_group_name : str ,
0 commit comments