Skip to content

Commit 7f1a30b

Browse files
Merge branch 'aws:master' into master
2 parents 13cb251 + 45f56f0 commit 7f1a30b

74 files changed

Lines changed: 15540 additions & 5 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

docs/api/sagemaker_mlops.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,12 @@ Local Development
2828
:members:
2929
:undoc-members:
3030
:show-inheritance:
31+
32+
33+
Feature Store
34+
-------------
35+
36+
.. automodule:: sagemaker.mlops.feature_store
37+
:members:
38+
:undoc-members:
39+
:show-inheritance:

requirements/extras/test_requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ scipy
1111
omegaconf
1212
graphene
1313
typing_extensions>=4.9.0
14-
tensorflow>=2.16.2,<=2.19.0
14+
tensorflow>=2.16.2,<=2.19.0
15+
build

sagemaker-core/src/sagemaker/core/common_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@
5656
"cn-north-1": "amazonaws.com.cn",
5757
"cn-northwest-1": "amazonaws.com.cn",
5858
"us-iso-east-1": "c2s.ic.gov",
59+
"us-iso-west-1": "c2s.ic.gov",
5960
"us-isob-east-1": "sc2s.sgov.gov",
61+
"us-isob-west-1": "sc2s.sgov.gov",
6062
"us-isof-south-1": "csp.hci.ic.gov",
6163
"us-isof-east-1": "csp.hci.ic.gov",
6264
"eu-isoe-west-1": "cloud.adc-e.uk",

sagemaker-core/src/sagemaker/core/image_uri_config/djl-lmi.json

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,13 @@
8585
"ap-southeast-7": "590183813437",
8686
"ap-southeast-4": "457447274322",
8787
"ap-southeast-5": "550225433462",
88-
"mx-central-1": "637423239942"
88+
"mx-central-1": "637423239942",
89+
"us-iso-east-1": "763104351884",
90+
"us-iso-west-1": "763104351884",
91+
"us-isob-east-1": "763104351884",
92+
"us-isob-west-1": "763104351884",
93+
"us-isof-east-1": "763104351884",
94+
"us-isof-south-1": "763104351884"
8995
},
9096
"repository": "djl-inference",
9197
"tag_prefix": "0.35.0-lmi17.0.0-cu128"
@@ -127,7 +133,13 @@
127133
"ap-southeast-7": "590183813437",
128134
"ap-southeast-4": "457447274322",
129135
"ap-southeast-5": "550225433462",
130-
"mx-central-1": "637423239942"
136+
"mx-central-1": "637423239942",
137+
"us-iso-east-1": "763104351884",
138+
"us-iso-west-1": "763104351884",
139+
"us-isob-east-1": "763104351884",
140+
"us-isob-west-1": "763104351884",
141+
"us-isof-east-1": "763104351884",
142+
"us-isof-south-1": "763104351884"
131143
},
132144
"repository": "djl-inference",
133145
"tag_prefix": "0.34.0-lmi16.0.0-cu128"
@@ -169,7 +181,13 @@
169181
"ap-southeast-7": "590183813437",
170182
"ap-southeast-4": "457447274322",
171183
"ap-southeast-5": "550225433462",
172-
"mx-central-1": "637423239942"
184+
"mx-central-1": "637423239942",
185+
"us-iso-east-1": "763104351884",
186+
"us-iso-west-1": "763104351884",
187+
"us-isob-east-1": "763104351884",
188+
"us-isob-west-1": "763104351884",
189+
"us-isof-east-1": "763104351884",
190+
"us-isof-south-1": "763104351884"
173191
},
174192
"repository": "djl-inference",
175193
"tag_prefix": "0.33.0-lmi15.0.0-cu128"

sagemaker-mlops/pyproject.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,21 @@ dependencies = [
3131
]
3232

3333
[project.optional-dependencies]
34+
feature-processor = [
35+
"pyspark==3.3.2",
36+
"sagemaker-feature-store-pyspark-3.3",
37+
"setuptools<82",
38+
]
39+
3440
test = [
3541
"pytest",
3642
"pytest-cov",
3743
"mock",
44+
"setuptools<82",
45+
"pyspark==3.3.2",
46+
"sagemaker-feature-store-pyspark-3.3",
47+
"pandas<3.0",
48+
"numpy<3.0",
3849
]
3950
dev = [
4051
"pytest",
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""Exported classes for the sagemaker.mlops.feature_store.feature_processor module."""
14+
from __future__ import absolute_import
15+
16+
from sagemaker.mlops.feature_store.feature_processor._data_source import ( # noqa: F401
17+
CSVDataSource,
18+
FeatureGroupDataSource,
19+
ParquetDataSource,
20+
BaseDataSource,
21+
PySparkDataSource,
22+
)
23+
from sagemaker.mlops.feature_store.feature_processor._exceptions import ( # noqa: F401
24+
IngestionError,
25+
)
26+
from sagemaker.mlops.feature_store.feature_processor.feature_processor import ( # noqa: F401
27+
feature_processor,
28+
)
29+
from sagemaker.mlops.feature_store.feature_processor.feature_scheduler import ( # noqa: F401
30+
to_pipeline,
31+
schedule,
32+
describe,
33+
put_trigger,
34+
delete_trigger,
35+
enable_trigger,
36+
disable_trigger,
37+
delete_schedule,
38+
list_pipelines,
39+
execute,
40+
TransformationCode,
41+
FeatureProcessorPipelineEvents,
42+
)
43+
from sagemaker.mlops.feature_store.feature_processor._enums import ( # noqa: F401
44+
FeatureProcessorPipelineExecutionStatus,
45+
)
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""Contains classes for preparing and uploading configs for a scheduled feature processor."""
14+
from __future__ import absolute_import
15+
from typing import Callable, Dict, Optional, Tuple, List, Union
16+
17+
import attr
18+
19+
from sagemaker.core.helper.session_helper import Session
20+
from sagemaker.mlops.feature_store.feature_processor._constants import (
21+
SPARK_JAR_FILES_PATH,
22+
SPARK_PY_FILES_PATH,
23+
SPARK_FILES_PATH,
24+
S3_DATA_DISTRIBUTION_TYPE,
25+
)
26+
from sagemaker.core.inputs import TrainingInput
27+
from sagemaker.core.shapes import Channel, DataSource, S3DataSource
28+
from sagemaker.core.remote_function.core.stored_function import StoredFunction
29+
from sagemaker.core.remote_function.job import (
30+
_prepare_and_upload_workspace,
31+
_prepare_and_upload_runtime_scripts,
32+
_JobSettings,
33+
RUNTIME_SCRIPTS_CHANNEL_NAME,
34+
REMOTE_FUNCTION_WORKSPACE,
35+
SPARK_CONF_CHANNEL_NAME,
36+
_prepare_and_upload_spark_dependent_files,
37+
)
38+
from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import (
39+
RuntimeEnvironmentManager,
40+
)
41+
from sagemaker.core.remote_function.spark_config import SparkConfig
42+
from sagemaker.core.remote_function.custom_file_filter import CustomFileFilter
43+
from sagemaker.core.s3 import s3_path_join
44+
45+
46+
@attr.s
class ConfigUploader:
    """Prepare customer-provided configs and upload them to S3.

    The two fields below are declared with ``attr.ib`` so that ``attr.s``
    generates the constructor.
    """

    # Job settings. This class reads spark_config, s3_kms_key, dependencies,
    # include_local_workdir, pre_execution_commands, pre_execution_script and
    # custom_file_filter from it.
    remote_decorator_config: _JobSettings = attr.ib()
    # Used only for its ``snapshot`` method, which is handed the configured
    # dependencies before the workspace is uploaded.
    runtime_env_manager: RuntimeEnvironmentManager = attr.ib()

    def prepare_step_input_channel_for_spark_mode(
        self, func: Callable, s3_base_uri: str, sagemaker_session: Session
    ) -> Tuple[List[Channel], Dict]:
        """Upload all step artifacts and describe them as input channels.

        Uploads happen in a fixed order: the callable, the runtime scripts,
        the workspace (after snapshotting dependencies) and finally the Spark
        dependent files.

        Args:
            func: Callable that is stored under ``s3_base_uri``.
            s3_base_uri: Base S3 URI passed to every upload helper.
            sagemaker_session: Session passed to every upload helper.

        Returns:
            A tuple ``(channels, spark_dependency_paths)``. ``channels`` always
            contains the runtime-scripts channel; the workspace and Spark
            config channels are added only when the corresponding upload
            returned a truthy value. ``spark_dependency_paths`` maps
            ``SPARK_JAR_FILES_PATH``, ``SPARK_PY_FILES_PATH`` and
            ``SPARK_FILES_PATH`` to the values returned by the Spark upload
            (``None`` when no Spark config is set).
        """
        settings = self.remote_decorator_config

        self._prepare_and_upload_callable(func, s3_base_uri, sagemaker_session)

        runtime_scripts_uri = self._prepare_and_upload_runtime_scripts(
            settings.spark_config,
            s3_base_uri,
            settings.s3_kms_key,
            sagemaker_session,
        )

        snapshot_path = self.runtime_env_manager.snapshot(settings.dependencies)
        workspace_uri = self._prepare_and_upload_workspace(
            snapshot_path,
            settings.include_local_workdir,
            settings.pre_execution_commands,
            settings.pre_execution_script,
            s3_base_uri,
            settings.s3_kms_key,
            sagemaker_session,
            settings.custom_file_filter,
        )

        spark_uploads = self._prepare_and_upload_spark_dependent_files(
            settings.spark_config,
            s3_base_uri,
            settings.s3_kms_key,
            sagemaker_session,
        )
        jars_s3_paths, py_files_s3_paths, files_s3_path, spark_conf_uri = spark_uploads

        channels = [self._s3_prefix_channel(RUNTIME_SCRIPTS_CHANNEL_NAME, runtime_scripts_uri)]

        if workspace_uri:
            # The channel points at the workspace prefix under the base URI,
            # not at the value returned by the upload; the return value only
            # gates whether the channel exists.
            channels.append(
                self._s3_prefix_channel(
                    REMOTE_FUNCTION_WORKSPACE,
                    s3_path_join(s3_base_uri, REMOTE_FUNCTION_WORKSPACE),
                )
            )

        if spark_conf_uri:
            channels.append(self._s3_prefix_channel(SPARK_CONF_CHANNEL_NAME, spark_conf_uri))

        spark_dependency_paths = {
            SPARK_JAR_FILES_PATH: jars_s3_paths,
            SPARK_PY_FILES_PATH: py_files_s3_paths,
            SPARK_FILES_PATH: files_s3_path,
        }
        return channels, spark_dependency_paths

    @staticmethod
    def _s3_prefix_channel(channel_name: str, s3_uri: str) -> Channel:
        """Build a ``File``-mode channel over an ``S3Prefix`` data source."""
        s3_source = S3DataSource(
            s3_uri=s3_uri,
            s3_data_type="S3Prefix",
            s3_data_distribution_type=S3_DATA_DISTRIBUTION_TYPE,
        )
        return Channel(
            channel_name=channel_name,
            data_source=DataSource(s3_data_source=s3_source),
            input_mode="File",
        )

    def _prepare_and_upload_callable(
        self, func: Callable, s3_base_uri: str, sagemaker_session: Session
    ) -> None:
        """Save ``func`` through a ``StoredFunction`` rooted at ``s3_base_uri``.

        The configured ``s3_kms_key`` is passed to the ``StoredFunction``.
        """
        StoredFunction(
            sagemaker_session=sagemaker_session,
            s3_base_uri=s3_base_uri,
            s3_kms_key=self.remote_decorator_config.s3_kms_key,
        ).save(func)

    def _prepare_and_upload_workspace(
        self,
        local_dependencies_path: str,
        include_local_workdir: bool,
        pre_execution_commands: List[str],
        pre_execution_script_local_path: str,
        s3_base_uri: str,
        s3_kms_key: str,
        sagemaker_session: Session,
        custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None,
    ) -> str:
        """Delegate the workspace upload to the remote-function job helper.

        All arguments are forwarded unchanged by keyword. The caller in this
        class treats a falsy return value as "no workspace uploaded".
        """
        workspace_s3_uri = _prepare_and_upload_workspace(
            local_dependencies_path=local_dependencies_path,
            include_local_workdir=include_local_workdir,
            pre_execution_commands=pre_execution_commands,
            pre_execution_script_local_path=pre_execution_script_local_path,
            s3_base_uri=s3_base_uri,
            s3_kms_key=s3_kms_key,
            sagemaker_session=sagemaker_session,
            custom_file_filter=custom_file_filter,
        )
        return workspace_s3_uri

    def _prepare_and_upload_runtime_scripts(
        self,
        spark_config: SparkConfig,
        s3_base_uri: str,
        s3_kms_key: str,
        sagemaker_session: Session,
    ) -> str:
        """Delegate the runtime-scripts upload to the remote-function job helper.

        All arguments are forwarded unchanged by keyword; the helper's return
        value is used by the caller as the S3 URI of the runtime-scripts channel.
        """
        runtime_scripts_s3_uri = _prepare_and_upload_runtime_scripts(
            spark_config=spark_config,
            s3_base_uri=s3_base_uri,
            s3_kms_key=s3_kms_key,
            sagemaker_session=sagemaker_session,
        )
        return runtime_scripts_s3_uri

    def _prepare_and_upload_spark_dependent_files(
        self,
        spark_config: SparkConfig,
        s3_base_uri: str,
        s3_kms_key: str,
        sagemaker_session: Session,
    ) -> Tuple:
        """Upload Spark dependencies when a Spark config is present.

        Returns:
            The job helper's result when ``spark_config`` is truthy, otherwise
            a tuple of four ``None`` values. The caller unpacks the result as
            ``(jars, py_files, files, config_file_s3_uri)``.
        """
        if spark_config:
            return _prepare_and_upload_spark_dependent_files(
                spark_config=spark_config,
                s3_base_uri=s3_base_uri,
                s3_kms_key=s3_kms_key,
                sagemaker_session=sagemaker_session,
            )

        # Nothing to upload: keep the four-slot shape the caller unpacks.
        return (None,) * 4

0 commit comments

Comments
 (0)