"""This module is for Triton Python backend."""
from __future__ import absolute_import
import os
import logging
from pathlib import Path
import platform
import triton_python_backend_utils as pb_utils
import cloudpickle
from sagemaker.serve.validations.check_integrity import perform_integrity_check
logger = logging.getLogger(__name__)
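
# TRITON_MODEL_DIR is expected to be set in the container environment and to point at the
# directory holding serve.pkl and metadata.json (assumed from the paths built below).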
TRITON_MODEL_DIR = os.getenv("TRITON_MODEL_DIR")


class TritonPythonModel:
    """A class for the Triton Python backend."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Return the model config unchanged; no auto-completion is applied."""
        return auto_complete_model_config

    def initialize(self, args: dict) -> None:
        """Load the pickled inference spec, schema builder, and model from TRITON_MODEL_DIR."""
        serve_path = Path(TRITON_MODEL_DIR).joinpath("serve.pkl")
        metadata_path = Path(TRITON_MODEL_DIR).joinpath("metadata.json")

        # Verify the pickled artifact against metadata.json before deserializing it.
        with open(str(serve_path), mode="rb") as f:
            buffer = f.read()
        perform_integrity_check(buffer=buffer, metadata_path=str(metadata_path))

        with open(str(serve_path), mode="rb") as f:
            inference_spec, schema_builder = cloudpickle.load(f)
        self.inference_spec = inference_spec
        self.schema_builder = schema_builder
        self.model = inference_spec.load(model_dir=TRITON_MODEL_DIR)

    def execute(self, requests):
        """Run inference for each Triton request and return one response per request."""
        responses = []
        for request in requests:
            # Tensor names "input_1" and "output_1" must match the model's configuration.
            input_ndarray = pb_utils.get_input_tensor_by_name(request, "input_1").as_numpy()
            converted_input = self.schema_builder.input_deserializer.deserialize(input_ndarray)
            output = self.inference_spec.invoke(input_object=converted_input, model=self.model)
            output_ndarray = self.schema_builder.output_serializer.serialize(output)
            response = pb_utils.InferenceResponse(
                output_tensors=[pb_utils.Tensor("output_1", output_ndarray)]
            )
            responses.append(response)
        return responses
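

# Preflight diagnostics, executed once at import time (see the call at the bottom of this
# file): they flag Python-version drift and a corrupted serve.pkl before serving starts.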
def _run_preflight_diagnostics():
    _py_vs_parity_check()
    _pickle_file_integrity_check()


def _py_vs_parity_check():
    container_py_vs = platform.python_version()
    local_py_vs = os.getenv("LOCAL_PYTHON")
    # Only the minor version component is compared; a missing LOCAL_PYTHON also triggers the warning.
    if not local_py_vs or container_py_vs.split(".")[1] != local_py_vs.split(".")[1]:
        logger.warning(
            f"The local Python version {local_py_vs} differs from the Python version "
            f"{container_py_vs} on the container. Please align the two to avoid unexpected behavior."
        )


def _pickle_file_integrity_check():
    serve_path = Path(TRITON_MODEL_DIR).joinpath("serve.pkl")
    metadata_path = Path(TRITON_MODEL_DIR).joinpath("metadata.json")

    with open(str(serve_path), "rb") as f:
        buffer = f.read()
    perform_integrity_check(buffer=buffer, metadata_path=metadata_path)


# Execute the preflight diagnostics when Triton imports this module.
_run_preflight_diagnostics()
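
# A sketch of the expected on-disk layout (assuming the standard Triton model repository
# structure; everything except serve.pkl, metadata.json, and the tensor names is assumed):
#
#   <model_repository>/<model_name>/
#       config.pbtxt        # declares the Python backend and the "input_1" / "output_1" tensors
#       1/
#           model.py        # this file
#           serve.pkl       # cloudpickled (inference_spec, schema_builder)
#           metadata.json   # integrity metadata read by perform_integrity_check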