"""This module is for Triton Python backend."""
from __future__ import absolute_import
import os
import logging
from pathlib import Path
import platform
import triton_python_backend_utils as pb_utils
import cloudpickle
from sagemaker.serve.validations.check_integrity import perform_integrity_check
logger = logging.getLogger(__name__)
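
# TRITON_MODEL_DIR is expected to be set in the container environment and to point at the
# directory holding serve.pkl and metadata.json (assumed from the paths built below).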
TRITON_MODEL_DIR = os.getenv("TRITON_MODEL_DIR")


class TritonPythonModel:
    """A class for the Triton Python backend."""

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        """Return the model config unchanged; no auto-completion is applied."""
        return auto_complete_model_config

    def initialize(self, args: dict) -> None:
        """Load the pickled inference spec, schema builder, and model from TRITON_MODEL_DIR."""
        serve_path = Path(TRITON_MODEL_DIR).joinpath("serve.pkl")
        metadata_path = Path(TRITON_MODEL_DIR).joinpath("metadata.json")

        # Verify the pickled artifact against metadata.json before deserializing it.
        with open(str(serve_path), mode="rb") as f:
            buffer = f.read()
        perform_integrity_check(buffer=buffer, metadata_path=str(metadata_path))

        with open(str(serve_path), mode="rb") as f:
            inference_spec, schema_builder = cloudpickle.load(f)
        self.inference_spec = inference_spec
        self.schema_builder = schema_builder
        self.model = inference_spec.load(model_dir=TRITON_MODEL_DIR)

    def execute(self, requests):
        """Run inference for each Triton request and return one response per request."""
        responses = []
        for request in requests:
            # Tensor names "input_1" and "output_1" must match the model's configuration.
            input_ndarray = pb_utils.get_input_tensor_by_name(request, "input_1").as_numpy()
            converted_input = self.schema_builder.input_deserializer.deserialize(input_ndarray)
            output = self.inference_spec.invoke(input_object=converted_input, model=self.model)
            output_ndarray = self.schema_builder.output_serializer.serialize(output)
            response = pb_utils.InferenceResponse(
                output_tensors=[pb_utils.Tensor("output_1", output_ndarray)]
            )
            responses.append(response)
        return responses
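

# Preflight diagnostics, executed once at import time (see the call at the bottom of this
# file): they flag Python-version drift and a corrupted serve.pkl before serving starts.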
def _run_preflight_diagnostics():
    _py_vs_parity_check()
    _pickle_file_integrity_check()


def _py_vs_parity_check():
    container_py_vs = platform.python_version()
    local_py_vs = os.getenv("LOCAL_PYTHON")
    # Only the minor version component is compared; a missing LOCAL_PYTHON also triggers the warning.
    if not local_py_vs or container_py_vs.split(".")[1] != local_py_vs.split(".")[1]:
        logger.warning(
            f"The local Python version {local_py_vs} differs from the Python version "
            f"{container_py_vs} on the container. Please align the two to avoid unexpected behavior."
        )


def _pickle_file_integrity_check():
    serve_path = Path(TRITON_MODEL_DIR).joinpath("serve.pkl")
    metadata_path = Path(TRITON_MODEL_DIR).joinpath("metadata.json")

    with open(str(serve_path), "rb") as f:
        buffer = f.read()
    perform_integrity_check(buffer=buffer, metadata_path=metadata_path)


# Execute the preflight diagnostics when Triton imports this module.
_run_preflight_diagnostics()
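
# A sketch of the expected on-disk layout (assuming the standard Triton model repository
# structure; everything except serve.pkl, metadata.json, and the tensor names is assumed):
#
#   <model_repository>/<model_name>/
#       config.pbtxt        # declares the Python backend and the "input_1" / "output_1" tensors
#       1/
#           model.py        # this file
#           serve.pkl       # cloudpickled (inference_spec, schema_builder)
#           metadata.json   # integrity metadata read by perform_integrity_check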