-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
133 lines (88 loc) · 3.5 KB
/
model.py
File metadata and controls
133 lines (88 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import json
import base64
import boto3
import mlflow
import pandas as pd
# pylint: disable=invalid-name
def get_model_location(run_id):
    """Return the location string from which the model should be loaded.

    If the ``MODEL_LOCATION`` environment variable is set, its value is
    returned unchanged. Otherwise an S3 URI is assembled from the
    ``S3_BUCKET_NAME`` (default ``"mlartifact-s3"``) and ``EXP_ID``
    (default ``"6"``) environment variables together with ``run_id``.

    Args:
        run_id: Run identifier interpolated into the S3 path.

    Returns:
        The explicit override, or an
        ``s3://<bucket>/<exp_id>/<run_id>/artifacts/model`` URI.
    """
    explicit_location = os.getenv("MODEL_LOCATION")
    if explicit_location is None:
        # No override configured: derive the artifact path from the
        # bucket / experiment settings.
        S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME", "mlartifact-s3")
        EXP_ID = os.getenv("EXP_ID", "6")
        return f"s3://{S3_BUCKET_NAME}/{EXP_ID}/{run_id}/artifacts/model"
    return explicit_location
def load_mode(run_id):
    """Load the MLflow pyfunc model associated with ``run_id``.

    The location is resolved by ``get_model_location`` and passed to
    ``mlflow.pyfunc.load_model``.

    Args:
        run_id: Run identifier used to resolve the model location.

    Returns:
        The object returned by ``mlflow.pyfunc.load_model``.
    """
    return mlflow.pyfunc.load_model(get_model_location(run_id))
def base64_decode(encoded_data):
    """Decode a base64-encoded, UTF-8 JSON payload.

    Args:
        encoded_data: Base64 text (or bytes) wrapping a UTF-8 JSON document.

    Returns:
        The Python object produced by parsing the decoded JSON.
    """
    json_text = base64.b64decode(encoded_data).decode("utf-8")
    return json.loads(json_text)
class ModelService:
    """Score Kinesis-style sales records with a wrapped model.

    The wrapped ``model`` only needs a ``predict(features)`` method whose
    result can be indexed with ``[0]``. Each callback in ``callbacks`` is
    called with every prediction event that is produced.
    """

    def __init__(self, model, model_version=None, callbacks=None):
        self.model = model
        self.model_version = model_version
        # A missing (or empty) callbacks argument becomes a fresh list.
        self.callbacks = callbacks or []

    def prepare_features(self, row):
        """Build the feature dict for a single sales input row.

        Args:
            row: Mapping with ``"date"``, ``"store"``, ``"promo"`` and
                ``"holiday"`` entries; ``"date"`` is parsed with
                ``pd.to_datetime``.

        Returns:
            Dict with the pass-through fields plus ``year``, ``month``,
            ``dayofweek`` and ``is_weekend`` derived from the date.
        """
        moment = pd.to_datetime(row["date"])
        features = {field: row[field] for field in ("store", "promo", "holiday")}
        features["year"] = moment.year
        features["month"] = moment.month
        day_index = moment.dayofweek
        features["dayofweek"] = day_index
        # 1 when dayofweek is 5 or 6 (pandas numbers Monday as 0), else 0.
        features["is_weekend"] = int(day_index >= 5)
        return features

    def predict(self, features):
        """Return the first model output for ``features`` as a float."""
        outputs = self.model.predict(features)
        first_output = outputs[0]
        return float(first_output)

    def _score_record(self, record):
        """Decode one record and return its prediction event dict."""
        raw_json = base64.b64decode(record["kinesis"]["data"]).decode("utf-8")
        payload = json.loads(raw_json)
        sales_input = payload["sales_input"]
        sales_id = payload["sales_id"]
        predicted = self.predict(self.prepare_features(sales_input))
        return {
            "model": "sales_prediction_model",
            "version": self.model_version,
            "prediction": {"sales_prediction": predicted, "sales_id": sales_id},
        }

    def lambda_handler(self, event):
        """Score every record in ``event`` and notify the callbacks.

        Args:
            event: Mapping whose ``"Records"`` entry is an iterable of
                records; each record carries a base64-encoded UTF-8 JSON
                payload under ``record["kinesis"]["data"]`` with
                ``"sales_input"`` and ``"sales_id"`` keys.

        Returns:
            ``{"predictions": [...]}`` with one prediction event per record,
            in input order.
        """
        outcomes = []
        for record in event["Records"]:
            outcome = self._score_record(record)
            # Callbacks see each event before it is added to the response.
            for notify in self.callbacks:
                notify(outcome)
            outcomes.append(outcome)
        return {"predictions": outcomes}
class KinesisCallbacks:
    """Publish prediction events to a Kinesis stream.

    ``kinesis_client`` must expose a ``put_record`` method accepting the
    ``StreamName``, ``Data`` and ``PartitionKey`` keyword arguments.
    """

    def __init__(self, kinesis_client, prediction_stream_name):
        self.kinesis_client = kinesis_client
        self.prediction_stream_name = prediction_stream_name

    def put_record(self, prediction_event):
        """Send ``prediction_event`` to the configured stream as JSON.

        Args:
            prediction_event: Dict containing
                ``prediction_event["prediction"]["sales_id"]``; the whole
                dict is JSON-serialised as the record data and the sales id
                (as a string) is used as the partition key.
        """
        sales_id = prediction_event["prediction"]["sales_id"]
        request = {
            "StreamName": self.prediction_stream_name,
            "Data": json.dumps(prediction_event),
            "PartitionKey": str(sales_id),
        }
        self.kinesis_client.put_record(**request)
def create_kinesis_client():
    """Create a boto3 Kinesis client.

    When the ``KINESIS_ENDPOINT_URL`` environment variable is set, its value
    is passed to boto3 as ``endpoint_url``; otherwise the client is created
    with boto3's defaults.

    Returns:
        The client returned by ``boto3.client("kinesis", ...)``.
    """
    custom_endpoint = os.getenv("KINESIS_ENDPOINT_URL")
    if custom_endpoint is not None:
        return boto3.client("kinesis", endpoint_url=custom_endpoint)
    return boto3.client("kinesis")
def init(prediction_stream_name: str, run_id: str, test_run: bool):
    """Assemble a ``ModelService`` for the given run.

    The model is loaded via ``load_mode``. Unless ``test_run`` is truthy, a
    Kinesis client is created and a ``KinesisCallbacks.put_record`` callback
    targeting ``prediction_stream_name`` is registered on the service.

    Args:
        prediction_stream_name: Stream name handed to ``KinesisCallbacks``.
        run_id: Run identifier; used to load the model and reported as the
            service's model version.
        test_run: When truthy, no Kinesis client or callback is created.

    Returns:
        The configured ``ModelService`` instance.
    """
    # Load the model first, before any Kinesis client is created.
    loaded_model = load_mode(run_id=run_id)

    hooks = []
    if not test_run:
        publisher = KinesisCallbacks(create_kinesis_client(), prediction_stream_name)
        hooks.append(publisher.put_record)

    return ModelService(model=loaded_model, model_version=run_id, callbacks=hooks)