Skip to content

Commit e7b23a2

Browse files
authored
Merge pull request #18 from IloBe/9-api-creation
Feat: add fast api and its tests #9
2 parents b160012 + 12b39d8 commit e7b23a2

9 files changed

Lines changed: 1344 additions & 7 deletions

File tree

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ pytest_html==4.0.0
1010
requests==2.31.0
1111
fastapi==0.98.0
1212
uvicorn==0.22.0
13+
starlette==0.27.0
14+
httpx==0.25.0
15+
pydantic==1.10.4
1316
gunicorn==20.1.0
1417
matplotlib==3.6.2
1518
seaborn==0.12.2

src/app/examples_request.yml

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
features_labels:
2+
"age": "Person's age - numerical value (int)"
3+
"workclass": "Person's work class - categorical value (str)"
4+
"fnlgt": "Index information - numerical value (int), internally removed"
5+
"education": "Person's education level - ordinal categorical value (str)"
6+
"education_num": "Person's education level as number - numerical value (int)"
7+
"marital_status": "Person's marital status - nominal categorical value (str)"
8+
"occupation": "Person's occupation - nominal categorical value (str)"
9+
"relationship": "Person's relationship - nominal categorical (str)"
10+
"race": "Persons's race - nominal categorical value (str)"
11+
"sex": "Person's sex - nominal categorical value (str), internally mapped to integer: 'Male': 0, 'Female': 1"
12+
"capital_gain": "Person's capital gain - numerical value (int)"
13+
"capital_loss": "Person's capital loss - numerical value (int)"
14+
"hours_per_week": "Person's hours of work per week - numerical value (int)"
15+
"native_country": "Persons's native country - nominal categorical value (str), internally mapped to specific bin group"
16+
test_examples:
17+
"Class <=50k (Label 0)":
18+
"summary": "Person test example, income <=50k"
19+
"description": "Person test example for model to predict income class of <=50k"
20+
"value":
21+
"age": 38
22+
"workclass": "Private"
23+
"fnlgt": 215646
24+
"education": "HS-grad"
25+
"education_num": 9
26+
"marital_status": "Divorced"
27+
"occupation": "Handlers-cleaners"
28+
"relationship": "Not-in-family"
29+
"race": "White"
30+
"sex": "Male"
31+
"capital_gain": 0
32+
"capital_loss": 0
33+
"hours_per_week": 40
34+
"native_country": "United-States"
35+
"Class >50k (Label 1)":
36+
"summary": "Person test example, income >50k"
37+
"description": "Person test example for model to predict income class of >50k"
38+
"value":
39+
"age": 31
40+
"workclass": "Private"
41+
"fnlgt": 45781
42+
"education": "Masters"
43+
"education_num": 14
44+
"marital_status": "Never-married"
45+
"occupation": "Prof-specialty"
46+
"relationship": "Not-in-family"
47+
"race": "White"
48+
"sex": "Female"
49+
"capital_gain": 14084
50+
"capital_loss": 0
51+
"hours_per_week": 50
52+
"native_country": "United-States"
53+
"Wrong feature value types str":
54+
"summary": "Person test example with wrong value types as strings, success case of >50k"
55+
"description": "Person test example for model to handle wrong values (education, hours_per_week numbers as string)"
56+
"value":
57+
"age": 31
58+
"workclass": "Private"
59+
"fnlgt": 45781
60+
"education": "Masters"
61+
"education_num": '14'
62+
"marital_status": "Never-married"
63+
"occupation": "Prof-specialty"
64+
"relationship": "Not-in-family"
65+
"race": "White"
66+
"sex": "Female"
67+
"capital_gain": 14084
68+
"capital_loss": 0
69+
"hours_per_week": '50'
70+
"native_country": "United-States"
71+
"Wrong feature value types int":
72+
"summary": "Person test example with wrong value types as int, success case of >50k"
73+
"description": "Person test example for model to handle wrong values (workclass, relationship strings as int)"
74+
"value":
75+
"age": 31
76+
"workclass": 15
77+
"fnlgt": 45781
78+
"education": "Masters"
79+
"education_num": 14
80+
"marital_status": "Never-married"
81+
"occupation": "Prof-specialty"
82+
"relationship": 3
83+
"race": "White"
84+
"sex": "Female"
85+
"capital_gain": 14084
86+
"capital_loss": 0
87+
"hours_per_week": 50
88+
"native_country": "United-States"
89+
"Missing features":
90+
"summary": "Person test example with missing values, failure case"
91+
"description": "Person test example for model to handle missing feature values (starting with age)"
92+
"value":
93+
"fnlgt": 120478
94+
"education_num": 11
95+
"capital_gain": 0
96+
"capital_loss": 0
97+
"hours_per_week": 10

src/app/schemas.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env -S python3 -i
2+
3+
"""
4+
Script to handle the FastAPI schema code.
5+
author: Ilona Brinkmeier
6+
date: 2023-09
7+
"""
8+
9+
###################
10+
# Imports
11+
###################
12+
13+
from pydantic import BaseModel
14+
from typing import Optional
15+
from enum import Enum
16+
17+
18+
###################
19+
# Coding
20+
###################
21+
22+
class FeatureLabels(str, Enum):
    ''' Delivers the feature names as needed in Python.

    Every member value is the feature name as a plain string. Because the
    enum mixes in ``str``, a member compares equal to its string value.
    '''
    age = "age"
    workclass = "workclass"
    fnlgt = "fnlgt"
    education = "education"
    education_num = "education_num"
    marital_status = "marital_status"
    occupation = "occupation"
    relationship = "relationship"
    race = "race"
    sex = "sex"
    capital_gain = "capital_gain"
    capital_loss = "capital_loss"
    hours_per_week = "hours_per_week"
    native_country = "native_country"

    # Backward-compatible aliases: earlier revisions spelled these member
    # names "captial_*". Reusing the same values makes Enum treat them as
    # aliases, so they neither show up in iteration nor add new values.
    captial_gain = "capital_gain"
    captial_loss = "capital_loss"
38+
39+
40+
class Person(BaseModel):
    ''' Request schema with the type hints of one person's feature attributes.

    The integer attributes are declared without a default value; the string
    attributes are optional and default to None.
    '''
    age: int                                # no default
    workclass: Optional[str] = None
    fnlgt: int                              # no default
    education: Optional[str] = None
    education_num: int                      # no default
    marital_status: Optional[str] = None
    occupation: Optional[str] = None
    relationship: Optional[str] = None
    race: Optional[str] = None
    sex: Optional[str] = None
    capital_gain: int                       # no default
    capital_loss: int                       # no default
    hours_per_week: int                     # no default
    native_country: Optional[str] = None

src/config/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import logging
44
from logging import Formatter, NullHandler
55
from colorama import Fore, Style
6+
from src.config.config import get_config, get_data_path, get_models_path, get_project_root_path
67

78

89
print(f'Invoking __init__.py for {__name__}')

src/main.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,28 @@
22

33
"""
44
Script to handle the API code here.
5+
Be able to view the interactive API documentation, powered by Swagger UI,
6+
at http://localhost:8000/docs
7+
8+
Read-in of Person class instance is optional regarding original categorical features.
9+
10+
For production code, set debug to False.
11+
For general FastAPI information, see:
12+
https://fastapi.tiangolo.com/tutorial/
13+
For application setup, see:
14+
https://fastapi.tiangolo.com/advanced/events/
15+
For FastAPI beginner tutorial, start with:
16+
https://fastapi.tiangolo.com/tutorial/first-steps/
17+
For advanced FastAPI example, see:
18+
https://github.com/microsoft/cookiecutter-spacy-fastapi/blob/master/%7B%7Bcookiecutter.project_slug%7D%7D/app/api.py
19+
For testing see:
20+
https://fastapi.tiangolo.com/tutorial/testing/
21+
22+
future toDo:
23+
add a custom exception handler with @app.exception_handler()
24+
see: https://fastapi.tiangolo.com/tutorial/handling-errors/
25+
26+
527
author: Ilona Brinkmeier
628
date: 2023-09
729
"""
@@ -12,7 +34,28 @@
1234

1335
import logging
1436
import uvicorn
37+
import signal
38+
import os
39+
import sys
40+
import yaml
41+
import numpy as np
42+
import pandas as pd
43+
44+
# needed to run this script alone
45+
# Source and app directories, resolved against the current working directory
MAIN_DIR = os.path.join(os.getcwd(), 'src/')
APP_DIR = os.path.join(MAIN_DIR, 'app/')
# Extend the module search path: first the src directory, then the working directory
sys.path.extend([MAIN_DIR, os.getcwd()])
print(f'sys.path : {sys.path}')
1550

51+
from typing import Optional, Any
52+
from contextlib import asynccontextmanager
53+
from fastapi import FastAPI, Body, HTTPException, Response, status
54+
from app.schemas import FeatureLabels, Person
55+
from training.ml.data import clean_data
56+
from training.ml.model import inference
57+
from config import get_config
58+
from slice_performance import load_transformer_artifact, load_final_model_artifact
1659

1760
###################
1861
# Coding
@@ -22,10 +65,139 @@
2265
# info see: https://realpython.com/python-logging-source-code/
2366
logger = logging.getLogger(__name__)
2467

68+
# variable to store artifacts names
69+
ml_components = {}
70+
71+
# read in examples
72+
# Load the request examples and feature descriptions once at import time.
# The encoding is given explicitly so that reading the file does not depend
# on the platform's default encoding.
examples_file = os.path.join(APP_DIR, 'examples_request.yml')
with open(examples_file, encoding='utf-8') as f:
    examples_request = yaml.safe_load(f)
75+
76+
77+
# customised exception
78+
class InferenceNotPossible(HTTPException):
    ''' HTTP exception raised if the inference workflow went wrong.

    Takes no arguments; it always carries status code 404 and a fixed
    detail message.
    '''
    def __init__(self) -> None:
        super().__init__(
            status_code=404,
            detail="Client error: Inference not possible",
        )
82+
83+
84+
# Define the signal handler function
85+
def graceful_shutdown(signum, frame) -> None:
    ''' Signal handler: logs a shutdown warning and exits with code 0.

    Both handler arguments (signal number and stack frame) are unused.
    '''
    # Cleanup tasks (closing db connections, saving state, ...) belong here;
    # e.g. has to be filled, if Person items are stored in a database.

    logger.warning("Shutting down the FastAPI US Census app")
    # same effect as sys.exit(0), which raises SystemExit itself
    raise SystemExit(0)


# Register the signal handler for SIGTERM
signal.signal(signal.SIGTERM, graceful_shutdown)
96+
97+
98+
@asynccontextmanager
async def lifespan(app: FastAPI) -> None:
    ''' Handles transformer and model artifacts for startup and shutdown.

    The coding before the yield is executed during startup, before the
    application starts taking requests: the feature transformer and the
    classifier artifacts are loaded into the module-level ``ml_components``.
    The coding after the yield is executed right before the shutdown, after
    the application finished handling requests: ``ml_components`` is emptied.

    A failure while loading the artifacts is logged and re-raised, so the
    startup aborts with the original error instead of being swallowed.

    NOTE(review): the precise return annotation would be AsyncIterator[None];
    it is left unchanged here because that needs an additional typing import.
    '''
    try:
        logger.debug('Read in post-market transformer and model artifacts')
        # load ml components: feature transformer and classifier artifacts
        ml_components['transformer_artifact'] = load_transformer_artifact()
        ml_components['model_artifact'] = load_final_model_artifact()
    except Exception as e:
        logger.exception(
            "Exit: exception of type %s occurred. Details: %s", type(e).__name__, str(e)
        )
        # do not keep a half-initialised component set around
        ml_components.clear()
        raise

    try:
        yield
    finally:
        # clean up the ML components and release the resources, also if an
        # error is propagated into the context manager while the app runs
        logger.debug('Resource cleaning of transformer and model artifacts')
        ml_components.clear()

    txt = 'Handling of transformer and model artifacts was successful during lifespan of FastAPI app.'
    logger.debug(txt)
125+
126+
127+
# FastAPI application instance; startup and shutdown are delegated to `lifespan`.
app = FastAPI(
    title="Udacity MLOps, Project 3 - Prediction Model for Public US Census Bureau Data",
    description=(
        "Deploying a Binary Classification ML Model on Render with FastAPI; "
        "its inference is about having a salary <=50K or >50K"
    ),
    version="0.1",
    lifespan=lifespan,
    debug=True,
)
135+
136+
137+
@app.get("/")
async def root() -> Response:
    ''' Returns the welcome message at root level with status code 200. '''
    return Response(
        content="Welcome to the Udacity MLOps project 3 and its salary prediction application!",
        status_code=status.HTTP_200_OK,
    )
145+
146+
147+
@app.get("/feature_labels/{feature_name}")
async def feature_labels(feature_name: FeatureLabels) -> Any:
    ''' Looks up the description of the given feature.

    The description is read from the 'features_labels' section of the
    examples_request content and returned as is.
    '''
    logging.info("Read-in of feature values from examples_request file started")
    label_descriptions = examples_request['features_labels']
    return label_descriptions[feature_name]
153+
154+
155+
@app.post("/predict/")
async def predict(person: Person = Body(..., examples=examples_request['test_examples'])):
    '''
    Returns prediction of test examples about income class, being <=50k or >50k,
    so having a proper response status number 200 in such cases.

    If only a few features are having a wrong value type, the model shall be able to handle
    this properly having an inference result of being an <=50k or >50k item as well.

    If most of the features are missing, a value error shall be thrown with response status number 422.

    Raises InferenceNotPossible if the predicted label is neither 0 nor 1.
    '''
    logger.info("Model classification inference started")
    person_values = person.dict()
    # the key order of the 'features_labels' section defines the column order
    feature_names = list(examples_request['features_labels'])

    # Build the single-row frame from a list of rows, not from a numpy array:
    # a numpy array of mixed int/str values coerces every value to a string
    # whenever no value is None, so the numeric features would lose their type.
    # NOTE(review): assumes clean_data does not depend on string-typed numeric
    # columns - TODO confirm.
    df = pd.DataFrame(
        [[person_values[name] for name in feature_names]],
        columns=feature_names,
    )
    df_cleaned = clean_data(df, get_config())
    logger.info('Census cleaned new adult person data with %s features',
                df_cleaned.shape[1])
    logger.info('Its columns are: %s', df_cleaned.columns)

    # cleaning inference case for person dataframe (X = df_cleaned), not training
    X_processed = ml_components['transformer_artifact'].transform(df_cleaned)
    # predict income class
    model = ml_components['model_artifact']
    y_pred = inference(model, X_processed)
    logger.info('Predict post y_pred: %s', y_pred)

    # a single person is classified, so only the first prediction is relevant
    label = y_pred[0]
    if label not in (0, 1):
        # the exception takes no arguments; status code and detail are fixed
        raise InferenceNotPossible()

    pred_class = '>50k' if label == 1 else '<=50k'
    logger.info('income prediction label: %s, salary class: %s', label, pred_class)

    content_txt = f'income prediction label: {label}, salary class: {pred_class}'
    return Response(
        status_code=status.HTTP_200_OK,
        content=content_txt,
    )
29201

30202

31203
if __name__ == "__main__":

0 commit comments

Comments
 (0)