IloBe
diff --git a/‎requirements.txt‎
Lines changed: 3 additions & 0 deletions b/‎requirements.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/app/examples_request.yml‎
Lines changed: 97 additions & 0 deletions b/‎src/app/examples_request.yml‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎src/app/schemas.py‎
Lines changed: 55 additions & 0 deletions b/‎src/app/schemas.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎src/config/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎src/config/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/main.py‎
Lines changed: 172 additions & 0 deletions b/‎src/main.py‎
Lines changed: 172 additions & 0 deletions
@@ -10,6 +10,9 @@ pytest_html==4.0.0
 requests==2.31.0
 fastapi==0.98.0
 uvicorn==0.22.0
+starlette==0.27.0
+httpx==0.25.0
+pydantic==1.10.4
 gunicorn==20.1.0
 matplotlib==3.6.2
 seaborn==0.12.2
 
@@ -0,0 +1,97 @@
+features_labels:
+  "age": "Person's age - numerical value (int)"
+  "workclass": "Person's work class - categorical value (str)"
+  "fnlgt": "Index information - numerical value (int), internally removed"
+  "education": "Person's education level - ordinal categorical value (str)"
+  "education_num": "Person's education level as number - numerical value (int)"
+  "marital_status": "Person's marital status - nominal categorical value (str)"
+  "occupation": "Person's occupation - nominal categorical value (str)"
+  "relationship": "Person's relationship - nominal categorical value (str)"
+  "race": "Person's race - nominal categorical value (str)"
+  "sex": "Person's sex - nominal categorical value (str), internally mapped to integer: 'Male': 0, 'Female': 1"
+  "capital_gain": "Person's capital gain - numerical value (int)"
+  "capital_loss": "Person's capital loss - numerical value (int)"
+  "hours_per_week": "Person's hours of work per week - numerical value (int)"
+  "native_country": "Person's native country - nominal categorical value (str), internally mapped to specific bin group"
+test_examples:
+  "Class <=50k (Label 0)":
+    "summary": "Person test example, income <=50k"
+    "description": "Person test example for model to predict income class of <=50k"
+    "value": 
+      "age": 38
+      "workclass": "Private"
+      "fnlgt": 215646
+      "education": "HS-grad"
+      "education_num": 9
+      "marital_status": "Divorced"
+      "occupation": "Handlers-cleaners"
+      "relationship": "Not-in-family"
+      "race": "White"
+      "sex": "Male"
+      "capital_gain": 0
+      "capital_loss": 0
+      "hours_per_week": 40
+      "native_country": "United-States"
+  "Class >50k (Label 1)":
+    "summary": "Person test example, income >50k"
+    "description": "Person test example for model to predict income class of >50k"
+    "value": 
+      "age": 31
+      "workclass": "Private"
+      "fnlgt": 45781
+      "education": "Masters"
+      "education_num": 14
+      "marital_status": "Never-married"
+      "occupation": "Prof-specialty"
+      "relationship": "Not-in-family"
+      "race": "White"
+      "sex": "Female"
+      "capital_gain": 14084
+      "capital_loss": 0
+      "hours_per_week": 50
+      "native_country": "United-States"
+  "Wrong feature value types str": 
+    "summary": "Person test example with wrong value types as strings, success case of >50k"
+    "description": "Person test example for model to handle wrong values (education_num, hours_per_week numbers as string)"
+    "value":
+      "age": 31
+      "workclass": "Private"
+      "fnlgt": 45781
+      "education": "Masters"
+      "education_num": '14'
+      "marital_status": "Never-married"
+      "occupation": "Prof-specialty"
+      "relationship": "Not-in-family"
+      "race": "White"
+      "sex": "Female"
+      "capital_gain": 14084
+      "capital_loss": 0
+      "hours_per_week": '50'
+      "native_country": "United-States"
+  "Wrong feature value types int": 
+    "summary": "Person test example with wrong value types as int, success case of >50k"
+    "description": "Person test example for model to handle wrong values (workclass, relationship strings as int)"
+    "value":
+      "age": 31
+      "workclass": 15
+      "fnlgt": 45781
+      "education": "Masters"
+      "education_num": 14
+      "marital_status": "Never-married"
+      "occupation": "Prof-specialty"
+      "relationship": 3
+      "race": "White"
+      "sex": "Female"
+      "capital_gain": 14084
+      "capital_loss": 0
+      "hours_per_week": 50
+      "native_country": "United-States"
+  "Missing features": 
+    "summary": "Person test example with missing values, failure case"
+    "description": "Person test example for model to handle missing feature values (starting with age)"
+    "value":
+      "fnlgt": 120478
+      "education_num": 11
+      "capital_gain": 0
+      "capital_loss": 0
+      "hours_per_week": 10
@@ -0,0 +1,55 @@
+#!/usr/bin/env -S python3 -i
+
+"""
+Script to handle the FastAPI schema code.
+author: Ilona Brinkmeier
+date:   2023-09
+"""
+
+###################
+# Imports
+###################
+
+from pydantic import BaseModel
+from typing import Optional
+from enum import Enum
+
+
+###################
+# Coding
+###################
+
+class FeatureLabels(str, Enum):
+    ''' Delivers the feature names as needed in Python '''
+    age = "age"
+    workclass = "workclass"
+    fnlgt = "fnlgt"
+    education = "education"
+    education_num = "education_num"
+    marital_status = "marital_status"
+    occupation = "occupation"
+    relationship = "relationship"
+    race = "race"
+    sex = "sex"
+    captial_gain = "capital_gain"
+    captial_loss = "capital_loss"
+    hours_per_week = "hours_per_week"
+    native_country = "native_country"
+
+
+class Person(BaseModel):
+    ''' Delivers the type hints for feature attributes.
+
+    The numerical features are declared as int without a default value.
+    The categorical features are declared as Optional[str] with default None,
+    so they may be left out when a Person is created.
+    '''
+    # the field names equal the feature names used as keys in the request examples
+    age: int
+    workclass: Optional[str] = None
+    fnlgt: int
+    education: Optional[str] = None
+    education_num: int
+    marital_status: Optional[str] = None
+    occupation: Optional[str] = None
+    relationship: Optional[str] = None
+    race: Optional[str] = None
+    sex: Optional[str] = None
+    capital_gain: int
+    capital_loss: int
+    hours_per_week: int
+    native_country: Optional[str] = None
@@ -3,6 +3,7 @@
 import logging
 from logging import Formatter, NullHandler
 from colorama import Fore, Style
+from src.config.config import get_config, get_data_path, get_models_path, get_project_root_path
 
 
 print(f'Invoking __init__.py for {__name__}')
 
@@ -2,6 +2,28 @@
 
 """
 Script to handle the API code here.
+Be able to view the interactive API documentation, powered by Swagger UI,
+at http://localhost:8000/docs
+
+In the Person request model, the original categorical features are optional.
+
+For production code, set debug to False.
+For general FastAPI information, see:
+https://fastapi.tiangolo.com/tutorial/
+For application setup, see:
+https://fastapi.tiangolo.com/advanced/events/
+For FastAPI beginner tutorial, start with:
+https://fastapi.tiangolo.com/tutorial/first-steps/
+For advanced FastAPI example, see:
+https://github.com/microsoft/cookiecutter-spacy-fastapi/blob/master/%7B%7Bcookiecutter.project_slug%7D%7D/app/api.py
+For testing see:
+https://fastapi.tiangolo.com/tutorial/testing/
+
+future toDo:
+add a custom exception handler with @app.exception_handler()
+see: https://fastapi.tiangolo.com/tutorial/handling-errors/
+
+
 author: Ilona Brinkmeier
 date:   2023-09
 """
@@ -12,7 +34,28 @@
 
 import logging
 import uvicorn
+import signal
+import os
+import sys
+import yaml
+import numpy as np
+import pandas as pd
+
+# needed to run this script alone
+MAIN_DIR = os.path.join(os.getcwd(), 'src/')
+APP_DIR = os.path.join(MAIN_DIR, 'app/')
+sys.path.append(MAIN_DIR)
+sys.path.append(os.getcwd())
+print(f'sys.path : {sys.path}')
 
+from typing import Optional, Any, AsyncIterator
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, Body, HTTPException, Response, status
+from app.schemas import FeatureLabels, Person
+from training.ml.data import clean_data
+from training.ml.model import inference
+from config import get_config
+from slice_performance import load_transformer_artifact, load_final_model_artifact
 
 ###################
 # Coding
@@ -22,10 +65,139 @@
 # info see: https://realpython.com/python-logging-source-code/
 logger = logging.getLogger(__name__)
 
+# variable to store artifacts names
+ml_components = {}
+
+# read in examples
+examples_file = os.path.join(APP_DIR, 'examples_request.yml')
+with open(examples_file) as f:
+    examples_request = yaml.safe_load(f)
+
+
+# customised exception
+class InferenceNotPossible(HTTPException):
+    ''' Raised if inference workflow went wrong '''
+    def __init__(self) -> None:
+        super().__init__(status_code=404, detail="Client error: Inference not possible")
+
+
+# Define the signal handler function
+def graceful_shutdown(signum, frame) -> None:
+    # Perform cleanup tasks here (closing db connections, saving state, ...);
+    # e.g. has to be filled, if Person items are stored in a database
+
+    # Finally, exit the application
+    logger.warning("Shutting down the FastAPI US Census app")
+    sys.exit(0)
+
+
+# Register the signal handler for SIGTERM
+signal.signal(signal.SIGTERM, graceful_shutdown)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> None:
+    ''' Handles transformer and model artifacts for startup and shutdown.
+    
+    The coding before the yield will be executed before the application starts taking
+    requests, during the startup.
+    The coding after the yield will be executed after the application finishes handling requests,
+    right before the shutdown.
+    '''
+    try:
+        logging.debug('Read in post-market transformer and model artifacts')
+        # load ml components: feature transformer and classifier artifacts
+        transformer_artifact = load_transformer_artifact()
+        ml_components['transformer_artifact'] = transformer_artifact 
+        model_artifact = load_final_model_artifact()
+        ml_components['model_artifact'] = model_artifact
+
+        yield
+
+        # clean up the ML components and release the resources
+        logging.debug('Resource cleaning of transformer and model artifacts')
+        ml_components.clear()
+    except Exception as e:
+        logger.exception("Exit: exception of type %s occurred. Details: %s", type(e).__name__, str(e))
+    else:
+        txt = 'Handling of transformer and model artifacts was successful during lifespan of FastAPI app.'
+        logger.debug(txt)
+
+
+app = FastAPI(
+    title = "Udacity MLOps, Project 3 - Prediction Model for Public US Census Bureau Data",
+    description = "Deploying a Binary Classification ML Model on Render with FastAPI; \
+                  its inference is about having a salary <=50K or >50K",
+    version = "0.1",
+    lifespan=lifespan,
+    debug = True
+)
+
+
+@app.get("/")
+async def root() -> Response:
+    ''' Returns welcome message at root level '''
+    response = Response(
+        status_code=status.HTTP_200_OK,
+        content="Welcome to the Udacity MLOps project 3 and its salary prediction application!"
+    )
+    return response 
+
+
+@app.get("/feature_labels/{feature_name}")
+async def feature_labels(feature_name: FeatureLabels) -> Any:
+    ''' Read-in feature values with original label from US census dataset '''
+    logging.info("Read-in of feature values from examples_request file started")
+    feat_value = examples_request['features_labels'][feature_name]
+    return feat_value
+
+
+@app.post("/predict/")
+async def predict(person: Person = Body(..., examples=examples_request['test_examples'])):
+    ''' 
+    Returns prediction of test examples about income class, being <=50k or >50k,
+    so having a proper response status number 200 in such cases.
+    
+    If only a few features are having a wrong value type, the model shall be able to handle
+    this properly having an inference result of being an <=50k or >50k item as well.
+    
+    If most of the features are missing, a value error shall be thrown with response status number 422.
+    '''
+    logging.info("Model classification inference started")   
+    person = person.dict()
+    features = np.array(
+        [person[f] for f in examples_request['features_labels'].keys()]
+    ).reshape(1, -1)
+    
+    df = pd.DataFrame(features, columns=examples_request['features_labels'].keys())
+    df_cleaned = clean_data(df, get_config())
+    logger.info('Census cleaned new adult person data with %s features',
+                df_cleaned.shape[1])
+    logger.info('Its columns are: %s', df_cleaned.columns)
 
+    # cleaning inference case for person dataframe (X = df_cleaned), not training
+    X_processed = ml_components['transformer_artifact'].transform(df_cleaned)
+    # predict income class
+    model = ml_components['model_artifact']
+    y_pred = inference(model, X_processed)
+    logger.info('Predict post y_pred: %s', y_pred)
+    if y_pred not in [0, 1]:
+        raise InferenceNotPossible(HTTPException('US census prediction workflow error'))
 
+    pred_class = '>50k' if y_pred == 1 else '<=50k'
+    logger.info('income prediction label: %s, salary class: %s', y_pred[0], pred_class)
 
+    content_txt = ''.join(
+        ['income prediction label: ', str(y_pred[0]),
+         ', ',
+         'salary class: ', pred_class]
+    )
+    response = Response(
+        status_code = status.HTTP_200_OK,
+        content = content_txt,
+    )   
 
+    return response
 
 
 if __name__ == "__main__":