serverledge-faas
diff --git a/‎examples/sentiment-analysis/README.md‎
Lines changed: 118 additions & 0 deletions b/‎examples/sentiment-analysis/README.md‎
Lines changed: 118 additions & 0 deletions
diff --git a/‎examples/sentiment-analysis/src/Dockerfile‎
Lines changed: 24 additions & 0 deletions b/‎examples/sentiment-analysis/src/Dockerfile‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎examples/sentiment-analysis/src/executor.py‎
Lines changed: 158 additions & 0 deletions b/‎examples/sentiment-analysis/src/executor.py‎
Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,118 @@
+Amazon Review Dataset:
+https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz
+
+Source: HEFTless paper
+
+## Requirements 
+
+This application retrieves a dataset from AWS, stores it on [MinIO](https://github.com/minio/minio), and runs machine learning tasks on it. 
+
+To run MinIO using docker containers, run: 
+
+    docker run -p 9000:9000 -p 9001:9001 \
+        -e "MINIO_ROOT_USER=minio" \
+        -e "MINIO_ROOT_PASSWORD=minio123" \
+        quay.io/minio/minio server /data --console-address ":9001"
+
+## Build the Sentiment Analysis Application
+
+This Sentiment Analysis application on Amazon Reviews comes with a `Dockerfile`. It simplifies the application deployment. 
+
+To build the container, run the following command from the `src/` directory (where the `Dockerfile` lives):
+
+    docker build -t sa-sentiment-analysis .
+
+## Launch the Server 
+The Sentiment Analysis application starts an HTTP server that executes different functions depending on the REST call it receives. Launch it with:
+
+    docker run -p 8080:8080 -ti --rm -e MINIO_ENDPOINT="172.17.0.1:9000" sa-sentiment-analysis
+
+By default, the server listens on port `8080`. The server needs MinIO as object storage to save intermediate data. The MinIO connection is configured through the following environment variables (the values shown are the defaults set in the `Dockerfile`):
+
+    MINIO_ENDPOINT="172.17.0.1:9000"
+    MINIO_ACCESS_KEY=minio
+    MINIO_SECRET_KEY=minio123
+    MINIO_BUCKET=serverledge
+    MINIO_SECURE=false
+
+### API
+
+#### Retrieve
+POST localhost:8080/invoke
+
+    {
+        "Function" : "retrieve",
+        "Params" : {
+            "data_url": "https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz", 
+            "local_dir": "./amazon_review_polarity_csv.tgz", 
+            "object_name": "raw/amazon_review_polarity_csv.tgz"
+        }
+    }
+
+       
+#### Extract
+
+POST localhost:8080/invoke
+
+    {
+        "Function" : "extract",
+        "Params" : {
+            "tgz_input_object_name": "data/test.csv",
+            "subset" : 0.002,
+            "local_dataset_file": "./amazon_review_polarity_csv.tgz", 
+            "local_output_dir": "./data", 
+            "output_train_object_name": "data/train.csv",
+            "output_test_object_name": "data/test.csv"
+        }
+    }
+
+
+#### Train
+
+POST localhost:8080/invoke
+
+    {
+      "Function" : "train",
+      "Params" : {
+          "subset": 0.001, 
+          "max_features": 2, 
+          "train_object_data": "data/train.csv", 
+          "local_train_file": "train.csv", 
+          "local_model_file": "sentiment_model.pkl", 
+          "local_vectorizer_file": "tfidf_vectorizer.pkl",
+          "output_model_object": "model/sentiment_model.pkl", 
+          "output_vectorizer_object": "model/tfidf_vectorizer.pkl" 
+      }
+    }
+                
+#### Evaluate
+
+POST localhost:8080/invoke
+
+    {
+        "Function" : "evaluate",
+        "Params" : {
+            "test_object_data": "data/test.csv", 
+            "local_test_file": "test.csv", 
+            "subset": 0.0002, 
+            "local_model_file": "sentiment_model.pkl", 
+            "local_vectorizer_file": "tfidf_vectorizer.pkl", 
+            "input_model_object": "model/sentiment_model.pkl", 
+            "input_vectorizer_object": "model/tfidf_vectorizer.pkl"
+        }
+    }
+        
+
+## Workflow
+
+TODO: remove this section
+
+    - retriever
+    - extractor 
+    - choice
+      - modelHA.train -> modelHA.evaluate
+      - modelLA.train -> modelLA.evaluate
+  
+### Serverledge Implementation 
+
+TODO
@@ -0,0 +1,24 @@
+# Image for the sentiment-analysis executor: installs the Python requirements,
+# copies the five source modules to / and starts executor.py.
+FROM python:3.10-slim
+
+# Install dependencies
+# Done before the sources are copied so this layer is not rebuilt when only the code changes.
+COPY requirements.txt /tmp/
+RUN pip install --no-cache-dir -r /tmp/requirements.txt
+
+# Default MinIO connection settings; override them at run time with `docker run -e NAME=value`.
+# NOTE(review): 172.17.0.1 is presumably the default Docker bridge gateway (i.e. the host) - confirm for your setup.
+# NOTE(review): the access/secret keys are the demo credentials from the README and are baked into the image;
+# do not reuse them outside local testing.
+ENV MINIO_ENDPOINT="172.17.0.1:9000"
+ENV MINIO_ACCESS_KEY=minio
+ENV MINIO_SECRET_KEY=minio123
+ENV MINIO_BUCKET=serverledge
+ENV MINIO_SECURE=false
+
+# Copy project files
+COPY executor.py /
+COPY extractor.py /
+COPY minio_client.py /
+COPY ml_model.py /
+COPY retriever.py /
+
+WORKDIR /
+
+# Port the HTTP server in executor.py listens on (serverPort = 8080).
+EXPOSE 8080
+
+CMD ["python", "executor.py"]
@@ -0,0 +1,158 @@
+from http.server import BaseHTTPRequestHandler, HTTPServer
+import os
+import json
+import retriever
+import ml_model
+import extractor
+
+hostName = "0.0.0.0"  # bind address handed to HTTPServer
+serverPort = 8080  # TCP port handed to HTTPServer
+
+ML_MODEL = os.getenv("ML_MODEL")  # NOTE(review): read here but not referenced anywhere else in this file
+    
+DATA_URL = os.getenv("DATA_URL", "https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz")  # fallback for the "data_url" param of "retrieve"
+OUTPUT_PATH = os.getenv("OUTPUT_PATH", "./amazon_review_polarity_csv.tgz")  # fallback for the "local_dir" param of "retrieve"
+OBJECT_NAME = os.getenv("OBJECT_NAME", "raw/amazon_review_polarity_csv.tgz")  # fallback for the "object_name" param of "retrieve"
+
+class Executor(BaseHTTPRequestHandler):
+    def do_POST(self):
+        content_length = int(self.headers['Content-Length']) 
+        post_data = self.rfile.read(content_length) 
+        request = json.loads(post_data.decode('utf-8'))
+
+        if not "invoke" in self.path:
+            self.send_response(404)
+            self.end_headers()
+            return
+
+        try:
+            params = request["Params"]
+        except:
+            params = {} 
+        try:
+            func = request["Function"]
+        except:
+            func = None
+
+        if "context" in os.environ:
+            context = json.loads(os.environ["CONTEXT"]) 
+        else:
+            context = {}
+
+        print(f"Function: {func}")
+        print(f"Params: {params}")
+        print(f"Context: {context}")
+        
+        response = {}
+        try:
+            if func is None:
+                raise Exception("function not defined!")
+            
+            if func == "retrieve":
+                ''' Invocation example: 
+                
+                    POST localhost:8080/invoke
+                    {
+                        "Function" : "retrieve",
+                        "Params" : {
+                            "data_url": "https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz", 
+                            "local_dir": "./amazon_review_polarity_csv.tgz", 
+                            "object_name": "raw/amazon_review_polarity_csv.tgz"
+                        }
+                    }
+                '''
+                print(params)
+                data_url = str(params.get("data_url", DATA_URL))
+                local_temp_dir = str(params.get("local_dir", OUTPUT_PATH))
+                data_object_name = str(params.get("object_name", OBJECT_NAME))
+                
+                print(f"Running function 'retriever' with params {data_url}, {local_temp_dir}, {data_object_name}")
+                result = retriever.handler(data_url=data_url, local_temp_path=local_temp_dir, object_name=data_object_name)
+                # result = True
+            elif func == "train":
+                ''' Invocation example: 
+                
+                    POST localhost:8080/invoke
+                    {
+                        "Function" : "train",
+                        "Params" : {
+                            "subset": 0.001, 
+                            "max_features": 2, 
+                            "train_object_data": "data/train.csv", 
+                            "local_train_file": "train.csv", 
+                            "local_model_file": "sentiment_model.pkl", 
+                            "local_vectorizer_file": "tfidf_vectorizer.pkl",
+                            "output_model_object": "model/sentiment_model.pkl", 
+                            "output_vectorizer_object": "model/tfidf_vectorizer.pkl" 
+                        }
+                    }
+                '''
+                print(f"Running function 'handle_train' with params {params}, {context}")
+                result = ml_model.handler_train(params, context)
+                
+            elif func == "evaluate":
+                ''' Invocation example: 
+                
+                    POST localhost:8080/invoke
+                    {
+                        "Function" : "evaluate",
+                        "Params" : {
+                            "test_object_data": "data/test.csv", 
+                            "local_test_file": "test.csv", 
+                            "subset": 0.0002, 
+                            "local_model_file": "sentiment_model.pkl", 
+                            "local_vectorizer_file": "tfidf_vectorizer.pkl", 
+                            "input_model_object": "model/sentiment_model.pkl", 
+                            "input_vectorizer_object": "model/tfidf_vectorizer.pkl"
+                        }
+                    }
+                '''
+                print(f"Running function 'handle_evaluate' with params {params}, {context}")
+                result = ml_model.handler_evaluate(params, context)
+            
+            elif func == "extract": 
+                ''' Invocation example: 
+                
+                    POST localhost:8080/invoke
+                    {
+                        "Function" : "extract",
+                        "Params" : {
+                            "tgz_input_object_name": "data/test.csv",
+                            "subset" : 0.002,
+                            "local_dataset_file": "./amazon_review_polarity_csv.tgz", 
+                            "local_output_dir": "./data", 
+                            "output_train_object_name": "data/train.csv",
+                            "output_test_object_name": "data/test.csv"
+                        }
+                    }
+                '''
+                print(f"Running function 'extract' with params {params}, {context}")
+                result = extractor.handler(params, context)
+
+            else:
+                raise Exception("Unsupported function")
+              
+            response["Result"] = json.dumps(result)
+            response["Success"] = True
+        except Exception as e:
+            print(e)
+            response["Success"] = False
+            response["Error"] = str(e)
+
+        self.send_response(200)
+        self.send_header("Content-type", "application/json")
+        self.end_headers()
+        self.wfile.write(bytes(json.dumps(response), "utf-8"))
+
+
+
+if __name__ == "__main__":      
+    print("Launching HTTP Server... ")  
+    srv = HTTPServer((hostName, serverPort), Executor)
+    try:
+        print("Running server ... ")
+        srv.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    srv.server_close()
+