Skip to content

Commit 3297d76

Browse files
committed
Application: Add Sentiment Analysis application
1 parent 1551f8c commit 3297d76

8 files changed

Lines changed: 826 additions & 0 deletions

File tree

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
Amazon Review Dataset:
2+
https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz
3+
4+
Source: HEFTless paper
5+
6+
## Requirements
7+
8+
This application retrieves a dataset from AWS, stores it on [MinIO](https://github.com/minio/minio), and runs machine learning tasks on it.
9+
10+
To run MinIO in a Docker container, run:
11+
12+
docker run -p 9000:9000 -p 9001:9001 \
13+
-e "MINIO_ROOT_USER=minio" \
14+
-e "MINIO_ROOT_PASSWORD=minio123" \
15+
quay.io/minio/minio server /data --console-address ":9001"
16+
17+
## Build the Sentiment Analysis Application
18+
19+
This Sentiment Analysis application on Amazon Reviews comes with a `Dockerfile`. It simplifies the application deployment.
20+
21+
To build the container, run the following command:
22+
23+
docker build -t sa-sentiment-analysis .
24+
25+
## Launch the Server
26+
The Sentiment Analysis application creates an HTTP server that executes different functions according to the received REST call.
27+
28+
docker run -p 8080:8080 -ti --rm -e MINIO_ENDPOINT="172.17.0.1:9000" sa-sentiment-analysis
29+
30+
By default, the server listens on port `8080`. The server needs `MinIO` as object storage to save intermediate data. The MinIO connection settings can be set using environment variables.
31+
32+
MINIO_ENDPOINT="172.17.0.1:9000"
33+
MINIO_ACCESS_KEY=minio
34+
MINIO_SECRET_KEY=minio123
35+
MINIO_BUCKET=serverledge
36+
MINIO_SECURE=false
37+
38+
### API
39+
40+
#### Retrieve
41+
POST localhost:8080/invoke
42+
43+
{
44+
"Function" : "retrieve",
45+
"Params" : {
46+
"data_url": "https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz",
47+
"local_dir": "./amazon_review_polarity_csv.tgz",
48+
"object_name": "raw/amazon_review_polarity_csv.tgz"
49+
}
50+
}
51+
52+
53+
#### Extract
54+
55+
POST localhost:8080/invoke
56+
57+
{
58+
"Function" : "extract",
59+
"Params" : {
60+
"tgz_input_object_name": "raw/amazon_review_polarity_csv.tgz",
61+
"subset" : 0.002,
62+
"local_dataset_file": "./amazon_review_polarity_csv.tgz",
63+
"local_output_dir": "./data",
64+
"output_train_object_name": "data/train.csv",
65+
"output_test_object_name": "data/test.csv"
66+
}
67+
}
68+
69+
70+
#### Train
71+
72+
POST localhost:8080/invoke
73+
74+
{
75+
"Function" : "train",
76+
"Params" : {
77+
"subset": 0.001,
78+
"max_features": 2,
79+
"train_object_data": "data/train.csv",
80+
"local_train_file": "train.csv",
81+
"local_model_file": "sentiment_model.pkl",
82+
"local_vectorizer_file": "tfidf_vectorizer.pkl",
83+
"output_model_object": "model/sentiment_model.pkl",
84+
"output_vectorizer_object": "model/tfidf_vectorizer.pkl"
85+
}
86+
}
87+
88+
#### Evaluate
89+
90+
POST localhost:8080/invoke
91+
92+
{
93+
"Function" : "evaluate",
94+
"Params" : {
95+
"test_object_data": "data/test.csv",
96+
"local_test_file": "test.csv",
97+
"subset": 0.0002,
98+
"local_model_file": "sentiment_model.pkl",
99+
"local_vectorizer_file": "tfidf_vectorizer.pkl",
100+
"input_model_object": "model/sentiment_model.pkl",
101+
"input_vectorizer_object": "model/tfidf_vectorizer.pkl"
102+
}
103+
}
104+
105+
106+
## Workflow
107+
108+
TODO: remove this section
109+
110+
- retriever
111+
- extractor
112+
- choice
113+
- modelHA.train -> modelHA.evaluate
114+
- modelLA.train -> modelLA.evaluate
115+
116+
### Serverledge Implementation
117+
118+
TODO
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
FROM python:3.10-slim

# executor.py logs with print(); unbuffered stdout makes those lines appear in
# `docker logs` immediately, even when the container runs without a TTY.
ENV PYTHONUNBUFFERED=1

# Install dependencies first so this layer stays cached when only sources change
COPY requirements.txt /tmp/
RUN pip install --no-cache-dir -r /tmp/requirements.txt

# Default MinIO connection settings (development values).
# Override them at run time with `docker run -e NAME=value ...`.
ENV MINIO_ENDPOINT="172.17.0.1:9000" \
    MINIO_ACCESS_KEY=minio \
    MINIO_SECRET_KEY=minio123 \
    MINIO_BUCKET=serverledge \
    MINIO_SECURE=false

WORKDIR /

# Copy project files
COPY executor.py extractor.py minio_client.py ml_model.py retriever.py /

EXPOSE 8080

CMD ["python", "executor.py"]
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
from http.server import BaseHTTPRequestHandler, HTTPServer
2+
import os
3+
import json
4+
import retriever
5+
import ml_model
6+
import extractor
7+
8+
# Address and port the HTTP server binds to.
hostName, serverPort = "0.0.0.0", 8080

# Read from the environment (None when unset); not referenced by the code
# visible in this module.
ML_MODEL = os.environ.get("ML_MODEL")

# Fallbacks used by the "retrieve" function when the request omits the
# corresponding parameter; each one can be overridden through the environment.
DATA_URL = os.environ.get(
    "DATA_URL",
    "https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz",
)
OUTPUT_PATH = os.environ.get("OUTPUT_PATH", "./amazon_review_polarity_csv.tgz")
OBJECT_NAME = os.environ.get("OBJECT_NAME", "raw/amazon_review_polarity_csv.tgz")
16+
17+
class Executor(BaseHTTPRequestHandler):
    """HTTP handler that runs one sentiment-analysis step per POST request.

    A request is a POST to a path containing ``invoke`` with a JSON body::

        {"Function": "<retrieve|extract|train|evaluate>", "Params": {...}}

    The reply is always a JSON object sent with HTTP status 200:
    ``{"Result": "<json>", "Success": true}`` when the step succeeds, or
    ``{"Success": false, "Error": "<message>"}`` when the request is invalid
    or the step raises. Any other path is answered with 404.
    """

    def do_POST(self):
        """Read the request, run the requested function, write the JSON reply."""
        # Consume the body before answering, even for unknown paths, so the
        # client never sees the connection closed with unread data.
        raw_body = self.rfile.read(self._content_length())

        if "invoke" not in self.path:
            self.send_response(404)
            self.end_headers()
            return

        try:
            func, params = self._parse_request(raw_body)
            context = self._load_context()

            print(f"Function: {func}")
            print(f"Params: {params}")
            print(f"Context: {context}")

            result = self._dispatch(func, params, context)
            response = {"Result": json.dumps(result), "Success": True}
        except Exception as e:
            # Top-level boundary: every failure (bad request, handler error,
            # non-serializable result) is reported through the JSON envelope.
            print(e)
            response = {"Success": False, "Error": str(e)}

        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(bytes(json.dumps(response), "utf-8"))

    def _content_length(self):
        """Return the declared body size, or 0 when missing or invalid."""
        try:
            # A negative length would make rfile.read() block until EOF.
            return max(int(self.headers.get("Content-Length", 0)), 0)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _parse_request(raw_body):
        """Decode the JSON body and return ``(function_name, params)``.

        ``function_name`` is None and ``params`` is ``{}`` when the
        respective key is absent.

        Raises:
            ValueError: the body is not valid UTF-8 JSON or not a JSON object.
        """
        try:
            request = json.loads(raw_body.decode("utf-8"))
        except ValueError as err:
            # JSONDecodeError and UnicodeDecodeError are both ValueError.
            raise ValueError(f"invalid JSON request body: {err}") from err
        if not isinstance(request, dict):
            raise ValueError("request body must be a JSON object")
        return request.get("Function"), request.get("Params", {})

    @staticmethod
    def _load_context():
        """Return the JSON object stored in the ``CONTEXT`` env var, or ``{}``."""
        # The same (upper-case) name is used for the presence check and the
        # read; environment variable names are case-sensitive on Linux.
        raw_context = os.environ.get("CONTEXT")
        return json.loads(raw_context) if raw_context else {}

    def _dispatch(self, func, params, context):
        """Run the function named ``func`` and return its result.

        Raises:
            Exception: ``func`` is None or not one of the supported names;
                anything raised by the invoked handler propagates unchanged.
        """
        if func is None:
            raise Exception("function not defined!")

        if func == "retrieve":
            # Example Params:
            #   "data_url": "https://s3.amazonaws.com/fast-ai-nlp/amazon_review_polarity_csv.tgz",
            #   "local_dir": "./amazon_review_polarity_csv.tgz",
            #   "object_name": "raw/amazon_review_polarity_csv.tgz"
            # Missing keys fall back to DATA_URL / OUTPUT_PATH / OBJECT_NAME.
            data_url = str(params.get("data_url", DATA_URL))
            local_temp_dir = str(params.get("local_dir", OUTPUT_PATH))
            data_object_name = str(params.get("object_name", OBJECT_NAME))

            print(f"Running function 'retriever' with params {data_url}, {local_temp_dir}, {data_object_name}")
            return retriever.handler(
                data_url=data_url,
                local_temp_path=local_temp_dir,
                object_name=data_object_name,
            )

        if func == "train":
            # Example Params:
            #   "subset": 0.001,
            #   "max_features": 2,
            #   "train_object_data": "data/train.csv",
            #   "local_train_file": "train.csv",
            #   "local_model_file": "sentiment_model.pkl",
            #   "local_vectorizer_file": "tfidf_vectorizer.pkl",
            #   "output_model_object": "model/sentiment_model.pkl",
            #   "output_vectorizer_object": "model/tfidf_vectorizer.pkl"
            print(f"Running function 'handle_train' with params {params}, {context}")
            return ml_model.handler_train(params, context)

        if func == "evaluate":
            # Example Params:
            #   "test_object_data": "data/test.csv",
            #   "local_test_file": "test.csv",
            #   "subset": 0.0002,
            #   "local_model_file": "sentiment_model.pkl",
            #   "local_vectorizer_file": "tfidf_vectorizer.pkl",
            #   "input_model_object": "model/sentiment_model.pkl",
            #   "input_vectorizer_object": "model/tfidf_vectorizer.pkl"
            print(f"Running function 'handle_evaluate' with params {params}, {context}")
            return ml_model.handler_evaluate(params, context)

        if func == "extract":
            # Example Params:
            #   "tgz_input_object_name": "data/test.csv",
            #   "subset": 0.002,
            #   "local_dataset_file": "./amazon_review_polarity_csv.tgz",
            #   "local_output_dir": "./data",
            #   "output_train_object_name": "data/train.csv",
            #   "output_test_object_name": "data/test.csv"
            # NOTE(review): the tgz_input_object_name example equals the test
            # output object; presumably it should name the archive uploaded
            # by "retrieve" -- confirm against extractor.handler.
            print(f"Running function 'extract' with params {params}, {context}")
            return extractor.handler(params, context)

        raise Exception("Unsupported function")
146+
147+
148+
149+
if __name__ == "__main__":
    # Script entry point: serve Executor requests until interrupted (Ctrl+C).
    print("Launching HTTP Server... ")
    # The context manager calls server_close() on exit, so the listening
    # socket is released even if serve_forever() fails with something other
    # than KeyboardInterrupt.
    with HTTPServer((hostName, serverPort), Executor) as srv:
        print("Running server ... ")
        try:
            srv.serve_forever()
        except KeyboardInterrupt:
            pass
158+

0 commit comments

Comments
 (0)