Skip to content

Commit 42d3cda

Browse files
refactor: add endpoints
- `app.py`: add all and quotes endpoints - `crawler.py`: move to `bin`, skip db creation and only sanitize data - `json_to_sqlite.py`: convert `db.json` to `db.sqlite` for faster access - `db.json`: remove trailing comma in author name (randomized order in the process) - `docker-compose.yml`: mount working directory - `Dockerfile`: add timezone, reorder layers for better caching, add env vars, split entrypoint and command
1 parent c2cf646 commit 42d3cda

File tree

6 files changed

+2361
-2232
lines changed

6 files changed

+2361
-2232
lines changed

Dockerfile

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,35 @@
22

33
FROM python:3.11-slim-buster
44

5+
ENV TZ=America/Chicago
6+
7+
RUN apt-get update \
8+
&& apt-get install --no-install-recommends -y tzdata \
9+
&& rm -rf /var/lib/apt/lists/*
10+
11+
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
12+
513
WORKDIR /app
614

7-
COPY . .
15+
COPY requirements.txt .
16+
17+
ENV PYTHONDONTWRITEBYTECODE 1
18+
ENV PYTHONFAULTHANDLER 1
19+
ENV PYTHONUNBUFFERED 1
820

9-
RUN pip install --no-cache-dir -r requirements.txt
21+
ENV PIP_DISABLE_PIP_VERSION_CHECK=on
22+
ENV PIP_DEFAULT_TIMEOUT=100
23+
24+
RUN python -m pip install --no-cache-dir -r requirements.txt
25+
26+
ENV LANG C.UTF-8
27+
ENV LC_ALL C.UTF-8
28+
29+
ENV WEB_CONCURRENCY=2
30+
31+
COPY . .
1032

1133
EXPOSE 8000
1234

13-
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:8000", "app:app"]
35+
ENTRYPOINT [ "gunicorn" ]
36+
CMD ["-k", "uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:8000", "app:app", "--reload"]

app.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,46 @@
11
#!/usr/bin/env python
22

3-
from fastapi import FastAPI
4-
from fastapi import Request
5-
from fastapi.responses import HTMLResponse
3+
import sqlite3
4+
from fastapi import FastAPI, Query,Request
5+
from fastapi.responses import HTMLResponse, JSONResponse
66
from fastapi.templating import Jinja2Templates
7+
from typing import List, Optional
78

89
app = FastAPI()
910
templates = Jinja2Templates(directory="templates")
1011

1112

12-
@app.get('/hello')
13-
def hello():
14-
return 'Hello, World!'
15-
16-
1713
@app.get('/')
def home(request: Request):
    """Render the landing page from the ``index.html`` template."""
    # The incoming request is handed to the template under the "request" key.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
16+
17+
18+
@app.get('/hello')
@app.get('/hello/{name}')
def hello(name: Optional[str] = None) -> JSONResponse:
    """Return a JSON greeting.

    The handler is registered on two routes so that ``name`` is genuinely
    optional. A path parameter is always required, so with only
    ``/hello/{name}`` the ``None`` default could never apply. On the bare
    ``/hello`` route ``name`` is not part of the path, so it falls back to
    its default (and may still be supplied as ``?name=...``).

    Args:
        name: Who to greet; when missing or empty the greeting addresses
            "World".

    Returns:
        A JSON object of the form ``{"hello": "<greeting>"}``.
    """
    message = f"Hello, {name}!" if name else "Hello, World!"
    return JSONResponse(content={"hello": message})
26+
27+
28+
# TODO: improve sql performance
29+
@app.get('/all')
def get_quotes(request: Request) -> JSONResponse:
    """Return every row of the ``quotes`` table as JSON.

    Args:
        request: The incoming request (currently unused; kept so the
            handler signature stays unchanged).

    Returns:
        A JSON object ``{"quotes": [...]}`` holding one entry per row
        returned by ``SELECT * FROM quotes``.
    """
    db = sqlite3.connect("db.sqlite")
    try:
        quotes = db.execute("SELECT * FROM quotes").fetchall()
    finally:
        # Close the connection even when the query raises (for example when
        # the table is missing) so failed requests do not leak handles.
        db.close()
    return JSONResponse(content={"quotes": quotes})
37+
38+
39+
@app.get('/quotes/{limit}')
def get_quotes_by_limit(request: Request, limit: int) -> JSONResponse:
    """Return up to ``limit`` randomly ordered rows of the ``quotes`` table.

    Args:
        request: The incoming request (currently unused; kept so the
            handler signature stays unchanged).
        limit: Maximum number of rows to return, taken from the URL path.

    Returns:
        A JSON object ``{"quotes": [...]}`` with the selected rows.
    """
    db = sqlite3.connect("db.sqlite")
    try:
        # Bind the limit as a parameter instead of formatting it into the SQL
        # text, so the statement never depends on how the value was validated.
        # NOTE(review): SQLite treats a negative LIMIT as "no upper bound";
        # consider rejecting limit < 0 if that is not intended.
        quotes = db.execute(
            "SELECT * FROM quotes ORDER BY RANDOM() LIMIT ?",
            (limit,),
        ).fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
    return JSONResponse(content={"quotes": quotes})

crawler.py renamed to bin/crawler.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,14 @@
88
from decouple import config
99
from tinydb import TinyDB, Query
1010
from urllib.parse import urljoin
11-
from tinydb import TinyDB, Query
1211

13-
# env vars
12+
# env vars
1413
name = config("NAME", default="goodreads")
1514
url = config("URL", default="https://www.goodreads.com/quotes")
1615
ttl = config("TTL", default=300)
1716

1817
# database
19-
db = TinyDB("db.json")
18+
db = TinyDB("../db.json")
2019

2120
# cache the requests to sqlite, expire after n time
2221
if not ttl:
@@ -79,6 +78,7 @@ def strip_characters(data):
7978
item[key] = value.strip()
8079
elif key == "author":
8180
value = value.strip()
81+
value = value.rstrip(",")
8282
item[key] = value
8383

8484
return data
@@ -99,10 +99,51 @@ def sanitize_data(data):
9999
return valid_data
100100

101101

102+
def check_db():
    """Report whether db.json holds at least one record with an ``author`` field."""
    has_author = Query().author.exists()
    # A non-zero match count means the database has already been populated.
    return bool(db.count(has_author))
112+
113+
114+
def update_db():
    """Update existing db.json with normalized data.

    Every distinct ``author`` and ``quote`` value is passed through
    ``strip_characters`` and then ``sanitize_data``. When the result differs
    from the stored value, all records carrying the old value are rewritten.
    """
    # The original body referenced ``Author`` (never defined) and ``Quote``
    # (only a local of check_db), so the first update raised NameError.
    Quote = Query()

    # Read the table once and collect the distinct values up front, before
    # any write happens.
    entries = db.all()
    distinct = {
        field: {entry[field] for entry in entries}
        for field in ("author", "quote")
    }

    # Authors are normalized first, then quotes (dicts keep insertion order).
    for field, values in distinct.items():
        for value in values:
            normalized = strip_characters([{field: value}])[0][field]
            sanitized = sanitize_data([{field: normalized}])
            # NOTE(review): this assumes the update belongs inside the
            # "sanitize_data returned something" branch, i.e. rejected values
            # are left untouched -- confirm against the original nesting.
            if not sanitized:
                continue
            normalized = sanitized[0][field]
            # Skip no-op writes: the stored value is already normalized.
            if normalized != value:
                db.update({field: normalized}, Quote[field] == value)
136+
137+
102138
async def main():
103139
# ! drop the table (qa)
104140
# db.drop_tables()
105141

142+
# check if there is data in the db
143+
if check_db():
144+
update_db()
145+
return
146+
106147
# initialize cursor with the starting URL
107148
cursor = url
108149

bin/json_to_sqlite.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python
"""Convert the TinyDB file ``../db.json`` into the SQLite file ``../db.sqlite``.

The ``_default`` table of the JSON file is copied into a freshly created
``quotes`` table with the columns ``id``, ``quote`` and ``author``.
"""

import json
import sqlite3
from contextlib import closing
from pathlib import Path

# Both paths are relative to the current working directory, so the script is
# expected to be started from inside ``bin/``.
raw_db = Path("../db.json")
db = Path("../db.sqlite")

# Load and reshape the input first, so that a missing or malformed db.json
# does not cost us the existing SQLite database.
with open(raw_db, encoding="utf-8") as f:
    data = json.load(f)

quotes = [
    (key, value['quote'], value['author'])
    for key, value in data['_default'].items()
]

# ! remove the db if it exists (qa)
db.unlink(missing_ok=True)

# One connection for schema and data. ``closing`` guarantees the handle is
# released, and the inner ``with conn`` commits on success and rolls back if
# an insert fails.
with closing(sqlite3.connect(db)) as conn:
    with conn:
        conn.execute(
            "CREATE TABLE quotes (id INTEGER PRIMARY KEY, quote TEXT, author TEXT)"
        )
        conn.executemany("INSERT INTO quotes VALUES (?, ?, ?)", quotes)

0 commit comments

Comments
 (0)