Skip to content

Commit 84fd582

Browse files
authored
Refactored models, added Google Sheets scraping (#119)
* Model Changes * Add gym and FC hours * Implemented Amenities * Add capacities scraper * Add Pool Hours * Add Court Hours * Fixed timezone issues * Added scheduling * Update README.md * Update README.md * Update README.md * Compacted regular hours * Add bowling hours * Added scraper for special hours * Update sp_hours_scraper.py * Address PR comments
1 parent 7742c1d commit 84fd582

38 files changed

Lines changed: 1364 additions & 1811 deletions

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ jobs:
2626
pip install --force-reinstall setuptools==44.0.0
2727
pip freeze
2828
pip install -r requirements.txt
29-
python tests.py
29+
python -m unittest src.tests.test_scraper

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ migrations
1616
.vscode
1717
Archive
1818
scripts
19-
*.sqlite3
19+
*.sqlite3
20+
service-account-key.json

README.md

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,28 @@
22

33
Technologies involved include:
44
1. Flask
5-
2. GraphQL
5+
2. GraphQL (Graphene)
66

7-
## Virtualenv
7+
## Setup Instructions
88

9-
Virtualenv setup!
9+
### Database
10+
1. Install PostgreSQL on your local computer.
11+
2. Create a database and user:
12+
```
13+
createdb uplift
14+
createuser -P -s -e local
15+
```
16+
- This creates a database called `uplift` and a user called `local`. Use this database name and user when defining your environment variables.
17+
18+
### Virtualenv
1019

1120
```bash
1221
virtualenv venv
1322
source venv/bin/activate
1423
pip install -r requirements.txt
1524
```
1625

17-
## Environment Variables
26+
### Environment Variables
1827
It's recommended to use [`autoenv`](https://github.com/kennethreitz/autoenv).
1928
The required environment variables for this API are the following:
2029

@@ -34,15 +43,19 @@ To use `autoenv` with this repository, run the following and set the variables a
3443
cp env.template .env
3544
````
3645

37-
## Setting Up the Database with Data
46+
### Service Account Key
47+
Get the `service-account-key.json` file from the `#uplift-backend` channel and place it in the root directory of the repository.
48+
49+
## Running the App and Test Suite
50+
51+
To run the app:
3852
````bash
39-
python setup_db.py
53+
python app.py
4054
````
4155

42-
## Running the App
43-
56+
To run the test suite:
4457
````bash
45-
flask run
58+
python -m unittest src.tests.test_scraper
4659
````
4760

4861
## Setting up linter

app.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
import logging, schedule, time
12
from flask import Flask, render_template
23
from flask_graphql import GraphQLView
34
from graphene import Schema
45
from graphql.utils import schema_printer
56
from src.database import db_session, init_db
7+
from src.models.capacity import Capacity
8+
from src.models.openhours import OpenHours
69
from src.schema import Query
7-
from src.constants import create_gym_table
8-
from src.scrapers.scraper import scrape_classes
9-
from src.scrapers.gym_scraper import scrape_times
10-
from src.scrapers.scraper import scrape_classes, scrape_pool_hours
10+
from src.scrapers.capacities_scraper import fetch_capacities
11+
from src.scrapers.reg_hours_scraper import fetch_reg_building, fetch_reg_facility
12+
from src.scrapers.sp_hours_scraper import fetch_sp_facility
13+
from src.utils.utils import create_gym_table
1114

1215

1316
app = Flask(__name__)
@@ -29,12 +32,22 @@ def shutdown_session(exception=None):
2932
db_session.remove()
3033

3134

32-
# Create database and fill it with constants
35+
def scrape_sheets():
36+
logging.info("Scraping from sheets...")
37+
38+
# Fetch Hours
39+
fetch_reg_facility()
40+
fetch_reg_building()
41+
fetch_sp_facility()
42+
43+
# Fetch Capacities
44+
fetch_capacities()
45+
46+
47+
# Create database and fill it with data
3348
init_db()
3449
create_gym_table()
35-
scrape_times()
36-
scrape_classes(3)
37-
scrape_pool_hours()
50+
scrape_sheets()
3851

3952
# Create schema.graphql
4053
with open("schema.graphql", "w+") as schema_file:
@@ -44,3 +57,9 @@ def shutdown_session(exception=None):
4457
# Should only be used for dev
4558
if __name__ == "__main__":
4659
app.run(host="127.0.0.1", port=5000) # For Dev Purposes only (use start_server.sh for release)
60+
61+
# Schedule the scraping to run every 10 minutes
62+
schedule.every(10).minutes.do(scrape_sheets)
63+
while True:
64+
schedule.run_pending()
65+
time.sleep(60)

requirements.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ Flask-GraphQL==2.0.0
2121
Flask-Migrate==2.4.0
2222
Flask-Script==2.0.5
2323
Flask-SQLAlchemy==2.3.1
24-
google-auth==1.6.3
24+
google-auth==1.12.0
2525
graphene==2.1.3
2626
graphene-sqlalchemy==2.3.0
2727
graphql-core==2.1
2828
graphql-relay==0.4.5
2929
graphql-server-core==1.1.1
3030
greenlet==2.0.2
31+
gspread==5.12.3
3132
gunicorn==19.9.0
3233
identify==2.5.24
3334
idna==2.6
@@ -41,6 +42,7 @@ MarkupSafe==1.1.1
4142
marshmallow==3.0.0rc4
4243
marshmallow-sqlalchemy==0.16.2
4344
nodeenv==1.8.0
45+
pandas==2.1.4
4446
parso==0.8.3
4547
platformdirs==3.10.0
4648
pre-commit==1.18.3
@@ -53,10 +55,12 @@ pyasn1==0.5.0
5355
pyasn1-modules==0.3.0
5456
python-dateutil==2.8.0
5557
python-editor==1.0.4
58+
pytz==2023.3
5659
PyYAML==6.0
5760
requests==2.28.2
5861
rsa==4.0
5962
Rx==1.6.1
63+
schedule==1.1.0
6064
singledispatch==3.7.0
6165
six==1.11.0
6266
snapshottest==0.6.0

schema.graphql

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,70 +2,71 @@ schema {
22
query: Query
33
}
44

5-
type Capacity {
5+
type Amenity {
66
id: ID!
7-
facilityId: Int!
8-
count: Int!
9-
percent: Float!
10-
updated: DateTime!
7+
gymId: Int!
8+
type: AmenityType!
119
}
1210

13-
type Class {
14-
id: ID!
15-
name: String!
16-
description: String!
17-
gyms: [ClassInstance]
11+
enum AmenityType {
12+
SHOWERS
13+
LOCKERS
14+
PARKING
15+
ELEVATORS
1816
}
1917

20-
type ClassInstance {
18+
type Capacity {
2119
id: ID!
22-
gymId: Int
23-
classId: Int!
24-
location: String!
25-
instructor: String!
26-
isCanceled: Boolean!
27-
isVirtual: Boolean!
28-
startTime: DateTime
29-
endTime: DateTime
30-
class_: Class
31-
gym: Gym
20+
count: Int!
21+
facilityId: Int!
22+
percent: Float!
23+
updated: Int!
3224
}
3325

34-
scalar DateTime
26+
enum CourtType {
27+
BASKETBALL
28+
VOLLEYBALL
29+
BADMINTON
30+
}
3531

3632
type Facility {
3733
id: ID!
34+
facilityType: FacilityType!
3835
gymId: Int!
3936
name: String!
40-
facilityType: FacilityType!
41-
openHours(name: String): [OpenHours]
42-
capacities: [Capacity]
4337
capacity: Capacity
38+
hours: [OpenHours]
4439
}
4540

4641
enum FacilityType {
4742
FITNESS
4843
POOL
44+
BOWLING
45+
COURT
4946
}
5047

5148
type Gym {
5249
id: ID!
53-
name: String!
54-
description: String!
55-
location: String!
50+
address: String!
51+
imageUrl: String
5652
latitude: Float!
5753
longitude: Float!
58-
imageUrl: String
54+
name: String!
55+
amenities: [Amenity]
5956
facilities: [Facility]
60-
classes: [ClassInstance]
57+
hours: [OpenHours]
6158
}
6259

6360
type OpenHours {
6461
id: ID!
65-
facilityId: Int!
66-
day: Int!
67-
startTime: String
68-
endTime: String
62+
courtType: CourtType
63+
endTime: Int!
64+
facilityId: Int
65+
gymId: Int
66+
isShallow: Boolean
67+
isSpecial: Boolean!
68+
isWomen: Boolean
69+
startTime: Int!
6970
}
7071

7172
type Query {

0 commit comments

Comments
 (0)