-
Notifications
You must be signed in to change notification settings - Fork 54
Expand file tree
/
Copy pathmake_example_table_data.py
More file actions
103 lines (79 loc) · 2.8 KB
/
make_example_table_data.py
File metadata and controls
103 lines (79 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import random
import secrets
from dotenv import load_dotenv
from faker import Faker
from sqlalchemy import Column, Integer, String, Date, inspect, create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from myvanna import train_vanna_for_sales_data
load_dotenv()
Base = declarative_base()
class SalesData(Base):
__tablename__ = 'sales_data'
sale_id = Column(Integer, primary_key=True, autoincrement=True)
product_id = Column(Integer)
product_name = Column(String(255))
sale_date = Column(Date)
region = Column(String(255))
class MySQLDatabase:
def __init__(self):
self.engine = self.create_engine()
self.Session = sessionmaker(bind=self.engine)
def create_engine(self):
username = os.getenv('DB_USERNAME')
password = os.getenv('DB_PASSWORD')
host = os.getenv('DB_HOST')
port = os.getenv('DB_PORT')
database = os.getenv('DB_NAME')
connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}'
return create_engine(connection_string)
def get_session(self):
return self.Session()
def create_tables(self):
Base.metadata.create_all(self.engine)
def drop_table(self, table_class):
inspector = inspect(self.engine)
if inspector.has_table(table_class.__tablename__):
table_class.__table__.drop(self.engine)
fake = Faker()
seed_value = 42
random.seed(seed_value)
Faker.seed(seed_value)
products = [
{"id": 101, "name": "Smartwatch", "price": 150.00},
{"id": 102, "name": "Laptop", "price": 1200.00},
{"id": 103, "name": "Smartphone", "price": 800.00},
{"id": 104, "name": "Tablet", "price": 400.00},
{"id": 105, "name": "Headphones", "price": 100.00}
]
regions = ["North America", "Europe", "Asia", "South America", "Africa"]
def generate_sales_data(session, num_records):
sales_data_list = []
for _ in range(num_records):
product = secrets.choice(products)
region = secrets.choice(regions)
sale_date = fake.date_between(start_date='-1y', end_date='today')
sales_data = SalesData(
product_id=product["id"],
product_name=product["name"],
sale_date=sale_date,
region=region
)
sales_data_list.append(sales_data)
session.bulk_save_objects(sales_data_list)
session.commit()
if __name__ == "__main__":
db = MySQLDatabase()
db.drop_table(SalesData)
db.create_tables()
session = db.get_session()
generate_sales_data(session, 20000)
train_vanna_for_sales_data("""
CREATE TABLE sales_data (
sale_id INT PRIMARY KEY AUTO_INCREMENT,
product_id INT,
product_name VARCHAR(255),
sale_date DATE,
region VARCHAR(255)
)
""")