-- schema.sql
-- =============================================================
-- Section 08 — Indexes
-- =============================================================
-- Creates a large employee table (300k rows) so EXPLAIN ANALYZE
-- actually shows meaningful differences between sequential and
-- index scans.
--
-- schema.sql creates the table AND seeds it via generate_series.
-- There is no separate seed.sql — the data is synthesised here.
--
-- Run:
-- psql -U postgres -d sql_exercise \
-- -f 04-advanced-databases/08-indexes/schema.sql
--
-- Warning: generating 300k rows takes a few seconds. Go stretch.
-- =============================================================
-- Rebuild the exercise schema from scratch so this script can be re-run.
-- CASCADE also removes every object left inside a previous adv_indexes
-- schema.
DROP SCHEMA IF EXISTS adv_indexes CASCADE;
CREATE SCHEMA adv_indexes;

-- Make unqualified names in the rest of this session (e.g. employee)
-- resolve to adv_indexes.
SET search_path = adv_indexes;
-- employee: the table the index exercises query.
-- Every data column is NOT NULL and the only key is the primary key on
-- id, so no secondary indexes exist once this statement has run.
CREATE TABLE employee (
    id          BIGSERIAL,                 -- auto-assigned surrogate key
    first_name  VARCHAR(50)   NOT NULL,
    last_name   VARCHAR(50)   NOT NULL,
    email       VARCHAR(255)  NOT NULL,    -- not declared UNIQUE
    department  VARCHAR(50)   NOT NULL,
    country     CHAR(2)       NOT NULL,    -- two-character code, e.g. 'GB'
    salary      INTEGER       NOT NULL,
    is_active   BOOLEAN       NOT NULL,
    hired_on    DATE          NOT NULL,
    -- employee_pkey is the name PostgreSQL generates for an unnamed
    -- primary key on this table; it is only spelled out here.
    CONSTRAINT employee_pkey PRIMARY KEY (id)
);
-- Synthesise 300k employees.
-- Every value is derived from the row number n with modulo arithmetic,
-- so the data is fully deterministic and each column (except email) has
-- lots of duplicates — that's what you want to show the effect of
-- indexes on filtered queries.
-- Note: first_name and country both cycle on n % 10, so those two
-- columns are perfectly correlated with each other.
INSERT INTO employee (
    first_name,
    last_name,
    email,
    department,
    country,
    salary,
    is_active,
    hired_on
)
SELECT
    -- 10 first names, cycling on every row.
    (ARRAY['Ada','Alan','Grace','Linus','Margaret','Dennis','Katherine','Radia','Barbara','Edsger'])[1 + (n % 10)] AS first_name,
    -- 10 last names; integer division makes the name change every 10 rows.
    (ARRAY['Lovelace','Turing','Hopper','Torvalds','Hamilton','Ritchie','Johnson','Perlman','Liskov','Dijkstra'])[1 + ((n / 10) % 10)] AS last_name,
    -- n is unique, so every generated address is distinct.
    'user' || n || '@example.com' AS email,
    -- 8 departments.
    (ARRAY['engineering','sales','marketing','support','finance','people','design','data'])[1 + (n % 8)] AS department,
    -- 10 countries.
    (ARRAY['GB','US','FR','DE','ES','IT','NL','PL','PT','SE'])[1 + (n % 10)] AS country,
    -- 120 distinct salaries: 30000 to 89500 in steps of 500.
    30000 + (n % 120) * 500 AS salary,
    -- FALSE for every 20th row, i.e. ~5% inactive.
    (n % 20) <> 0 AS is_active,
    -- date + integer adds whole days and stays a DATE; the offsets
    -- 0..3649 spread hire dates over just under ten years.
    DATE '2015-01-01' + (n % 3650) AS hired_on
FROM generate_series(1, 300000) AS series(n);

-- Refresh planner statistics for the freshly loaded table so that later
-- EXPLAIN output is based on the real row counts and value distributions.
ANALYZE employee;