-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathamazon_business_analysis.sql
More file actions
392 lines (341 loc) · 10.9 KB
/
Copy pathamazon_business_analysis.sql
File metadata and controls
392 lines (341 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
CREATE TABLE amazon_brazil.customers (
customer_id VARCHAR PRIMARY KEY,
customer_unique_id VARCHAR,
customer_zip_code_prefix INT
);
CREATE TABLE amazon_brazil.orders (
order_id VARCHAR PRIMARY KEY,
customer_id VARCHAR,
order_status VARCHAR,
order_purchase_timestamp TIMESTAMP,
order_approved_at TIMESTAMP,
order_delivered_carrier_date TIMESTAMP,
order_delivered_customer_date TIMESTAMP,
order_estimated_delivery_date TIMESTAMP
);
CREATE TABLE amazon_brazil.payments (
order_id VARCHAR,
payment_sequential INT,
payment_type VARCHAR,
payment_installments INT,
payment_value NUMERIC
);
CREATE TABLE amazon_brazil.sellers (
seller_id VARCHAR PRIMARY KEY,
seller_zip_code_prefix INT
);
CREATE TABLE amazon_brazil.products (
product_id VARCHAR PRIMARY KEY,
product_category_name VARCHAR,
product_name_length INTEGER,
product_description_length INTEGER,
product_photos_qty INTEGER,
product_weight_g INTEGER,
product_length_cm INTEGER,
product_height_cm INTEGER,
product_width_cm INTEGER
);
CREATE TABLE amazon_brazil.order_items (
order_id VARCHAR,
order_item_id INTEGER,
product_id VARCHAR,
seller_id VARCHAR,
shipping_limit_date TIMESTAMP,
price NUMERIC,
freight_value NUMERIC
);
-- Question 1:
-- To simplify financial reports, calculate the average payment value for each payment type,
-- round it to the nearest integer, and display results in ascending order.
SELECT
payment_type,
ROUND(AVG(payment_value)) AS rounded_avg_payment
FROM amazon_brazil.payments
GROUP BY payment_type
ORDER BY rounded_avg_payment ASC;
-- Question 2:
-- Calculate the percentage of total orders for each payment type,
-- rounded to one decimal place, and display in descending order.
SELECT payment_type,
ROUND(COUNT(order_id) * 100.0 / SUM(COUNT(order_id)) OVER (),1) AS percentage_orders
FROM amazon_brazil.payments
GROUP BY payment_type
ORDER BY percentage_orders DESC;
-- Question 3:
-- Identify all products priced between 100 and 500 BRL that contain the word 'Smart' in their name,
-- and display them sorted by price in descending order.
SELECT
oi.product_id,
oi.price
FROM amazon_brazil.order_items oi
JOIN amazon_brazil.products p
ON oi.product_id = p.product_id
WHERE
oi.price BETWEEN 100 AND 500
AND p.product_category_name ILIKE '%smart%'
ORDER BY oi.price DESC;
-- Question 4:
-- Determine the top 3 months with the highest total sales value,
-- rounded to the nearest integer.
SELECT
DATE_TRUNC('month', o.order_purchase_timestamp) AS month,
ROUND(SUM(oi.price)) AS total_sales
FROM amazon_brazil.orders o
JOIN amazon_brazil.order_items oi
ON o.order_id = oi.order_id
GROUP BY month
ORDER BY total_sales DESC;
-- Question 5:
-- Find product categories where the difference between maximum and minimum prices exceeds 500 BRL.
SELECT
p.product_category_name,
MAX(oi.price) - MIN(oi.price) AS price_difference
FROM amazon_brazil.order_items oi
JOIN amazon_brazil.products p
ON oi.product_id = p.product_id
GROUP BY p.product_category_name
HAVING MAX(oi.price) - MIN(oi.price) > 500
ORDER BY price_difference DESC;
-- Question 6:
-- Identify payment types with the least variation in transaction amounts,
-- using standard deviation and sorting by smallest first.
SELECT
payment_type,
ROUND(STDDEV(payment_value), 2) AS std_deviation
FROM amazon_brazil.payments
GROUP BY payment_type
ORDER BY std_deviation ASC;
-- Question 7:
-- Retrieve products where the product category name is missing
-- or contains only a single character.
SELECT
product_id,
product_category_name
FROM amazon_brazil.products
WHERE
product_category_name IS NULL
OR LENGTH(product_category_name) = 1;
-- Part II - Question 1:
-- Segment orders into value ranges and calculate the count of each payment type within those segments.
SELECT
CASE
WHEN oi.price < 200 THEN 'Low'
WHEN oi.price BETWEEN 200 AND 1000 THEN 'Medium'
ELSE 'High'
END AS order_value_segment,
p.payment_type,
COUNT(*) AS count
FROM amazon_brazil.order_items oi
JOIN amazon_brazil.payments p
ON oi.order_id = p.order_id
GROUP BY order_value_segment, p.payment_type
ORDER BY count DESC;
-- Part II - Question 2:
-- Calculate minimum, maximum, and average price for each product category,
-- and sort results by average price in descending order.
SELECT
p.product_category_name,
MIN(oi.price) AS min_price,
MAX(oi.price) AS max_price,
ROUND(AVG(oi.price), 2) AS avg_price
FROM amazon_brazil.order_items oi
JOIN amazon_brazil.products p
ON oi.product_id = p.product_id
GROUP BY p.product_category_name
ORDER BY avg_price DESC;
-- Part II - Question 3:
-- Identify customers who have placed more than one order and display their total order count.
SELECT
c.customer_unique_id,
COUNT(o.order_id) AS total_orders
FROM amazon_brazil.orders o
JOIN amazon_brazil.customers c
ON o.customer_id = c.customer_id
GROUP BY c.customer_unique_id
HAVING COUNT(o.order_id) > 1
ORDER BY total_orders DESC;
-- Part II - Question 4:
-- Categorize customers into New, Returning, and Loyal based on their total order count.
WITH customer_orders AS (
SELECT
c.customer_unique_id,
COUNT(o.order_id) AS total_orders
FROM amazon_brazil.orders o
JOIN amazon_brazil.customers c
ON o.customer_id = c.customer_id
GROUP BY c.customer_unique_id
)
SELECT
customer_unique_id,
CASE
WHEN total_orders = 1 THEN 'New'
WHEN total_orders BETWEEN 2 AND 4 THEN 'Returning'
ELSE 'Loyal'
END AS customer_type
FROM customer_orders;
-- Part II - Question 5:
-- Calculate total revenue for each product category and display the top 5 categories.
SELECT
p.product_category_name,
ROUND(SUM(oi.price), 2) AS total_revenue
FROM amazon_brazil.order_items oi
JOIN amazon_brazil.products p
ON oi.product_id = p.product_id
GROUP BY p.product_category_name
ORDER BY total_revenue DESC;
-- Part III - Question 1:
-- Calculate total sales for each season based on order purchase dates.
SELECT
season,
ROUND(SUM(price), 2) AS total_sales
FROM (
SELECT
oi.price,
CASE
WHEN EXTRACT(MONTH FROM o.order_purchase_timestamp) IN (3, 4, 5) THEN 'Spring'
WHEN EXTRACT(MONTH FROM o.order_purchase_timestamp) IN (6, 7, 8) THEN 'Summer'
WHEN EXTRACT(MONTH FROM o.order_purchase_timestamp) IN (9, 10, 11) THEN 'Autumn'
ELSE 'Winter'
END AS season
FROM amazon_brazil.orders o
JOIN amazon_brazil.order_items oi
ON o.order_id = oi.order_id
) AS seasonal_data
GROUP BY season
ORDER BY total_sales DESC;
-- Part III - Question 2:
-- Identify products whose total quantity sold is above the overall average.
SELECT
product_id,
COUNT(order_item_id) AS total_quantity_sold
FROM amazon_brazil.order_items
GROUP BY product_id
HAVING COUNT(order_item_id) > (
SELECT AVG(product_count)
FROM (
SELECT COUNT(order_item_id) AS product_count
FROM amazon_brazil.order_items
GROUP BY product_id
) AS avg_table
)LIMIT 10;
-- Part III - Question 3:
-- Calculate total monthly revenue for the year 2018.
SELECT
DATE_TRUNC('month', o.order_purchase_timestamp) AS month,
ROUND(SUM(oi.price), 2) AS total_revenue
FROM amazon_brazil.orders o
JOIN amazon_brazil.order_items oi
ON o.order_id = oi.order_id
WHERE EXTRACT(YEAR FROM o.order_purchase_timestamp) = 2018
GROUP BY month
ORDER BY month;
-- Part III - Question 4:
-- Segment customers based on purchase frequency and count the number of customers in each segment.
WITH customer_orders AS (
SELECT
c.customer_unique_id,
COUNT(o.order_id) AS total_orders
FROM amazon_brazil.orders o
JOIN amazon_brazil.customers c
ON o.customer_id = c.customer_id
GROUP BY c.customer_unique_id
),
categorized AS (
SELECT
CASE
WHEN total_orders BETWEEN 1 AND 2 THEN 'Occasional'
WHEN total_orders BETWEEN 3 AND 5 THEN 'Regular'
ELSE 'Loyal'
END AS customer_type
FROM customer_orders
)
SELECT
customer_type,
COUNT(*) AS count
FROM categorized
GROUP BY customer_type
ORDER BY count DESC;
-- Part III - Question 5:
-- Rank customers based on their average order value and display the top 20.
WITH order_values AS (
-- Total value per order
SELECT
o.order_id,
o.customer_id,
SUM(oi.price) AS order_total
FROM amazon_brazil.orders o
JOIN amazon_brazil.order_items oi
ON o.order_id = oi.order_id
GROUP BY o.order_id, o.customer_id
),
customer_avg AS (
-- Average order value per customer
SELECT
c.customer_id,
AVG(ov.order_total) AS avg_order_value
FROM order_values ov
JOIN amazon_brazil.customers c
ON ov.customer_id = c.customer_id
GROUP BY c.customer_id
)
SELECT
customer_id,
ROUND(avg_order_value, 2) AS avg_order_value,
RANK() OVER (ORDER BY avg_order_value DESC) AS customer_rank
FROM customer_avg
ORDER BY customer_rank
LIMIT 20;
-- Part III - Question 6:
-- Calculate monthly cumulative sales for each product.
WITH monthly_sales AS (
SELECT
oi.product_id,
DATE_TRUNC('month', o.order_purchase_timestamp) AS sale_month,
SUM(oi.price) AS monthly_total
FROM amazon_brazil.orders o
JOIN amazon_brazil.order_items oi
ON o.order_id = oi.order_id
GROUP BY oi.product_id, sale_month
)
SELECT
product_id,
sale_month,
SUM(monthly_total) OVER (
PARTITION BY product_id
ORDER BY sale_month
) AS total_sales
FROM monthly_sales
ORDER BY product_id, sale_month;
-- Part III - Question 7:
-- Calculate monthly sales and month-over-month growth for each payment type in 2018.
WITH monthly_sales AS (
SELECT
p.payment_type,
DATE_TRUNC('month', o.order_purchase_timestamp) AS sale_month,
SUM(oi.price) AS monthly_total
FROM amazon_brazil.orders o
JOIN amazon_brazil.order_items oi
ON o.order_id = oi.order_id
JOIN amazon_brazil.payments p
ON o.order_id = p.order_id
WHERE EXTRACT(YEAR FROM o.order_purchase_timestamp) = 2018
GROUP BY p.payment_type, sale_month
)
SELECT
payment_type,
sale_month,
ROUND(monthly_total, 2) AS monthly_total,
ROUND(
(monthly_total - LAG(monthly_total) OVER (
PARTITION BY payment_type
ORDER BY sale_month
))
* 100.0
/ LAG(monthly_total) OVER (
PARTITION BY payment_type
ORDER BY sale_month
),
2
) AS monthly_change
FROM monthly_sales
ORDER BY payment_type, sale_month;