-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathcreate_dimensional_objects.sql
More file actions
236 lines (212 loc) · 6.55 KB
/
create_dimensional_objects.sql
File metadata and controls
236 lines (212 loc) · 6.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
CREATE SCHEMA STG AUTHORIZATION dbo;
--Create Parquet file format
CREATE EXTERNAL FILE FORMAT SynapseParquetFormat
WITH (
FORMAT_TYPE = PARQUET
);
--Customer
CREATE EXTERNAL TABLE STG.DimCustomer
WITH
(
LOCATION = 'conformed/dimensions/dimcustomer/01',
DATA_SOURCE = ExternalDataSourceDataLake,
FILE_FORMAT = SynapseParquetFormat
)
AS
SELECT CAST(ROW_NUMBER() OVER(ORDER BY C.CustomerID) AS INT) AS CustomerKey,
CAST(C.CustomerID AS INT) AS CustomerID,
C.CustomerName,
CC.CustomerCategoryName,
BG.BuyingGroupName,
DM.DeliveryMethodName,
DC.CityName AS DeliveryCityName,
DSP.StateProvinceName AS DeliveryStateProvinceName,
DSP.SalesTerritory AS DeliverySalesTerritory,
DCO.Country AS DeliveryCountry,
DCO.Continent AS DeliveryContinent,
DCO.Region AS DeliveryRegion,
DCO.Subregion AS DeliverySubregion,
CAST('2013-01-01' AS DATE) AS ValidFromDate
FROM LDW.vwCustomers C
LEFT JOIN LDW.vwCustomerCategories CC On CC.CustomerCategoryID = C.CustomerCategoryID
LEFT JOIN LDW.vwCities DC ON DC.CityID = C.DeliveryCityID
LEFT JOIN LDW.vwStateProvinces DSP ON DSP.StateProvinceID = DC.StateProvinceID
LEFT JOIN LDW.vwCountries DCO ON DCO.CountryID = DSP.CountryID
LEFT JOIN LDW.vwBuyingGroups BG ON BG.BuyingGroupID = C.BuyingGroupID
LEFT JOIN LDW.vwDeliveryMethods DM ON DM.DeliveryMethodID = C.DeliveryMethodID
ORDER BY C.CustomerID
--StockItem
CREATE EXTERNAL TABLE STG.DimStockItem
WITH
(
LOCATION = 'conformed/dimensions/dimstockitem/01',
DATA_SOURCE = ExternalDataSourceDataLake,
FILE_FORMAT = SynapseParquetFormat
)
AS
SELECT CAST(ROW_NUMBER() OVER(ORDER BY SI.StockItemID) AS SMALLINT) AS StockItemKey,
CAST(SI.StockItemID AS SMALLINT) AS StockItemID,
SI.StockItemName,
SI.LeadTimeDays,
C.ColorName,
OP.PackageTypeName AS OuterPackageTypeName,
CAST('2013-01-01' AS DATE) AS ValidFromDate
FROM LDW.vwStockItems SI
LEFT JOIN LDW.vwColors C ON C.ColorID = SI.ColorID
LEFT JOIN LDW.vwPackageTypes OP ON OP.PackageTypeID = SI.OuterPackageID
ORDER BY SI.StockItemID
--Supplier
CREATE EXTERNAL TABLE STG.DimSupplier
WITH
(
LOCATION = 'conformed/dimensions/dimsupplier/01',
DATA_SOURCE = ExternalDataSourceDataLake,
FILE_FORMAT = SynapseParquetFormat
)
AS
SELECT CAST(ROW_NUMBER() OVER(ORDER BY S.SupplierID) AS TINYINT) AS SupplierKey,
CAST(S.SupplierID AS TINYINT) AS SupplierID,
S.SupplierName,
SC.SupplierCategoryName,
CAST('2013-01-01' AS DATE) AS ValidFromDate
FROM LDW.vwSuppliers S
LEFT JOIN LDW.vwSupplierCategories SC ON SC.SupplierCategoryID = S.SupplierCategoryID
ORDER BY S.SupplierID;
--Date
CREATE EXTERNAL TABLE STG.DimDate
WITH
(
LOCATION = 'conformed/dimensions/dimdate',
DATA_SOURCE = ExternalDataSourceDataLake,
FILE_FORMAT = SynapseParquetFormat
)
AS
SELECT CAST(DateKey AS INT) AS DateKey,
CAST(Date AS DATE) AS Date,
CAST(Day AS TINYINT) AS Day,
CAST(WeekDay AS TINYINT) AS WeekDay,
WeekDayName,
CAST(Month AS TINYINT) AS Month,
MonthName,
CAST(Quarter AS TINYINT) AS Quarter,
CAST(Year AS SMALLINT) AS Year
FROM
OPENROWSET
(
BULK 'sourcedatadim/datedim/*.csv',
DATA_SOURCE = 'ExternalDataSourceDataLake',
FORMAT = 'CSV',
PARSER_VERSION = '2.0',
HEADER_ROW = TRUE,
FIELDTERMINATOR ='|'
) AS fct
--Customer
CREATE VIEW LDW.vwDimCustomer
AS
SELECT * FROM
OPENROWSET
(
BULK 'conformed/dimensions/dimcustomer/*/',
DATA_SOURCE = 'ExternalDataSourceDataLake',
FORMAT = 'Parquet'
) AS fct
--StockItem
CREATE VIEW LDW.vwDimStockItem
AS
SELECT * FROM
OPENROWSET
(
BULK 'conformed/dimensions/dimstockitem/*/',
DATA_SOURCE = 'ExternalDataSourceDataLake',
FORMAT = 'Parquet'
) AS fct
--Supplier
CREATE VIEW LDW.vwDimSupplier
AS
SELECT * FROM
OPENROWSET
(
BULK 'conformed/dimensions/dimsupplier/*/',
DATA_SOURCE = 'ExternalDataSourceDataLake',
FORMAT = 'Parquet'
) AS fct
--Date
CREATE VIEW LDW.vwDimDate
AS
SELECT * FROM
OPENROWSET
(
BULK 'conformed/dimensions/dimdate',
DATA_SOURCE = 'ExternalDataSourceDataLake',
FORMAT = 'Parquet'
) AS fct
CREATE EXTERNAL TABLE STG.FactSales
WITH
(
LOCATION = 'conformed/facts/factsales/initial',
DATA_SOURCE = ExternalDataSourceDataLake,
FILE_FORMAT = SynapseParquetFormat
)
AS
SELECT
--Surrogate Keys
DC.CustomerKey,
CAST(FORMAT(SO.OrderDate,'yyyyMMdd') AS INT) as OrderDateKey,
DSI.StockItemKey,
DS.SupplierKey,
--Degenerate Dimensions
CAST(SO.OrderID AS INT) AS OrderID,
CAST(SOL.OrderLineID AS INT) AS OrderLineID,
--Measure
CAST(SOL.Quantity AS INT) AS SalesOrderQuantity,
CAST(SOL.UnitPrice AS DECIMAL(18,2)) AS SalesOrderUnitPrice
FROM LDW.vwSalesOrdersLines SOL
INNER JOIN LDW.vwSalesOrders SO ON SOL.OrderID = SO.OrderID
LEFT JOIN LDW.vwDimCustomer DC ON DC.CustomerID = SO.CustomerID
LEFT JOIN LDW.vwDimStockItem DSI ON DSI.StockItemID = SOL.StockItemID
LEFT JOIN LDW.vwStockItems SI ON SI.StockItemID = DSI.StockItemID
LEFT JOIN LDW.vwDimSupplier DS ON DS.SupplierID = SI.SupplierID;
CREATE VIEW LDW.vwFactSales
AS
SELECT * FROM
OPENROWSET
(
BULK 'conformed/facts/factsales/initial',
DATA_SOURCE = 'ExternalDataSourceDataLake',
FORMAT = 'Parquet'
) AS fct
--Group Sales by Date
SELECT DD.[Year] AS SalesYear,
DD.[Month] AS SalesMonth,
SUM(FS.Quantity) AS SalesOrderQuantity,
SUM(FS.UnitPrice) AS SalesOrderUnitPrice,
COUNT(DISTINCT FS.OrderID) AS SalesOrderTotal
FROM LDW.vwFactSales FS
INNER JOIN LDW.vwDimDate DD ON DD.DateKey = FS.OrderDateKey
GROUP BY DD.[Year],
DD.[Month]
ORDER BY DD.[Year],
DD.[Month];
--Group Sales by Customer
SELECT DC.DeliverySalesTerritory,
SUM(FS.Quantity) AS SalesOrderQuantity,
SUM(FS.UnitPrice) AS SalesOrderUnitPrice,
COUNT(DISTINCT OrderID) AS SalesOrderTotal
FROM LDW.vwFactSales FS
INNER JOIN LDW.vwDimCustomer DC ON DC.CustomerKey = FS.CustomerKey
GROUP BY DC.DeliverySalesTerritory
ORDER BY SUM(FS.Quantity) DESC;
--Group Sales by Supplier
--Note that multiple Suppliers can be linked to a single Sales Order
SELECT DS.SupplierName,
SUM(FS.Quantity) AS SalesOrderQuantity,
SUM(FS.UnitPrice) AS SalesOrderUnitPrice
FROM LDW.vwFactSales FS
INNER JOIN LDW.vwDimSupplier DS ON DS.SupplierKey = FS.SupplierKey
GROUP BY DS.SupplierName
ORDER BY SUM(FS.Quantity) DESC;
EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimDate';
EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimSupplier';
EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimStockItem';
EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwDimCustomer';
EXEC sp_describe_first_result_set N'SELECT * FROM LDW.vwFactSales';