diff --git a/models/orders/dw/order_fact.sql b/models/orders/dw/order_fact.sql
index ecd392f..a03a0bc 100644
--- a/models/orders/dw/order_fact.sql
+++ b/models/orders/dw/order_fact.sql
@@ -3,108 +3,77 @@
     unique_key='order_id'
 ) }}
 
-WITH shipment_lines AS (
-    SELECT
-        sl.shipment_id
-        , sl.line_item_id
-        , sl.quantity_shipped
-        , li.unit_price
-    FROM {{ ref('stg_shipment_line_items') }} AS sl
-    INNER JOIN {{ ref('stg_line_items') }} AS li
-        ON sl.line_item_id = li.line_item_id
-)
-
-, joined AS (
+WITH order_base AS (
+    -- Orders enriched with attributes from the merchant lookup.
     SELECT
         o.order_id
         , o.merchant_id
+        , m.merchant_name
         , o.customer_id
+        , m.customer_type
         , o.order_status
         , o.is_test
         , o.ordered_at
         , o.paid_at
-        , s.shipment_id
-        , s.shipped_at
-        , sl.line_item_id
-        , sl.quantity_shipped
-        , sl.unit_price
     FROM {{ ref('stg_orders') }} AS o
-    LEFT JOIN {{ ref('stg_shipments') }} AS s
-        ON o.order_id = s.order_id
-    LEFT JOIN shipment_lines AS sl
-        ON s.shipment_id = sl.shipment_id
-)
-
-, shipment_totals AS (
-    -- aggregated to one row per (order, shipment)
-    SELECT
-        order_id
-        , merchant_id
-        , customer_id
-        , order_status
-        , is_test
-        , ordered_at
-        , paid_at
-        , shipment_id
-        , shipped_at
-        , count(DISTINCT line_item_id) AS line_count
-        , sum(quantity_shipped) AS total_quantity
-        , sum(quantity_shipped * unit_price) AS shipment_revenue
-    FROM joined
-    GROUP BY order_id, merchant_id, customer_id, order_status, is_test, ordered_at, paid_at, shipment_id, shipped_at
+    LEFT JOIN {{ ref('lkp_merchants') }} AS m
+        ON o.merchant_id = m.merchant_id
+    {% if is_incremental() %}
+        -- Incremental runs only append orders that are not in the target yet.
+        -- NOT EXISTS rather than NOT IN: one NULL order_id in the target would
+        -- make NOT IN evaluate to UNKNOWN for every order and load nothing.
+        -- NOTE(review): orders already loaded are never refreshed by this
+        -- filter (order_status, paid_at, shipment columns) -- confirm intended.
+        WHERE NOT EXISTS (
+            SELECT 1
+            FROM {{ this }} AS t
+            WHERE t.order_id = o.order_id
+        )
+    {% endif %}
 )
 
-, shipment_counts AS (
+, order_lines AS (
+    -- One row per order from ordered line items (not shipment allocations).
     SELECT
-        order_id
-        , count(DISTINCT shipment_id) AS shipment_count
-    FROM shipment_totals
-    GROUP BY order_id
+        li.order_id
+        , count(DISTINCT li.line_item_id) AS line_count
+        , sum(li.quantity) AS total_quantity
+        , sum(li.quantity * li.unit_price) AS revenue
+    FROM {{ ref('stg_line_items') }} AS li
+    GROUP BY li.order_id
 )
 
-, enriched AS (
+, shipment_aggs AS (
+    -- One row per order with shipment metadata.
     SELECT
-        st.order_id
-        , st.merchant_id
-        , m.merchant_name
-        , st.customer_id
-        , m.customer_type
-        , st.order_status
-        , st.is_test
-        , st.ordered_at
-        , st.paid_at
-        , st.shipped_at
-        , sc.shipment_count
-        , st.line_count
-        , st.total_quantity
-        , st.shipment_revenue AS revenue
-    FROM shipment_totals AS st
-    LEFT JOIN {{ ref('lkp_merchants') }} AS m
-        ON st.merchant_id = m.merchant_id
-    LEFT JOIN shipment_counts AS sc
-        ON st.order_id = sc.order_id
+        s.order_id
+        , count(DISTINCT s.shipment_id) AS shipment_count
+        , min(s.shipped_at) AS shipped_at
+    FROM {{ ref('stg_shipments') }} AS s
+    GROUP BY s.order_id
 )
 
+-- The left joins keep orders that have no line items or shipments;
+-- coalesce turns their missing aggregates into 0 instead of NULL.
 SELECT
-    order_id
-    , merchant_id
-    , merchant_name
-    , customer_id
-    , customer_type
-    , order_status
-    , is_test
-    , ordered_at
-    , paid_at
-    , shipped_at
-    , shipment_count
-    , line_count
-    , total_quantity
-    , revenue
+    ob.order_id
+    , ob.merchant_id
+    , ob.merchant_name
+    , ob.customer_id
+    , ob.customer_type
+    , ob.order_status
+    , ob.is_test
+    , ob.ordered_at
+    , ob.paid_at
+    , sa.shipped_at
+    , coalesce(sa.shipment_count, 0) AS shipment_count
+    , coalesce(ol.line_count, 0) AS line_count
+    , coalesce(ol.total_quantity, 0) AS total_quantity
+    , coalesce(ol.revenue, 0) AS revenue
     , current_timestamp AS created_at_dwh
     , current_timestamp AS updated_at_dwh
-FROM enriched
-{% if is_incremental() %}
-    WHERE ordered_at >= {{ get_incremental_value('updated_at_dwh') }}
-{% endif %}
--- dedupe to one row per order (orders can have multiple shipments)
-QUALIFY row_number() OVER (PARTITION BY order_id ORDER BY shipped_at) = 1
+FROM order_base AS ob
+LEFT JOIN order_lines AS ol
+    ON ob.order_id = ol.order_id
+LEFT JOIN shipment_aggs AS sa
+    ON ob.order_id = sa.order_id
diff --git a/models/orders/reporting/orders_reporting.yml b/models/orders/reporting/orders_reporting.yml
new file mode 100644
index 0000000..ab410fb
--- /dev/null
+++ b/models/orders/reporting/orders_reporting.yml
@@ -0,0 +1,6 @@
+version: 2
+
+models:
+  - name: daily_revenue
+    description: Daily revenue rollup from order_fact.
+
diff --git a/tests/daily_revenue_reconciliation.sql b/tests/daily_revenue_reconciliation.sql
new file mode 100644
index 0000000..0a95ec6
--- /dev/null
+++ b/tests/daily_revenue_reconciliation.sql
@@ -0,0 +1,44 @@
+{{ config(severity='error') }}
+
+-- Reconciles daily_revenue rollup against summed raw line item revenue.
+-- Returns rows when daily revenue or order counts differ for an order_date,
+-- including dates that exist on only one side of the comparison.
+
+with actual as (
+    select
+        order_date,
+        daily_revenue,
+        orders
+    from {{ ref('daily_revenue') }}
+),
+
+expected as (
+    -- Driven from orders so that orders without line items still count toward
+    -- expected_orders with 0 revenue, matching how order_fact keeps such orders.
+    -- NOTE(review): confirm daily_revenue counts zero-line orders the same way.
+    select
+        cast(o.ordered_at as date) as order_date,
+        coalesce(sum(li.quantity * li.unit_price_in_cents), 0) / 100.0 as expected_daily_revenue,
+        count(distinct o.order_id) as expected_orders
+    from {{ source('raw', 'orders') }} as o
+    left join {{ source('raw', 'line_items') }} as li
+        on o.order_id = li.order_id
+    where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true'
+    group by cast(o.ordered_at as date)
+)
+
+select
+    coalesce(a.order_date, e.order_date) as order_date,
+    a.daily_revenue as fact_daily_revenue,
+    e.expected_daily_revenue,
+    a.orders as fact_orders,
+    e.expected_orders,
+    coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0) as revenue_discrepancy
+from actual as a
+-- full outer join: a date missing from either side must surface as a failure.
+full outer join expected as e
+    on a.order_date = e.order_date
+where
+    abs(coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0)) > 1
+    or coalesce(e.expected_orders, 0) != coalesce(a.orders, 0)
+
diff --git a/tests/order_fact_revenue_reconciliation.sql b/tests/order_fact_revenue_reconciliation.sql
index 5c6952d..b1a00f7 100644
--- a/tests/order_fact_revenue_reconciliation.sql
+++ b/tests/order_fact_revenue_reconciliation.sql
@@ -1,16 +1,24 @@
-{{ config(severity='warn') }}
+{{ config(severity='error') }}
 
--- Reconciles total order_fact.revenue against summed line items for non-test orders.
--- Returns rows when the discrepancy exceeds $1 — that is, when something is broken.
+-- Reconciles order_fact totals against summed stg_line_items for non-test orders.
+-- Returns rows when anything is broken (revenue/quantity/line_count).
 
 with fact_total as (
-    select sum(revenue) as total_revenue
+    -- coalesce(..., 0): sum() over zero rows is NULL, and a NULL total would make
+    -- every comparison in the final where clause UNKNOWN, so the test would pass.
+    select
+        coalesce(sum(revenue), 0) as total_revenue
+        , coalesce(sum(total_quantity), 0) as total_quantity
+        , coalesce(sum(line_count), 0) as total_line_count
     from {{ ref('order_fact') }}
     where coalesce(lower(cast(is_test as varchar)), 'false') != 'true'
 )
 
 , line_total as (
-    select sum(li.quantity * li.unit_price) as expected_revenue
+    select
+        coalesce(sum(li.quantity * li.unit_price), 0) as expected_revenue
+        , coalesce(sum(li.quantity), 0) as expected_quantity
+        , count(distinct li.line_item_id) as expected_line_count
     from {{ ref('stg_line_items') }} as li
     inner join {{ ref('stg_orders') }} as o
         on li.order_id = o.order_id
@@ -18,9 +26,18 @@ with fact_total as (
 )
 
 select
-    f.total_revenue
-    , l.expected_revenue
-    , l.expected_revenue - f.total_revenue as discrepancy
+    f.total_revenue as fact_revenue
+    , l.expected_revenue as expected_revenue
+    , l.expected_revenue - f.total_revenue as revenue_discrepancy
+    , f.total_quantity as fact_quantity
+    , l.expected_quantity as expected_quantity
+    , l.expected_quantity - f.total_quantity as quantity_discrepancy
+    , f.total_line_count as fact_line_count
+    , l.expected_line_count as expected_line_count
+    , l.expected_line_count - f.total_line_count as line_count_discrepancy
 from fact_total as f
 cross join line_total as l
-where abs(l.expected_revenue - f.total_revenue) > 1
+where
+    abs(l.expected_revenue - f.total_revenue) > 1
+    or (l.expected_quantity - f.total_quantity) != 0
+    or (l.expected_line_count - f.total_line_count) != 0
diff --git a/tests/order_line_fact_revenue_reconciliation.sql b/tests/order_line_fact_revenue_reconciliation.sql
new file mode 100644
index 0000000..9a1f132
--- /dev/null
+++ b/tests/order_line_fact_revenue_reconciliation.sql
@@ -0,0 +1,40 @@
+{{ config(severity='error') }}
+
+-- Reconciles order_line_fact.line_revenue against summed raw line items for non-test orders.
+-- Returns rows when totals do not match.
+
+with fact_total as (
+    -- coalesce(..., 0): sum() over zero rows is NULL, and a NULL total would make
+    -- the comparisons in the final where clause UNKNOWN, so the test would pass.
+    select
+        coalesce(sum(f.line_revenue), 0) as total_line_revenue,
+        count(distinct f.line_item_id) as line_item_count
+    from {{ ref('order_line_fact') }} as f
+    inner join {{ source('raw', 'orders') }} as o
+        on f.order_id = o.order_id
+    where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true'
+),
+
+expected_total as (
+    select
+        coalesce(sum(li.quantity * li.unit_price_in_cents), 0) / 100.0 as expected_line_revenue,
+        count(distinct li.line_item_id) as expected_line_item_count
+    from {{ source('raw', 'line_items') }} as li
+    inner join {{ source('raw', 'orders') }} as o
+        on li.order_id = o.order_id
+    where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true'
+)
+
+select
+    f.total_line_revenue as fact_line_revenue,
+    e.expected_line_revenue,
+    e.expected_line_revenue - f.total_line_revenue as revenue_discrepancy,
+    f.line_item_count as fact_line_item_count,
+    e.expected_line_item_count,
+    e.expected_line_item_count - f.line_item_count as line_item_count_discrepancy
+from fact_total as f
+cross join expected_total as e
+where
+    abs(e.expected_line_revenue - f.total_line_revenue) > 1
+    or (e.expected_line_item_count - f.line_item_count) != 0
+