Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 43 additions & 85 deletions models/orders/dw/order_fact.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,108 +3,66 @@
unique_key='order_id'
) }}

WITH shipment_lines AS (
SELECT
sl.shipment_id
, sl.line_item_id
, sl.quantity_shipped
, li.unit_price
FROM {{ ref('stg_shipment_line_items') }} AS sl
INNER JOIN {{ ref('stg_line_items') }} AS li
ON sl.line_item_id = li.line_item_id
)

, joined AS (
WITH order_base AS (
SELECT
o.order_id
, o.merchant_id
, m.merchant_name
, o.customer_id
, m.customer_type
, o.order_status
, o.is_test
, o.ordered_at
, o.paid_at
, s.shipment_id
, s.shipped_at
, sl.line_item_id
, sl.quantity_shipped
, sl.unit_price
FROM {{ ref('stg_orders') }} AS o
LEFT JOIN {{ ref('stg_shipments') }} AS s
ON o.order_id = s.order_id
LEFT JOIN shipment_lines AS sl
ON s.shipment_id = sl.shipment_id
)

, shipment_totals AS (
-- aggregated to one row per (order, shipment)
SELECT
order_id
, merchant_id
, customer_id
, order_status
, is_test
, ordered_at
, paid_at
, shipment_id
, shipped_at
, count(DISTINCT line_item_id) AS line_count
, sum(quantity_shipped) AS total_quantity
, sum(quantity_shipped * unit_price) AS shipment_revenue
FROM joined
GROUP BY order_id, merchant_id, customer_id, order_status, is_test, ordered_at, paid_at, shipment_id, shipped_at
LEFT JOIN {{ ref('lkp_merchants') }} AS m
ON o.merchant_id = m.merchant_id
{% if is_incremental() %}
-- In incremental mode, only append orders we haven't loaded yet.
-- NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL order_id in
-- the target table makes the predicate NULL for every row and nothing loads.
-- Rows whose own order_id is NULL stay excluded, as they were under NOT IN.
-- NOTE(review): orders already present in the target are skipped here, so later
-- changes to them (status, paid_at, shipments) are not picked up until a full
-- refresh — confirm this is intended given unique_key='order_id'.
WHERE o.order_id IS NOT NULL
    AND NOT EXISTS (
        SELECT 1
        FROM {{ this }} AS t
        WHERE t.order_id = o.order_id
    )
{% endif %}
)

, shipment_counts AS (
, order_lines AS (
-- One row per order from ordered line items (not shipment allocations).
SELECT
order_id
, count(DISTINCT shipment_id) AS shipment_count
FROM shipment_totals
GROUP BY order_id
li.order_id
, count(DISTINCT li.line_item_id) AS line_count
, sum(li.quantity) AS total_quantity
, sum(li.quantity * li.unit_price) AS revenue
FROM {{ ref('stg_line_items') }} AS li
GROUP BY li.order_id
)

, enriched AS (
, shipment_aggs AS (
-- One row per order with shipment metadata.
SELECT
st.order_id
, st.merchant_id
, m.merchant_name
, st.customer_id
, m.customer_type
, st.order_status
, st.is_test
, st.ordered_at
, st.paid_at
, st.shipped_at
, sc.shipment_count
, st.line_count
, st.total_quantity
, st.shipment_revenue AS revenue
FROM shipment_totals AS st
LEFT JOIN {{ ref('lkp_merchants') }} AS m
ON st.merchant_id = m.merchant_id
LEFT JOIN shipment_counts AS sc
ON st.order_id = sc.order_id
s.order_id
, count(DISTINCT s.shipment_id) AS shipment_count
, min(s.shipped_at) AS shipped_at
FROM {{ ref('stg_shipments') }} AS s
GROUP BY s.order_id
)

SELECT
order_id
, merchant_id
, merchant_name
, customer_id
, customer_type
, order_status
, is_test
, ordered_at
, paid_at
, shipped_at
, shipment_count
, line_count
, total_quantity
, revenue
ob.order_id
, ob.merchant_id
, ob.merchant_name
, ob.customer_id
, ob.customer_type
, ob.order_status
, ob.is_test
, ob.ordered_at
, ob.paid_at
, sa.shipped_at
, coalesce(sa.shipment_count, 0) AS shipment_count
, coalesce(ol.line_count, 0) AS line_count
, coalesce(ol.total_quantity, 0) AS total_quantity
, coalesce(ol.revenue, 0) AS revenue
, current_timestamp AS created_at_dwh
, current_timestamp AS updated_at_dwh
FROM enriched
{% if is_incremental() %}
WHERE ordered_at >= {{ get_incremental_value('updated_at_dwh') }}
{% endif %}
-- dedupe to one row per order (orders can have multiple shipments)
QUALIFY row_number() OVER (PARTITION BY order_id ORDER BY shipped_at) = 1
FROM order_base AS ob
LEFT JOIN order_lines AS ol
ON ob.order_id = ol.order_id
LEFT JOIN shipment_aggs AS sa
ON ob.order_id = sa.order_id
6 changes: 6 additions & 0 deletions models/orders/reporting/orders_reporting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Properties for the reporting-layer models; currently documents daily_revenue only.
version: 2

models:
- name: daily_revenue
description: Daily revenue rollup from order_fact.

39 changes: 39 additions & 0 deletions tests/daily_revenue_reconciliation.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{{ config(severity='error') }}

-- Reconciles the daily_revenue rollup against revenue summed from raw line items.
-- Returns one row per order_date whose totals disagree, including dates that
-- exist on only one side of the comparison.

with actual as (
    -- Values reported by the rollup under test.
    select
        order_date,
        daily_revenue,
        orders
    from {{ ref('daily_revenue') }}
),

expected as (
    -- Totals recomputed from the raw sources, excluding test orders.
    -- unit_price_in_cents is divided by 100.0 before comparison
    -- (NOTE(review): assumes daily_revenue is in whole currency units — confirm).
    select
        cast(o.ordered_at as date) as order_date,
        sum(li.quantity * li.unit_price_in_cents) / 100.0 as expected_daily_revenue,
        count(distinct o.order_id) as expected_orders
    from {{ source('raw', 'line_items') }} as li
    inner join {{ source('raw', 'orders') }} as o
        on li.order_id = o.order_id
    -- A NULL is_test is treated as a non-test order.
    where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true'
    group by cast(o.ordered_at as date)
)

-- Full outer join so a date missing from either side still produces a row;
-- the missing side's totals are compared as 0 instead of NULL, which would
-- otherwise make the predicates NULL and hide the mismatch.
select
    coalesce(a.order_date, e.order_date) as order_date,
    a.daily_revenue as fact_daily_revenue,
    e.expected_daily_revenue,
    a.orders as fact_orders,
    e.expected_orders,
    coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0) as revenue_discrepancy
from actual as a
full outer join expected as e
    on a.order_date = e.order_date
where
    -- Revenue tolerance of 1 absorbs rounding; order counts must match exactly.
    abs(coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0)) > 1
    or coalesce(e.expected_orders, 0) != coalesce(a.orders, 0)
Comment on lines +33 to +38

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

inner join lets one-sided dates escape reconciliation.

If an order_date exists in daily_revenue but not in expected (or vice versa), the inner join drops it and the discrepancy is never flagged—precisely the divergence this test should catch. Use a full outer join keyed on a coalesced date.

🔧 Proposed fix
+select
+    coalesce(a.order_date, e.order_date) as order_date,
+    a.daily_revenue as fact_daily_revenue,
+    e.expected_daily_revenue,
+    a.orders as fact_orders,
+    e.expected_orders,
+    coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0) as revenue_discrepancy
+from actual as a
+full outer join expected as e
+    on a.order_date = e.order_date
+where
+    abs(coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0)) > 1
+    OR coalesce(e.expected_orders, 0) != coalesce(a.orders, 0)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from actual as a
inner join expected as e
on a.order_date = e.order_date
where
abs(e.expected_daily_revenue - a.daily_revenue) > 1
OR e.expected_orders != a.orders
select
coalesce(a.order_date, e.order_date) as order_date,
a.daily_revenue as fact_daily_revenue,
e.expected_daily_revenue,
a.orders as fact_orders,
e.expected_orders,
coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0) as revenue_discrepancy
from actual as a
full outer join expected as e
on a.order_date = e.order_date
where
abs(coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0)) > 1
OR coalesce(e.expected_orders, 0) != coalesce(a.orders, 0)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/daily_revenue_reconciliation.sql` around lines 33 - 38, The query uses
"inner join" between actual (alias a) and expected (alias e) so rows present
only on one side are dropped; change the join to a full outer join on
a.order_date = e.order_date and select COALESCE(a.order_date, e.order_date) as
order_date so one-sided dates are preserved. Update the WHERE clause to compare
coalesced values (COALESCE(e.expected_daily_revenue, 0) vs
COALESCE(a.daily_revenue, 0) for the numeric tolerance check, and
COALESCE(e.expected_orders, 0) vs COALESCE(a.orders, 0), or an explicit
IS DISTINCT FROM, for the order counts) so discrepancies from missing rows are
flagged.


33 changes: 24 additions & 9 deletions tests/order_fact_revenue_reconciliation.sql
Original file line number Diff line number Diff line change
@@ -1,26 +1,41 @@
{{ config(severity='warn') }}
{{ config(severity='error') }}

-- Reconciles total order_fact.revenue against summed line items for non-test orders.
-- Returns rows when the discrepancy exceeds $1 — that is, when something is broken.
-- Reconciles order_fact totals against summed raw line items for non-test orders.
-- Returns rows when anything is broken (revenue/quantity/line_count).

with fact_total as (
select sum(revenue) as total_revenue
select
sum(revenue) as total_revenue
, sum(total_quantity) as total_quantity
, sum(line_count) as total_line_count
from {{ ref('order_fact') }}
where coalesce(lower(cast(is_test as varchar)), 'false') != 'true'
)

, line_total as (
select sum(li.quantity * li.unit_price) as expected_revenue
select
sum(li.quantity * li.unit_price) as expected_revenue
, sum(li.quantity) as expected_quantity
, count(distinct li.line_item_id) as expected_line_count
from {{ ref('stg_line_items') }} as li
inner join {{ ref('stg_orders') }} as o
on li.order_id = o.order_id
where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true'
)

select
f.total_revenue
, l.expected_revenue
, l.expected_revenue - f.total_revenue as discrepancy
f.total_revenue as fact_revenue
, l.expected_revenue as expected_revenue
, l.expected_revenue - f.total_revenue as revenue_discrepancy
, f.total_quantity as fact_quantity
, l.expected_quantity as expected_quantity
, l.expected_quantity - f.total_quantity as quantity_discrepancy
, f.total_line_count as fact_line_count
, l.expected_line_count as expected_line_count
, l.expected_line_count - f.total_line_count as line_count_discrepancy
from fact_total as f
cross join line_total as l
where abs(l.expected_revenue - f.total_revenue) > 1
where
    -- sum() over zero rows (or all-NULL inputs) is NULL; coalesce both sides so
    -- an empty or NULL total is compared as 0 rather than turning the predicate
    -- NULL and letting the test pass silently.
    abs(coalesce(l.expected_revenue, 0) - coalesce(f.total_revenue, 0)) > 1
    or coalesce(l.expected_quantity, 0) != coalesce(f.total_quantity, 0)
    or coalesce(l.expected_line_count, 0) != coalesce(f.total_line_count, 0)
38 changes: 38 additions & 0 deletions tests/order_line_fact_revenue_reconciliation.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{{ config(severity='error') }}

-- Compares order_line_fact.line_revenue totals with totals summed from the raw
-- line items, for non-test orders only. Any row returned is a mismatch.

with fact_total as (
    -- Fact-side totals, restricted to orders not flagged as test in raw orders.
    -- A NULL is_test counts as a non-test order.
    select
        sum(line_revenue) as total_line_revenue,
        count(distinct line_item_id) as line_item_count
    from {{ ref('order_line_fact') }} as f
    inner join {{ source('raw', 'orders') }} as o
        on f.order_id = o.order_id
    where coalesce(lower(cast(o.is_test as varchar)), 'false') <> 'true'
),
Comment on lines +6 to +14

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Confirm order_line_fact is implemented (non-empty).
fd -i 'order_line_fact' -e sql --exec sh -c 'echo "== {} =="; cat "{}"'

Repository: helloextend/poc-data-engineering-coding-challenge

Length of output: 2071


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# 1) List tests that reference order_fact
echo "== Tests referencing ref('order_fact') =="
rg -n "ref\('order_fact'\)" tests || true

# 2) Find order_fact reconciliation test candidates
echo
echo "== Candidate reconciliation tests =="
rg -n "revenue.*reconciliation|order_fact" tests || true

# 3) Inspect the order_fact test file(s) around the join to orders
# (only print small relevant slices)
files=$(rg -l "ref\('order_fact'\)" tests || true)
if [ -n "${files:-}" ]; then
  echo
  echo "== Relevant snippets in each order_fact test =="
  for f in $files; do
    echo
    echo "---- $f ----"
    # show any lines that mention orders source/staging
    rg -n "stg_orders|source\('raw', 'orders'\)|source\(\"raw\", \"orders\"\)|ref\('stg_orders'\)" "$f" || true
    # show a few lines around any such matches
    rg -n -C 3 "stg_orders|source\('raw', 'orders'\)" "$f" || true
  done
fi

# 4) Sanity-check order_line_fact.sql implementation markers (non-empty model)
echo
echo "== order_line_fact.sql size / key contents =="
wc -l models/orders/dw/order_line_fact.sql
sed -n '1,40p' models/orders/dw/order_line_fact.sql

Repository: helloextend/poc-data-engineering-coding-challenge

Length of output: 1504


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== tests/order_fact_revenue_reconciliation.sql =="
sed -n '1,120p' tests/order_fact_revenue_reconciliation.sql

echo
echo "== usages in tests/order_fact_revenue_reconciliation.sql =="
rg -n "ref\('stg_orders'\)|source\('raw', 'orders'\)|ref\('order_fact'\)|source\('raw', 'line_items'\)|ref\('stg_line_items'\)" tests/order_fact_revenue_reconciliation.sql

Repository: helloextend/poc-data-engineering-coding-challenge

Length of output: 1887


Fix test layer consistency in order_line_fact revenue reconciliation

  • ref('order_line_fact') is implemented (models/orders/dw/order_line_fact.sql selects from {{ ref('stg_line_items') }}), so the test shouldn’t fail just because the model is an empty skeleton.
  • tests/order_line_fact_revenue_reconciliation.sql filters non-test orders via {{ source('raw', 'orders') }}, while the analogous tests/order_fact_revenue_reconciliation.sql uses {{ ref('stg_orders') }}. Reconcile against the staged layer for consistency (and to avoid masking staging transforms).
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/order_line_fact_revenue_reconciliation.sql` around lines 6 - 14, The
test currently joins {{ ref('order_line_fact') }} to {{ source('raw','orders')
}} to filter out test orders; change that to join against the staged orders
model {{ ref('stg_orders') }} so the test reconciles the fact against the staged
layer (not raw), i.e., replace references to source('raw','orders') in
tests/order_line_fact_revenue_reconciliation.sql with ref('stg_orders') and keep
the same non-test filter expression (coalesce(lower(cast(o.is_test as varchar)),
'false') != 'true') so staging transforms are validated.


expected_total as (
    -- Expected totals recomputed from the raw sources for non-test orders.
    -- unit_price_in_cents is divided by 100.0 before comparison
    -- (NOTE(review): assumes line_revenue is in whole currency units — confirm).
    select
        sum(li.quantity * li.unit_price_in_cents) / 100.0 as expected_line_revenue,
        count(distinct li.line_item_id) as expected_line_item_count
    from {{ source('raw', 'line_items') }} as li
    inner join {{ source('raw', 'orders') }} as o
        on li.order_id = o.order_id
    where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true'
)

-- Emits a row when revenue differs by more than 1 or the line item counts differ.
-- Revenue sums are coalesced to 0: a NULL total (no rows, or all-NULL inputs)
-- would otherwise make the revenue predicate NULL and hide the mismatch.
select
    f.total_line_revenue as fact_line_revenue,
    e.expected_line_revenue,
    coalesce(e.expected_line_revenue, 0) - coalesce(f.total_line_revenue, 0) as revenue_discrepancy,
    f.line_item_count as fact_line_item_count,
    e.expected_line_item_count,
    e.expected_line_item_count - f.line_item_count as line_item_count_discrepancy
from fact_total as f
cross join expected_total as e
where
    abs(coalesce(e.expected_line_revenue, 0) - coalesce(f.total_line_revenue, 0)) > 1
    or e.expected_line_item_count != f.line_item_count