-
Notifications
You must be signed in to change notification settings - Fork 6
feat: add refund models and order fact updates (gkalapov submission) #15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| version: 2 | ||
|
|
||
| models: | ||
| - name: daily_revenue | ||
| description: Daily revenue rollup from order_fact. | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| {{ config(severity='error') }} | ||
|
|
||
| -- Reconciles daily_revenue rollup against summed raw line item revenue. | ||
| -- Returns rows when daily totals by order_date do not match, or when an | ||
| -- order_date is present on only one side of the comparison. | ||
|
|
||
| -- Rollup under test: the columns compared below, read from daily_revenue. | ||
| with actual as ( | ||
| select | ||
| order_date, | ||
| daily_revenue, | ||
| orders | ||
| from {{ ref('daily_revenue') }} | ||
| ), | ||
|
|
||
| -- Independent recomputation from raw sources, excluding orders flagged is_test. | ||
| -- unit_price_in_cents is divided by 100.0 before comparing with daily_revenue. | ||
| expected as ( | ||
| select | ||
| cast(o.ordered_at as date) as order_date, | ||
| sum(li.quantity * li.unit_price_in_cents) / 100.0 as expected_daily_revenue, | ||
| count(distinct o.order_id) as expected_orders | ||
| from {{ source('raw', 'line_items') }} as li | ||
| inner join {{ source('raw', 'orders') }} as o | ||
| on li.order_id = o.order_id | ||
| where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true' | ||
| group by cast(o.ordered_at as date) | ||
| ) | ||
|
|
||
| -- Full outer join so a date missing from either side is reported instead of | ||
| -- being silently dropped; a missing side shows up as NULL fact_/expected_ columns. | ||
| select | ||
| coalesce(a.order_date, e.order_date) as order_date, | ||
| a.daily_revenue as fact_daily_revenue, | ||
| e.expected_daily_revenue, | ||
| a.orders as fact_orders, | ||
| e.expected_orders, | ||
| coalesce(e.expected_daily_revenue, 0) - coalesce(a.daily_revenue, 0) as revenue_discrepancy | ||
| from actual as a | ||
| full outer join expected as e | ||
| on a.order_date = e.order_date | ||
| where | ||
| a.order_date is null | ||
| or e.order_date is null | ||
| or abs(e.expected_daily_revenue - a.daily_revenue) > 1 | ||
| or e.expected_orders != a.orders | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,26 +1,41 @@ | ||
| {{ config(severity='warn') }} | ||
| {{ config(severity='error') }} | ||
|
|
||
| -- Reconciles total order_fact.revenue against summed line items for non-test orders. | ||
| -- Returns rows when the discrepancy exceeds $1 — that is, when something is broken. | ||
| -- Reconciles order_fact totals against summed raw line items for non-test orders. | ||
| -- Returns rows when anything is broken (revenue/quantity/line_count). | ||
|
|
||
| with fact_total as ( | ||
| select sum(revenue) as total_revenue | ||
| select | ||
| sum(revenue) as total_revenue | ||
| , sum(total_quantity) as total_quantity | ||
| , sum(line_count) as total_line_count | ||
| from {{ ref('order_fact') }} | ||
| where coalesce(lower(cast(is_test as varchar)), 'false') != 'true' | ||
| ) | ||
|
|
||
| , line_total as ( | ||
| select sum(li.quantity * li.unit_price) as expected_revenue | ||
| select | ||
| sum(li.quantity * li.unit_price) as expected_revenue | ||
| , sum(li.quantity) as expected_quantity | ||
| , count(distinct li.line_item_id) as expected_line_count | ||
| from {{ ref('stg_line_items') }} as li | ||
| inner join {{ ref('stg_orders') }} as o | ||
| on li.order_id = o.order_id | ||
| where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true' | ||
| ) | ||
|
|
||
| select | ||
| f.total_revenue | ||
| , l.expected_revenue | ||
| , l.expected_revenue - f.total_revenue as discrepancy | ||
| f.total_revenue as fact_revenue | ||
| , l.expected_revenue as expected_revenue | ||
| , l.expected_revenue - f.total_revenue as revenue_discrepancy | ||
| , f.total_quantity as fact_quantity | ||
| , l.expected_quantity as expected_quantity | ||
| , l.expected_quantity - f.total_quantity as quantity_discrepancy | ||
| , f.total_line_count as fact_line_count | ||
| , l.expected_line_count as expected_line_count | ||
| , l.expected_line_count - f.total_line_count as line_count_discrepancy | ||
| from fact_total as f | ||
| cross join line_total as l | ||
| where abs(l.expected_revenue - f.total_revenue) > 1 | ||
| where | ||
| abs(l.expected_revenue - f.total_revenue) > 1 | ||
| OR (l.expected_quantity - f.total_quantity) != 0 | ||
| OR (l.expected_line_count - f.total_line_count) != 0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| {{ config(severity='error') }} | ||
|
|
||
| -- Reconciles order_line_fact.line_revenue against summed raw line items for non-test orders. | ||
| -- Returns rows when totals do not match. | ||
|
|
||
| with fact_total as ( | ||
| select | ||
| sum(line_revenue) as total_line_revenue, | ||
| count(distinct line_item_id) as line_item_count | ||
| from {{ ref('order_line_fact') }} as f | ||
| inner join {{ source('raw', 'orders') }} as o | ||
| on f.order_id = o.order_id | ||
| where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true' | ||
| ), | ||
|
Comment on lines
+6
to
+14
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
# Confirm order_line_fact is implemented (non-empty).
fd -i 'order_line_fact' -e sql --exec sh -c 'echo "== {} =="; cat "{}"'
Repository: helloextend/poc-data-engineering-coding-challenge
Length of output: 2071
🏁 Script executed:
#!/bin/bash
set -euo pipefail
# 1) List tests that reference order_fact
echo "== Tests referencing ref('order_fact') =="
rg -n "ref\('order_fact'\)" tests || true
# 2) Find order_fact reconciliation test candidates
echo
echo "== Candidate reconciliation tests =="
rg -n "revenue.*reconciliation|order_fact" tests || true
# 3) Inspect the order_fact test file(s) around the join to orders
# (only print small relevant slices)
files=$(rg -l "ref\('order_fact'\)" tests || true)
if [ -n "${files:-}" ]; then
echo
echo "== Relevant snippets in each order_fact test =="
for f in $files; do
echo
echo "---- $f ----"
# show any lines that mention orders source/staging
rg -n "stg_orders|source\('raw', 'orders'\)|source\(\"raw\", \"orders\"\)|ref\('stg_orders'\)" "$f" || true
# show a few lines around any such matches
rg -n -C 3 "stg_orders|source\('raw', 'orders'\)" "$f" || true
done
fi
# 4) Sanity-check order_line_fact.sql implementation markers (non-empty model)
echo
echo "== order_line_fact.sql size / key contents =="
wc -l models/orders/dw/order_line_fact.sql
sed -n '1,40p' models/orders/dw/order_line_fact.sql
Repository: helloextend/poc-data-engineering-coding-challenge
Length of output: 1504
🏁 Script executed:
#!/bin/bash
set -euo pipefail
echo "== tests/order_fact_revenue_reconciliation.sql =="
sed -n '1,120p' tests/order_fact_revenue_reconciliation.sql
echo
echo "== usages in tests/order_fact_revenue_reconciliation.sql =="
rg -n "ref\('stg_orders'\)|source\('raw', 'orders'\)|ref\('order_fact'\)|source\('raw', 'line_items'\)|ref\('stg_line_items'\)" tests/order_fact_revenue_reconciliation.sql
Repository: helloextend/poc-data-engineering-coding-challenge
Length of output: 1887
Fix test layer consistency in order_line_fact revenue reconciliation
🤖 Prompt for AI Agents |
||
|
|
||
| expected_total as ( | ||
| select | ||
| sum(li.quantity * li.unit_price_in_cents) / 100.0 as expected_line_revenue, | ||
| count(distinct li.line_item_id) as expected_line_item_count | ||
| from {{ source('raw', 'line_items') }} as li | ||
| inner join {{ source('raw', 'orders') }} as o | ||
| on li.order_id = o.order_id | ||
| where coalesce(lower(cast(o.is_test as varchar)), 'false') != 'true' | ||
| ) | ||
|
|
||
| select | ||
| f.total_line_revenue as fact_line_revenue, | ||
| e.expected_line_revenue, | ||
| e.expected_line_revenue - f.total_line_revenue as revenue_discrepancy, | ||
| f.line_item_count as fact_line_item_count, | ||
| e.expected_line_item_count, | ||
| e.expected_line_item_count - f.line_item_count as line_item_count_discrepancy | ||
| from fact_total as f | ||
| cross join expected_total as e | ||
| where | ||
| abs(e.expected_line_revenue - f.total_line_revenue) > 1 | ||
| OR (e.expected_line_item_count - f.line_item_count) != 0 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`inner join` lets one-sided dates escape reconciliation.
If an `order_date` exists in `daily_revenue` but not in `expected` (or vice versa), the inner join drops it and the discrepancy is never flagged—precisely the divergence this test should catch. Use a full outer join keyed on a coalesced date.
🔧 Proposed fix
📝 Committable suggestion
🤖 Prompt for AI Agents