-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.ts
More file actions
105 lines (95 loc) · 3.52 KB
/
Copy pathindex.ts
File metadata and controls
105 lines (95 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/**
* Customer feedback loop — multi-rater approve/reject corpus → decision packet.
*
* Run with: pnpm tsx examples/customer-feedback-loop/index.ts
*
* Synthesises a 30-claim research corpus reviewed by 3 raters with realistic
* agreement noise. Pipes through fromFeedbackTable() + analyzeRuns(), then
* prints the decision packet — focus on the inter-rater agreement section
* and the top disagreement triage list.
*/
import {
analyzeRuns,
fromFeedbackTable,
type FeedbackTableRow,
} from '../../src/contract'
// Number of synthetic claims generated by synthesise(); each claim becomes one runId.
const N_CLAIMS = 30
// Rater identifiers. synthesise() emits one rating per rater for every claim, and
// treats 'alice' and 'carol' specially on borderline claims.
const RATERS = ['alice', 'bob', 'carol']
// Build the deterministic demo corpus: one boolean approve/reject rating per
// (claim, rater) pair. Raters mostly agree; the borderline tier is where they split.
function synthesise(): FeedbackTableRow[] {
  type Tier = 'good' | 'bad' | 'borderline'

  // Quality tier by claim index: multiples of 7 are borderline, the remaining
  // multiples of 6 are bad, everything else is good. With N_CLAIMS = 30 that is
  // 5 borderline, 4 bad and 21 good claims.
  const tierOf = (index: number): Tier => {
    if (index % 7 === 0) return 'borderline'
    if (index % 6 === 0) return 'bad'
    return 'good'
  }

  // A rater approves when their hash value for the claim exceeds this cutoff,
  // so a higher cutoff means fewer approvals.
  const cutoffFor = (tier: Tier, rater: string): number => {
    switch (tier) {
      case 'good':
        return 0.1 // ~90% approve
      case 'bad':
        return 0.85 // ~15% approve
      default:
        // Borderline: alice is pickier, carol is lenient, everyone else is neutral.
        if (rater === 'alice') return 0.7
        if (rater === 'carol') return 0.3
        return 0.5
    }
  }

  const table: FeedbackTableRow[] = []
  for (let index = 0; index < N_CLAIMS; index += 1) {
    const runId = `claim-${index + 1}`
    const tier = tierOf(index)
    for (const rater of RATERS) {
      // Seeding the hash with runId + rater keeps the output stable across runs.
      const rating = pseudoRand(runId + rater) > cutoffFor(tier, rater)
      table.push({ runId, rater, rating })
    }
  }
  return table
}
/**
 * Deterministic string-to-number hash used as a seeded stand-in for randomness.
 *
 * Runs 32-bit FNV-1a over the UTF-16 code units of `s` and divides the unsigned
 * result by 0xffffffff.
 *
 * @param s - Seed string; the same string always produces the same value.
 * @returns A number in the closed interval [0, 1].
 */
function pseudoRand(s: string): number {
  const FNV_OFFSET_BASIS = 0x811c9dc5 // 2166136261
  const FNV_PRIME = 0x01000193 // 16777619

  const codeUnits = Array.from({ length: s.length }, (_, pos) => s.charCodeAt(pos))
  const hash = codeUnits.reduce(
    // XOR the code unit in, multiply with 32-bit wraparound, then force unsigned.
    (acc, unit) => Math.imul(acc ^ unit, FNV_PRIME) >>> 0,
    FNV_OFFSET_BASIS,
  )
  return hash / 0xffffffff
}
/**
 * Script entry point.
 *
 * Generates the synthetic ratings, converts them with fromFeedbackTable(),
 * awaits analyzeRuns(), and prints the resulting report to stdout: summary
 * statistics, the inter-rater section (only when the report has one), and the
 * recommendations list.
 */
async function main() {
  const feedbackRows = synthesise()
  const converted = fromFeedbackTable({ ratings: feedbackRows })
  const report = await analyzeRuns({
    runs: converted.runs,
    raterScores: converted.raterScores,
  })

  // ── Summary ──
  console.log('═══ Customer feedback corpus — decision packet ═══')
  console.log()
  console.log(`Runs analyzed: ${report.n}`)

  const summary = report.composite
  const meanText = summary.mean.toFixed(3)
  const p50Text = summary.p50.toFixed(3)
  const p95Text = summary.p95.toFixed(3)
  console.log(`Composite mean: ${meanText} (p50: ${p50Text}, p95: ${p95Text})`)

  // The composite mean is shown as a whole-number percentage.
  const approvePercent = (summary.mean * 100).toFixed(0)
  console.log(`Approve rate: ~${approvePercent}%`)
  console.log()

  // ── Inter-rater agreement (skipped when the report carries none) ──
  const agreement = report.interRater
  if (agreement) {
    console.log('── Inter-rater agreement ──')
    console.log(`Raters: ${agreement.raters} (${RATERS.join(', ')})`)
    console.log(`Jointly rated runs: ${agreement.jointlyRated}`)
    console.log('Pairwise pearson κ:')
    for (const [pairLabel, pairKappa] of Object.entries(agreement.perPair)) {
      const paddedLabel = pairLabel.padEnd(14)
      console.log(` ${paddedLabel} ${pairKappa.toFixed(2)}`)
    }
    console.log(`Mean κ: ${agreement.kappa.toFixed(2)}`)
    console.log()

    console.log('── Top 5 disagreement cases (worth a triage meeting) ──')
    const topCases = agreement.disagreementCases.slice(0, 5)
    for (const disputed of topCases) {
      const perRater = disputed.ratings.map((entry) => `${entry.rater}=${entry.score.toFixed(0)}`)
      const ratingStr = perRater.join(', ')
      console.log(
        ` ${disputed.runId.padEnd(10)} range=${disputed.range.toFixed(2)} ratings: ${ratingStr}`,
      )
    }
    console.log()
  }

  // ── Recommendations ──
  console.log('── Recommendations ──')
  for (const { priority, kind, title, detail } of report.recommendations) {
    console.log(`[${priority}] ${kind} — ${title}`)
    console.log(` ${detail}`)
  }
  console.log()
  console.log('═══ end ═══')
}
// Run the example; if main() rejects, log the failure and exit with status 1.
const reportFailureAndExit = (failure: unknown): void => {
  console.error(failure)
  process.exit(1)
}
main().catch(reportFailureAndExit)