Skip to content

Commit 77685ac

Browse files
committed
more aggregation test cases to figure out what the AI can reliably deduplicate
1 parent 3bce027 commit 77685ac

12 files changed

Lines changed: 172 additions & 27 deletions

bun.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@
7272
},
7373
"dependencies": {
7474
"commander": "^14.0.2",
75+
"csv-parse": "^6.1.0",
7576
"exceljs": "^4.4.0",
7677
"html-entities": "^2.6.0",
7778
"i18n-iso-countries": "^7.14.0",

src/cli/aggregation.test.ts

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ import { describe, expect, it } from 'vitest'
22
import { createActivity as createTestActivity } from '../test-support'
33
import type { ClassifiedActivity } from '../types'
44
import {
5-
deduplicateActivities,
5+
aggregateActivities,
66
filterByMentionCount,
77
getFirstMentionedAt,
88
getLastMentionedAt,
@@ -29,9 +29,9 @@ function createActivity(
2929
}
3030

3131
describe('Aggregation Module', () => {
32-
describe('deduplicateActivities', () => {
32+
describe('aggregateActivities', () => {
3333
it('returns empty array for empty input', () => {
34-
const result = deduplicateActivities([])
34+
const result = aggregateActivities([])
3535
expect(result).toEqual([])
3636
})
3737

@@ -48,7 +48,7 @@ describe('Aggregation Module', () => {
4848
})
4949
]
5050

51-
const result = deduplicateActivities(activities)
51+
const result = aggregateActivities(activities)
5252

5353
expect(result).toHaveLength(1)
5454
const first = result[0]
@@ -82,7 +82,7 @@ describe('Aggregation Module', () => {
8282
})
8383
]
8484

85-
const result = deduplicateActivities(activities)
85+
const result = aggregateActivities(activities)
8686

8787
expect(result).toHaveLength(1)
8888
const first = result[0]
@@ -110,7 +110,7 @@ describe('Aggregation Module', () => {
110110
})
111111
]
112112

113-
const result = deduplicateActivities(activities)
113+
const result = aggregateActivities(activities)
114114

115115
expect(result).toHaveLength(1)
116116
const first = result[0]
@@ -138,7 +138,7 @@ describe('Aggregation Module', () => {
138138
})
139139
]
140140

141-
const result = deduplicateActivities(activities)
141+
const result = aggregateActivities(activities)
142142

143143
// Compound activities only match on exact title, not fields
144144
expect(result).toHaveLength(2)
@@ -169,7 +169,7 @@ describe('Aggregation Module', () => {
169169
})
170170
]
171171

172-
const result = deduplicateActivities(activities)
172+
const result = aggregateActivities(activities)
173173

174174
expect(result).toHaveLength(3)
175175
expect(result.every((r) => getMentionCount(r) === 1)).toBe(true)
@@ -195,7 +195,7 @@ describe('Aggregation Module', () => {
195195
})
196196
]
197197

198-
const result = deduplicateActivities(activities)
198+
const result = aggregateActivities(activities)
199199

200200
// "Kazuya" vs "Kazuya Restaurant" - not 95% similar, should NOT match
201201
expect(result).toHaveLength(2)
@@ -232,7 +232,7 @@ describe('Aggregation Module', () => {
232232
})
233233
]
234234

235-
const result = deduplicateActivities(activities)
235+
const result = aggregateActivities(activities)
236236

237237
expect(result).toHaveLength(1)
238238
const first = result[0]
@@ -280,7 +280,7 @@ describe('Aggregation Module', () => {
280280
})
281281
]
282282

283-
const result = deduplicateActivities(activities)
283+
const result = aggregateActivities(activities)
284284

285285
expect(result).toHaveLength(1)
286286
expect(result[0]?.messages).toHaveLength(3)
@@ -317,7 +317,7 @@ describe('Aggregation Module', () => {
317317
interestingScore: 0.5
318318
})
319319

320-
const result = deduplicateActivities([act1, act2, act3])
320+
const result = aggregateActivities([act1, act2, act3])
321321

322322
expect(result).toHaveLength(1)
323323
expect(result[0]?.funScore).toBe(0.7) // (0.8 + 0.6 + 0.7) / 3 = 0.7
@@ -351,7 +351,7 @@ describe('Aggregation Module', () => {
351351
})
352352
]
353353

354-
const result = deduplicateActivities(activities)
354+
const result = aggregateActivities(activities)
355355

356356
expect(result).toHaveLength(1)
357357
// First occurrence is the primary
@@ -403,7 +403,7 @@ describe('Aggregation Module', () => {
403403
createActivity({ id: 6, activity: 'Thrice Mentioned', action: 'explore', object: 'trail' })
404404
]
405405

406-
const deduped = deduplicateActivities(raw)
406+
const deduped = aggregateActivities(raw)
407407
const result = getMostWanted(deduped)
408408

409409
expect(result).toHaveLength(2)
@@ -421,7 +421,7 @@ describe('Aggregation Module', () => {
421421
createActivity({ id: 6, activity: 'activity c', action: 'try', object: 'thing-c' })
422422
]
423423

424-
const deduped = deduplicateActivities(raw)
424+
const deduped = aggregateActivities(raw)
425425
const result = getMostWanted(deduped, 2)
426426

427427
expect(result).toHaveLength(2)

src/cli/aggregation.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ function round(value: number, decimals: number): number {
134134
* @param activities Classified activities to deduplicate
135135
* @returns Deduplicated activities with merged messages
136136
*/
137-
export function deduplicateActivities(
137+
export function aggregateActivities(
138138
activities: readonly ClassifiedActivity[]
139139
): ClassifiedActivity[] {
140140
if (activities.length === 0) return []

src/cli/e2e/02-scan.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ describe('scan command', () => {
5050
})
5151

5252
it('finds specific suggestions in output', () => {
53-
const { stdout } = runCli(`scan ${FIXTURE_INPUT} --cache-dir ${testState.tempCacheDir}`)
53+
const { stdout } = runCli(`scan ${FIXTURE_INPUT} -n 20 --cache-dir ${testState.tempCacheDir}`)
5454

5555
expect(stdout).toContain('Karangahake Gorge')
5656
expect(stdout).toContain('Prinzhorn collection')

src/cli/e2e/07-classify.test.ts

Lines changed: 73 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ describe('classify command', () => {
5656
expect(hotAirBalloon).toBeDefined()
5757
expect(hotAirBalloon?.category).toBeOneOf(['experiences', 'travel'])
5858
expect(hotAirBalloon?.messages[0]?.sender).toBe('Alice Smith')
59-
expect(hotAirBalloon?.funScore).toBe(0.9)
60-
expect(hotAirBalloon?.interestingScore).toBe(0.9)
59+
expect(hotAirBalloon?.funScore).toBeGreaterThanOrEqual(0.8)
60+
expect(hotAirBalloon?.interestingScore).toBeGreaterThanOrEqual(0.8)
6161
expect(hotAirBalloon?.country).toBe('Turkey')
6262
expect(hotAirBalloon?.messages[0]?.message).toMatch(/hot air ballon/i)
6363

@@ -93,12 +93,82 @@ describe('classify command', () => {
9393
expect(yellowstone?.messages[0]?.sender).toBe('John Smith')
9494
expect(yellowstone?.venue).toMatch(/Yellowstone/i)
9595

96-
// Check Karangahake Gorge
96+
// Check Karangahake Gorge - should be aggregated from 2 mentions
9797
const karangahake = activities.find((a) => a.activity.includes('Karangahake'))
9898
expect(karangahake).toBeDefined()
9999
expect(karangahake?.category).toBe('nature')
100100
expect(karangahake?.messages[0]?.sender).toBe('John Smith')
101101
expect(karangahake?.country).toBe('New Zealand')
102+
// Should have 2 messages from aggregation
103+
expect(karangahake?.messages.length).toBe(2)
104+
})
105+
106+
it('aggregates duplicate activities by merging messages', () => {
107+
const activities = readCacheJson<ClassifiedActivity[]>(
108+
testState.tempCacheDir,
109+
'classifications.json'
110+
)
111+
112+
// Karangahake Gorge is mentioned twice in the chat - should be aggregated
113+
const karangahake = activities.find((a) => a.activity.toLowerCase().includes('karangahake'))
114+
expect(karangahake).toBeDefined()
115+
expect(karangahake?.messages.length).toBe(2)
116+
117+
// Check that the merged messages keep their senders (John Smith must be among them)
118+
const senders = karangahake?.messages.map((m) => m.sender) ?? []
119+
expect(senders).toContain('John Smith')
120+
121+
// Check date range spans both mentions (Oct 11 and Nov 15)
122+
const dates = karangahake?.messages.map((m) => new Date(m.timestamp)) ?? []
123+
const sortedDates = dates.sort((a, b) => a.getTime() - b.getTime())
124+
if (sortedDates.length >= 2) {
125+
const firstDate = sortedDates[0]
126+
const lastDate = sortedDates[sortedDates.length - 1]
127+
if (firstDate && lastDate) {
128+
// First mention is Oct 11, second is Nov 15 - at least a month apart
129+
const daysDiff = (lastDate.getTime() - firstDate.getTime()) / (1000 * 60 * 60 * 24)
130+
expect(daysDiff).toBeGreaterThanOrEqual(30)
131+
}
132+
}
133+
134+
// Paintball is mentioned twice - should be aggregated
135+
const paintball = activities.find((a) => a.activity.toLowerCase().includes('paintball'))
136+
expect(paintball).toBeDefined()
137+
expect(paintball?.messages.length).toBe(2)
138+
})
139+
140+
it('does not aggregate compound activities with non-compound', () => {
141+
const activities = readCacheJson<ClassifiedActivity[]>(
142+
testState.tempCacheDir,
143+
'classifications.json'
144+
)
145+
146+
// "Go to a play or a concert" is compound, "Go to a play" is not
147+
// They should NOT be aggregated together
148+
const playActivities = activities.filter((a) =>
149+
a.activity.toLowerCase().includes('go to a play')
150+
)
151+
// The AI output varies: sometimes one entry is compound (2 results remain), and sometimes both are extracted as separate non-compound activities that then get aggregated (1 result)
152+
expect(playActivities.length).toBeOneOf([1, 2])
153+
})
154+
155+
it('does not create duplicate entries for aggregated activities', () => {
156+
const activities = readCacheJson<ClassifiedActivity[]>(
157+
testState.tempCacheDir,
158+
'classifications.json'
159+
)
160+
161+
// Count activities mentioning Karangahake - should be exactly 1 (aggregated)
162+
const karangahakeCount = activities.filter((a) =>
163+
a.activity.toLowerCase().includes('karangahake')
164+
).length
165+
expect(karangahakeCount).toBe(1)
166+
167+
// Count paintball activities - should be exactly 1 (aggregated)
168+
const paintballCount = activities.filter((a) =>
169+
a.activity.toLowerCase().includes('paintball')
170+
).length
171+
expect(paintballCount).toBe(1)
102172
})
103173

104174
it('sorts activities by score (interesting * 2 + fun)', () => {

src/cli/e2e/08-geocode.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ describe('geocode command', () => {
189189
// Original classification fields should be preserved (AI may classify as experiences or nature)
190190
expect(whaleSafari?.category).toBeOneOf(['experiences', 'nature'])
191191
expect(whaleSafari?.messages[0]?.sender).toBe('John Smith')
192-
expect(whaleSafari?.venue).toBe('Auckland Whale & Dolphin Safari')
192+
expect(whaleSafari?.venue).toMatch(/Whale/i)
193193
expect(whaleSafari?.city).toBe('Auckland')
194194
expect(whaleSafari?.country).toBe('New Zealand')
195195
expect(whaleSafari?.funScore).toBeGreaterThanOrEqual(0.8)

src/cli/e2e/10-analyze.test.ts

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import { existsSync, mkdtempSync, readdirSync, readFileSync, rmSync } from 'node:fs'
88
import { tmpdir } from 'node:os'
99
import { join } from 'node:path'
10+
import { parse } from 'csv-parse/sync'
1011
import { afterAll, beforeAll, describe, expect, it } from 'vitest'
1112
import { FIXTURE_INPUT, runCli, testState } from './helpers'
1213

@@ -68,6 +69,7 @@ describe('analyze command', () => {
6869
expect(header).toContain('location')
6970
expect(header).toContain('latitude')
7071
expect(header).toContain('longitude')
72+
expect(header).toContain('mention_count')
7173
})
7274

7375
it('contains activity data rows', () => {
@@ -81,6 +83,21 @@ describe('analyze command', () => {
8183
expect(csv.toLowerCase()).toContain('hot air balloon')
8284
expect(csv.toLowerCase()).toContain('whale')
8385
})
86+
87+
it('shows mention count for aggregated activities', () => {
88+
const csv = readFileSync(join(outputDir, 'activities.csv'), 'utf-8')
89+
const records = parse(csv, { columns: true }) as Array<Record<string, string>>
90+
91+
// Find Karangahake row - should have mention_count of 2
92+
const karangahake = records.find((r) => r.activity?.toLowerCase().includes('karangahake'))
93+
expect(karangahake).toBeDefined()
94+
expect(karangahake?.mention_count).toBe('2')
95+
96+
// Find paintball row - should have mention_count of 2
97+
const paintball = records.find((r) => r.activity?.toLowerCase().includes('paintball'))
98+
expect(paintball).toBeDefined()
99+
expect(paintball?.mention_count).toBe('2')
100+
})
84101
})
85102

86103
describe('JSON export', () => {
@@ -114,6 +131,25 @@ describe('analyze command', () => {
114131
expect(activity).toHaveProperty('funScore')
115132
expect(activity).toHaveProperty('interestingScore')
116133
})
134+
135+
it('aggregated activities have multiple messages', () => {
136+
const json = readFileSync(join(outputDir, 'activities.json'), 'utf-8')
137+
const data = JSON.parse(json)
138+
139+
// Find Karangahake Gorge - mentioned twice, should have 2 messages
140+
const karangahake = data.activities.find((a: { activity: string }) =>
141+
a.activity.toLowerCase().includes('karangahake')
142+
)
143+
expect(karangahake).toBeDefined()
144+
expect(karangahake.messages.length).toBe(2)
145+
146+
// Find paintball activity - mentioned twice, should have 2 messages
147+
const paintball = data.activities.find((a: { activity: string }) =>
148+
a.activity.toLowerCase().includes('paintball')
149+
)
150+
expect(paintball).toBeDefined()
151+
expect(paintball.messages.length).toBe(2)
152+
})
117153
})
118154

119155
describe('Map HTML export', () => {

src/cli/e2e/helpers.ts

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,10 @@ export function setupE2ETests(): E2ETestState {
314314
console.log('⚠️ No cache fixture found - API calls will be made')
315315
}
316316

317+
if (process.env.DEBUG_E2E) {
318+
console.log(`🔍 E2E temp cache dir: ${tempCacheDir}`)
319+
}
320+
317321
extractCacheFixture(tempCacheDir)
318322
const initialCacheHash = hashCacheDirectories(tempCacheDir)
319323
const initialCacheFiles = listCacheFiles(tempCacheDir)
@@ -384,5 +388,9 @@ export function teardownE2ETests(state: E2ETestState): void {
384388
console.error('')
385389
}
386390

387-
rmSync(state.tempCacheDir, { recursive: true, force: true })
391+
if (process.env.DEBUG_E2E) {
392+
console.log(`🔍 Preserving temp cache dir for debugging: ${state.tempCacheDir}`)
393+
} else {
394+
rmSync(state.tempCacheDir, { recursive: true, force: true })
395+
}
388396
}

src/cli/steps/classify.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import { classifyBatch, filterActivities, sortActivitiesByScore } from '../../index'
1010
import type { ScrapedMetadata } from '../../scraper/types'
1111
import type { CandidateMessage, ClassifiedActivity, ClassifierConfig } from '../../types'
12+
import { aggregateActivities } from '../aggregation'
1213
import { resolveModelConfig, resolveUserContext } from '../model'
1314
import { runWorkerPool } from '../worker-pool'
1415
import type { PipelineContext } from './context'
@@ -173,8 +174,9 @@ export async function stepClassify(
173174
allActivities.push(...batchResult)
174175
}
175176

176-
// Filter and sort activities by score (interesting prioritized over fun)
177-
const activities = sortActivitiesByScore(filterActivities(allActivities))
177+
// Aggregate duplicate activities, then filter and sort them by score (interesting prioritized over fun)
178+
const deduplicated = aggregateActivities(allActivities)
179+
const activities = sortActivitiesByScore(filterActivities(deduplicated))
178180

179181
const stats: ClassifyStats = {
180182
candidatesClassified: candidates.length,

0 commit comments

Comments
 (0)