Skip to content

Commit e23fddc

Browse files
committed
Big refactor to use Google Places for lookups, not the Geocoding API
1 parent 9191a14 commit e23fddc

53 files changed

Lines changed: 2116 additions & 1347 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/caching/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export {
99
generateCacheKey,
1010
generateClassifierCacheKey,
1111
generateEmbeddingCacheKey,
12-
generateGeocodeCacheKey
12+
generateGeocodeCacheKey,
13+
generatePlaceLookupCacheKey
1314
} from './key'
1415
export type { CachedResponse, CacheKeyComponents, ResponseCache } from './types'

src/caching/integration.test.ts

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import { tmpdir } from 'node:os'
1010
import { join } from 'node:path'
1111
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
1212
import { createActivity } from '../test-support'
13-
import type { CandidateMessage, ClassifiedActivity, GeocoderConfig } from '../types'
13+
import type { CandidateMessage, ClassifiedActivity, PlaceLookupConfig } from '../types'
1414
import { FilesystemCache } from './filesystem'
1515

1616
// Base config with required fields for all tests
@@ -23,6 +23,7 @@ const BASE_CONFIG = {
2323
const mockFetch = vi.fn()
2424
vi.mock('../http', () => ({
2525
httpFetch: mockFetch,
26+
guardedFetch: mockFetch, // place-lookup uses guardedFetch
2627
handleHttpError: async (response: { status: number; text: () => Promise<string> }) => {
2728
const errorText = await response.text()
2829
return {
@@ -305,14 +306,14 @@ describe('Cache Integration', () => {
305306
})
306307
})
307308

308-
describe('geocodeActivities cache integration', () => {
309-
const config: GeocoderConfig = {
309+
describe('lookupActivityPlaces cache integration', () => {
310+
const config: PlaceLookupConfig = {
310311
apiKey: 'test-key',
311312
regionBias: 'NZ'
312313
}
313314

314315
it('calls API on cache miss', async () => {
315-
const { geocodeActivities } = await import('../geocoder/index')
316+
const { lookupActivityPlaces } = await import('../place-lookup/index')
316317

317318
const suggestions = [createClassifiedActivity(1, 'Try the cafe', 'Cuba Street, Wellington')]
318319

@@ -330,14 +331,14 @@ describe('Cache Integration', () => {
330331
})
331332
})
332333

333-
const result = await geocodeActivities(suggestions, config, cache)
334+
const result = await lookupActivityPlaces(suggestions, config, cache)
334335

335336
expect(result).toHaveLength(1)
336337
expect(mockFetch).toHaveBeenCalledTimes(1)
337338
})
338339

339340
it('skips API call on cache hit', async () => {
340-
const { geocodeActivities } = await import('../geocoder/index')
341+
const { lookupActivityPlaces } = await import('../place-lookup/index')
341342

342343
const suggestions = [createClassifiedActivity(1, 'Try the cafe', 'Cuba Street, Wellington')]
343344

@@ -356,16 +357,16 @@ describe('Cache Integration', () => {
356357
})
357358

358359
// First call - should hit API
359-
await geocodeActivities(suggestions, config, cache)
360+
await lookupActivityPlaces(suggestions, config, cache)
360361
expect(mockFetch).toHaveBeenCalledTimes(1)
361362

362363
// Second call - should use cache
363-
await geocodeActivities(suggestions, config, cache)
364+
await lookupActivityPlaces(suggestions, config, cache)
364365
expect(mockFetch).toHaveBeenCalledTimes(1)
365366
})
366367

367368
it('makes new API call for different locations', async () => {
368-
const { geocodeActivities } = await import('../geocoder/index')
369+
const { lookupActivityPlaces } = await import('../place-lookup/index')
369370

370371
// Mock successful API responses
371372
mockFetch.mockResolvedValueOnce({
@@ -395,14 +396,14 @@ describe('Cache Integration', () => {
395396
})
396397

397398
// First call
398-
await geocodeActivities(
399+
await lookupActivityPlaces(
399400
[createClassifiedActivity(1, 'Try the cafe', 'Cuba Street, Wellington')],
400401
config,
401402
cache
402403
)
403404

404405
// Second call with different location
405-
await geocodeActivities(
406+
await lookupActivityPlaces(
406407
[createClassifiedActivity(2, 'Visit Queenstown', 'Queenstown, New Zealand')],
407408
config,
408409
cache

src/caching/key.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,30 @@ export function generateClassifierCacheKey(
8989
return `ai/${provider}/${model}/${promptSig}/${hash}`
9090
}
9191

92+
/**
93+
* Generate the cache key for a place lookup request; the hash covers service 'google', the lookup type (passed as `model`), and a { query, regionBias } payload.
94+
* Path: places/<type>/<hash>.json — the returned key is `places/<type>/<hash>`; NOTE(review): the `.json` suffix is presumably added by the cache layer, confirm.
95+
*
96+
* @param type - 'places' for Places API Text Search, 'geocode' for Geocoding API; also used as the middle path segment of the key
97+
* @param query - The search query or address (part of the hashed payload)
98+
* @param regionBias - Optional region bias code (part of the hashed payload; may be undefined)
99+
*/
100+
export function generatePlaceLookupCacheKey(
101+
type: 'places' | 'geocode',
102+
query: string,
103+
regionBias?: string
104+
): string {
105+
const hash = generateCacheKey({
106+
service: 'google',
107+
model: type,
108+
payload: { query, regionBias }
109+
})
110+
return `places/${type}/${hash}`
111+
}
112+
92113
/**
93114
* Generate cache key for geocoding requests.
115+
* @deprecated Use generatePlaceLookupCacheKey('geocode', ...) instead
94116
* Path: geo/google/<hash>.json
95117
*/
96118
export function generateGeocodeCacheKey(location: string, regionBias?: string): string {

src/caching/pipeline.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* ├── candidates.all.json
1515
* ├── scraped_urls.json
1616
* ├── classifications.json
17-
* └── geocodings.json
17+
* └── place_lookups.json
1818
* ```
1919
*
2020
* The sha256 is computed from the input file bytes (NOT content).
@@ -36,13 +36,13 @@ type PipelineStage =
3636
| 'scrape_stats'
3737
| 'scrape_metadata'
3838
| 'classify_stats'
39-
| 'geocode_stats'
39+
| 'place_lookup_stats'
4040
| 'fetch_images_stats'
4141
| 'candidates.heuristics'
4242
| 'candidates.embeddings'
4343
| 'candidates.all'
4444
| 'classifications'
45-
| 'geocodings'
45+
| 'place_lookups'
4646
| 'images'
4747

4848
interface PipelineRunMeta {
@@ -97,7 +97,7 @@ const STAGES_WITH_TIMESTAMPS: PipelineStage[] = [
9797
'candidates.embeddings',
9898
'candidates.all',
9999
'classifications',
100-
'geocodings'
100+
'place_lookups'
101101
]
102102

103103
function stageHasTimestamps(stage: PipelineStage): boolean {

src/cli.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ import { cmdExport } from './cli/commands/export'
1717
import { cmdFetchImageUrls } from './cli/commands/fetch-image-urls'
1818
import { cmdFetchImages } from './cli/commands/fetch-images'
1919
import { cmdFilter } from './cli/commands/filter'
20-
import { cmdGeocode } from './cli/commands/geocode'
2120
import { cmdList } from './cli/commands/list'
2221
import { cmdParse } from './cli/commands/parse'
22+
import { cmdPlaceLookup } from './cli/commands/place-lookup'
2323
import { cmdPreview } from './cli/commands/preview'
2424
import { cmdScan } from './cli/commands/scan'
2525
import { cmdScrapeUrls } from './cli/commands/scrape-urls'
@@ -72,8 +72,8 @@ async function main(): Promise<void> {
7272
await cmdClassify(args, logger)
7373
break
7474

75-
case 'geocode':
76-
await cmdGeocode(args, logger)
75+
case 'place-lookup':
76+
await cmdPlaceLookup(args, logger)
7777
break
7878

7979
case 'fetch-image-urls':

src/cli/args.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ describe('CLI Args', () => {
4242

4343
it('parses boolean flags for analyze', () => {
4444
const args = parseArgs(
45-
['analyze', 'chat.txt', '-c', 'NZ', '--skip-geocoding', '--dry-run', '--verbose'],
45+
['analyze', 'chat.txt', '-c', 'NZ', '--skip-place-lookup', '--dry-run', '--verbose'],
4646
false
4747
)
48-
expect(args.skipGeocoding).toBe(true)
48+
expect(args.skipPlaceLookup).toBe(true)
4949
expect(args.dryRun).toBe(true)
5050
expect(args.verbose).toBe(true)
5151
})
@@ -57,7 +57,7 @@ describe('CLI Args', () => {
5757
expect(args.minConfidence).toBe(0.5)
5858
expect(args.maxResults).toBe(10)
5959
expect(args.maxMessages).toBeUndefined()
60-
expect(args.skipGeocoding).toBe(false)
60+
expect(args.skipPlaceLookup).toBe(false)
6161
expect(args.quiet).toBe(false)
6262
expect(args.verbose).toBe(false)
6363
expect(args.dryRun).toBe(false)

src/cli/args.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ export interface CLIArgs {
1616
outputDir: string
1717
formats: string[]
1818
minConfidence: number
19-
skipGeocoding: boolean
19+
skipPlaceLookup: boolean
2020
fetchImages: boolean
21-
skipCdn: boolean
21+
skipMediaLibrary: boolean
2222
skipPixabay: boolean
2323
skipWikipedia: boolean
2424
skipGooglePlaces: boolean
@@ -172,9 +172,9 @@ function buildCLIArgs(commandName: string, input: string, opts: Record<string, u
172172
outputDir: typeof opts.outputDir === 'string' ? opts.outputDir : DEFAULT_OUTPUT_DIR,
173173
formats: format.split(',').map((f) => f.trim()),
174174
minConfidence: Number.parseFloat(String(opts.minConfidence ?? '0.5')),
175-
skipGeocoding: opts.skipGeocoding === true,
175+
skipPlaceLookup: opts.skipPlaceLookup === true,
176176
fetchImages: opts.images === true,
177-
skipCdn: opts.imageCdn === false,
177+
skipMediaLibrary: opts.mediaLibrary === false,
178178
skipPixabay: opts.skipPixabay === true,
179179
skipWikipedia: opts.skipWikipedia === true,
180180
skipGooglePlaces: opts.skipGooglePlaces === true,

src/cli/commands.ts

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,20 @@ function addPrefixedPdfOptions(cmd: Command): Command {
9292
.option('--pdf-sort <order>', 'Sort for PDF: score, oldest, newest (overrides --export-sort)')
9393
}
9494

95+
/**
96+
* Add common options for classify and place-lookup commands: home country, timezone, JSON output, max results (default '10'), max messages, --all, and --dry-run. Returns the result of the chained `.option()` calls on `cmd`.
97+
*/
98+
function addClassifyOptions(cmd: Command): Command {
99+
return cmd
100+
.option('-c, --home-country <name>', 'Your home country (auto-detected from IP if not set)')
101+
.option('--timezone <tz>', 'Your timezone (auto-detected from system if not set)')
102+
.option('--json [file]', 'Output as JSON (to file if specified, otherwise stdout)')
103+
.option('-n, --max-results <num>', 'Max results to display', '10')
104+
.option('-m, --max-messages <num>', 'Max messages to process (for testing)')
105+
.option('-a, --all', 'Show all activities (default: top 10)')
106+
.option('--dry-run', 'Show stats without API calls')
107+
}
108+
95109
/**
96110
* Add common pipeline options shared by analyze and export commands.
97111
*/
@@ -106,7 +120,7 @@ function addPipelineOptions(cmd: Command): Command {
106120
'csv,excel,json,map,pdf'
107121
)
108122
.option('--min-confidence <num>', 'Minimum confidence threshold', '0.5')
109-
.option('--skip-geocoding', 'Skip geocoding step')
123+
.option('--skip-place-lookup', 'Skip place lookup step')
110124
.option('--images', 'Fetch images for activities (slower, uses external APIs)')
111125
.option(
112126
'--media-library-path <path>',
@@ -215,7 +229,7 @@ export function createProgram(): Command {
215229
const baseAnalyzeCmd = program
216230
.command('analyze')
217231
.description(
218-
'Run the complete pipeline (parse → filter → scrape-urls → classify → geocode → export)'
232+
'Run the complete pipeline (parse → filter → scrape-urls → classify → place-lookup → export)'
219233
)
220234
.argument('<input>', 'Chat export (.zip, directory, or .txt file)')
221235

@@ -287,38 +301,28 @@ export function createProgram(): Command {
287301
.option('--dry-run', 'Show URL count without scraping')
288302

289303
// ============ CLASSIFY ============
290-
program
291-
.command('classify')
292-
.description('Classify candidates into activities using AI')
293-
.argument('<input>', 'Chat export (.zip, directory, or .txt file)')
294-
.option('-c, --home-country <name>', 'Your home country (auto-detected from IP if not set)')
295-
.option('--timezone <tz>', 'Your timezone (auto-detected from system if not set)')
296-
.option('--json [file]', 'Output as JSON (to file if specified, otherwise stdout)')
297-
.option('-n, --max-results <num>', 'Max results to display', '10')
298-
.option('-m, --max-messages <num>', 'Max messages to process (for testing)')
299-
.option('-a, --all', 'Show all activities (default: top 10)')
300-
.option('--dry-run', 'Show stats without API calls')
304+
addClassifyOptions(
305+
program
306+
.command('classify')
307+
.description('Classify candidates into activities using AI')
308+
.argument('<input>', 'Chat export (.zip, directory, or .txt file)')
309+
)
301310

302-
// ============ GEOCODE ============
303-
program
304-
.command('geocode')
305-
.description('Geocode classified activities using Google Maps API')
306-
.argument('<input>', 'Chat export (.zip, directory, or .txt file)')
307-
.option('-c, --home-country <name>', 'Your home country (auto-detected from IP if not set)')
308-
.option('--timezone <tz>', 'Your timezone (auto-detected from system if not set)')
309-
.option('--json [file]', 'Output as JSON (to file if specified, otherwise stdout)')
310-
.option('-n, --max-results <num>', 'Max results to display', '10')
311-
.option('-m, --max-messages <num>', 'Max messages to process (for testing)')
312-
.option('-a, --all', 'Show all geocoded activities (default: top 10)')
313-
.option('--dry-run', 'Show stats without API calls')
311+
// ============ PLACE-LOOKUP ============
312+
addClassifyOptions(
313+
program
314+
.command('place-lookup')
315+
.description('Look up places for classified activities using Google Maps API')
316+
.argument('<input>', 'Chat export (.zip, directory, or .txt file)')
317+
)
314318

315319
// ============ FETCH-IMAGE-URLS ============
316320
program
317321
.command('fetch-image-urls')
318322
.description('Fetch image URLs for geocoded activities')
319323
.argument('<input>', 'Chat export file or directory')
320324
.option('--json [file]', 'Output as JSON (to file if specified, otherwise stdout)')
321-
.option('--no-image-cdn', 'Skip CDN default images (fetch all from APIs)')
325+
.option('--no-media-library', 'Skip media library images')
322326
.option('--skip-pixabay', 'Skip Pixabay image search')
323327
.option('--skip-wikipedia', 'Skip Wikipedia image lookup')
324328
.option('--skip-google-places', 'Skip Google Places photos')

src/cli/commands/export.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ export async function cmdExport(args: CLIArgs, logger: Logger): Promise<void> {
6262
if (needsThumbnails) {
6363
const result = await runner.run('fetchImages')
6464
thumbnails = result.thumbnails
65-
// Get geocoded activities from the runner
66-
const geocodeResult = await runner.run('geocode')
67-
geocoded = geocodeResult.activities
65+
// Get place lookup activities from the runner
66+
const placeLookupResult = await runner.run('placeLookup')
67+
geocoded = placeLookupResult.activities
6868
} else {
69-
// Just run through geocode step
70-
const result = await runner.run('geocode')
69+
// Just run through place lookup step
70+
const result = await runner.run('placeLookup')
7171
geocoded = result.activities
7272
}
7373

src/cli/commands/fetch-image-urls.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,11 @@ function logStatsSummary(stats: FetchImagesStats, logger: Logger): void {
5858
export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<void> {
5959
const { ctx, config } = await initCommandContext('Fetch Image URLs', args, logger)
6060

61-
// Use StepRunner to handle dependencies: geocode → fetch-image-urls
61+
// Use StepRunner to handle dependencies: placeLookup → fetch-image-urls
6262
const runner = new StepRunner(ctx, args, config, logger)
6363

64-
// Run geocode step (which runs the full pipeline up to geocode)
65-
const { activities: geocodedActivities } = await runner.run('geocode')
64+
// Run place lookup step (which runs the full pipeline up to place-lookup)
65+
const { activities: geocodedActivities } = await runner.run('placeLookup')
6666

6767
logger.log(` Geocoded ${geocodedActivities.length} activities`)
6868

@@ -76,7 +76,7 @@ export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<
7676
logger.log('\n📊 Image Fetch Estimate (dry run)')
7777
logger.log(` Activities: ${geocodedActivities.length}`)
7878
const sources: string[] = []
79-
if (!args.skipCdn) sources.push('CDN')
79+
if (!args.skipMediaLibrary) sources.push('Media Library')
8080
if (!args.skipWikipedia) sources.push('Wikipedia')
8181
if (!args.skipPixabay && process.env.PIXABAY_API_KEY) sources.push('Pixabay')
8282
if (!args.skipGooglePlaces && process.env.GOOGLE_MAPS_API_KEY) sources.push('Google Places')
@@ -88,7 +88,7 @@ export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<
8888
// Media library path can come from CLI arg or config
8989
const mediaLibraryPath = args.mediaLibraryPath ?? config?.mediaLibraryPath
9090
const fetchResult = await stepFetchImageUrls(ctx, geocodedActivities, {
91-
skipCdn: args.skipCdn,
91+
skipMediaLibrary: args.skipMediaLibrary,
9292
skipPixabay: args.skipPixabay,
9393
skipWikipedia: args.skipWikipedia,
9494
skipGooglePlaces: args.skipGooglePlaces,
@@ -153,9 +153,9 @@ function displayActivities(
153153

154154
if (image) {
155155
const queryInfo = image.query ? ` (query: "${image.query}")` : ''
156-
logger.log(` 🖼️ ${image.source}${queryInfo}: ${image.url}`)
157-
if (image.attribution) {
158-
logger.log(` 📝 ${image.attribution.name}`)
156+
logger.log(` 🖼️ ${image.meta.source}${queryInfo}: ${image.imageUrl}`)
157+
if (image.meta.attribution) {
158+
logger.log(` 📝 ${image.meta.attribution.name}`)
159159
}
160160
} else {
161161
logger.log(` ⚠️ No image found`)

0 commit comments

Comments
 (0)