|
| 1 | +#!/usr/bin/env bun |
| 2 | +/** |
| 3 | + * Entity Resolution Test Script |
| 4 | + * |
| 5 | + * Standalone script for testing the search pipeline. |
| 6 | + * Usage: bun run scripts/resolve-entity.ts "The Matrix" --type movie |
| 7 | + */ |
| 8 | + |
import { homedir } from 'os'
import { parseArgs } from 'util'
import { FilesystemCache } from '../src/caching/filesystem'
import { resolveBook, resolveEntity, type EntityType, type ResolvedEntity } from '../src/search'
| 12 | + |
/**
 * Every entity type the `--type` option accepts, in the lowercase,
 * underscore-separated spelling that `formatEntityType` compares against.
 * The list is also joined into the help and error output.
 */
const VALID_ENTITY_TYPES: EntityType[] = [
  'movie', 'tv_show', 'web_series',
  'video_game', 'physical_game',
  'book', 'comic', 'play',
  'album', 'song', 'podcast', 'artist'
]
| 27 | + |
/**
 * Parse a user-supplied type name into one of the known entity types.
 *
 * Matching is case-insensitive, ignores surrounding whitespace, and treats
 * hyphens and spaces as underscores, so `TV-Show`, `tv show` and ` tv_show `
 * all resolve to `tv_show`.
 *
 * @param type - Raw type name, e.g. the value passed to `--type`.
 * @returns The matching entry of `VALID_ENTITY_TYPES`, or `null` when the
 *   name is not recognized.
 */
function formatEntityType(type: string): EntityType | null {
  const normalized = type
    .trim()
    .toLowerCase()
    .replace(/[-\s]+/g, '_')

  // Returning the list element itself keeps the result typed without an `as` assertion.
  return VALID_ENTITY_TYPES.find((candidate) => candidate === normalized) ?? null
}
| 35 | + |
/**
 * Render a resolved entity as a multi-line, human-readable summary.
 *
 * Title, ID, source, type and URL are always printed. Year, description,
 * image URL and Wikipedia URL are printed only when truthy. The external-ID
 * section is printed only when at least one ID has a truthy value, so the
 * header never appears with nothing beneath it.
 *
 * @param entity - The entity to describe.
 * @returns The summary lines joined with `\n` (no trailing newline).
 */
function formatResult(entity: ResolvedEntity): string {
  const lines: string[] = [
    `✓ Found: ${entity.title}`,
    ` ID: ${entity.id}`,
    ` Source: ${entity.source}`,
    ` Type: ${entity.type}`,
    ` URL: ${entity.url}`
  ]

  if (entity.year) {
    lines.push(` Year: ${entity.year}`)
  }

  if (entity.description) {
    lines.push(` Description: ${entity.description}`)
  }

  if (entity.imageUrl) {
    lines.push(` Image: ${entity.imageUrl}`)
  }

  if (entity.wikipediaUrl) {
    lines.push(` Wikipedia: ${entity.wikipediaUrl}`)
  }

  // Filter before printing the header: keys may be present with empty values.
  // The `?? {}` only guards against a missing map; it is a no-op otherwise.
  const presentIds = Object.entries(entity.externalIds ?? {}).filter(([, value]) => Boolean(value))
  if (presentIds.length > 0) {
    lines.push(` External IDs:`)
    for (const [key, value] of presentIds) {
      lines.push(` ${key}: ${value}`)
    }
  }

  return lines.join('\n')
}
| 74 | + |
/**
 * Print the command-line help to stdout.
 *
 * The text covers the invocation syntax, options, the accepted entity types
 * (taken from `VALID_ENTITY_TYPES`), examples, and every environment variable
 * the script reads, including the cache directory override.
 */
function printUsage(): void {
  const helpLines: string[] = [
    '',
    'Usage: bun run scripts/resolve-entity.ts <query> --type <type> [options]',
    '',
    'Arguments:',
    ' query Entity name to resolve (e.g., "The Matrix")',
    '',
    'Options:',
    ' -t, --type <type> Entity type (required)',
    ' -a, --author <name> Author name (for books)',
    ' --json Output as JSON',
    ' --dry-run Show what would be queried',
    ' -h, --help Show this help',
    '',
    'Entity types:',
    ` ${VALID_ENTITY_TYPES.join(', ')}`,
    '',
    'Examples:',
    ' bun run scripts/resolve-entity.ts "The Matrix" --type movie',
    ' bun run scripts/resolve-entity.ts "Pride and Prejudice" --type book --author "Jane Austen"',
    '',
    'Environment variables:',
    ' GOOGLE_PROGRAMMABLE_SEARCH_API_KEY Google Custom Search API key',
    ' GOOGLE_PROGRAMMABLE_SEARCH_CX Custom search engine ID',
    ' GOOGLE_AI_API_KEY Gemini API key (for AI disambiguation)',
    ' CHAT_TO_MAP_CACHE_DIR Cache directory (default: ~/.cache/chat-to-map)',
    ''
  ]

  // The leading and trailing empty entries reproduce the blank line before and after the help text.
  console.log(helpLines.join('\n'))
}
| 102 | + |
/**
 * CLI entry point: parse the arguments, build the resolver configuration from
 * the environment, resolve the requested entity, and print the outcome.
 *
 * Exit status: 0 for `--help`; 1 when the query or type is missing, the type
 * is not recognized, or nothing is found. A dry run returns without querying.
 *
 * With `--json`, stdout carries only the JSON document; the progress banner,
 * dry-run listing and "not found" hints are written to stderr instead, so the
 * output can be piped into a JSON parser.
 */
async function main(): Promise<void> {
  const { values, positionals } = parseArgs({
    args: process.argv.slice(2),
    options: {
      type: { type: 'string', short: 't' },
      author: { type: 'string', short: 'a' },
      json: { type: 'boolean', default: false },
      'dry-run': { type: 'boolean', default: false },
      help: { type: 'boolean', short: 'h', default: false }
    },
    allowPositionals: true
  })

  if (values.help) {
    printUsage()
    process.exit(0)
  }

  // Join every positional so an unquoted multi-word title (The Matrix) is not
  // silently cut down to its first word.
  const query = positionals.join(' ').trim()
  const typeArg = values.type
  const author = values.author
  const jsonOutput = values.json
  const dryRun = values['dry-run']

  // Human-oriented messages must stay off stdout in JSON mode.
  const info = (message: string): void => {
    if (jsonOutput) {
      console.error(message)
    } else {
      console.log(message)
    }
  }

  if (!query) {
    console.error('Error: No query specified')
    printUsage()
    process.exit(1)
  }

  if (!typeArg) {
    console.error('Error: No type specified')
    printUsage()
    process.exit(1)
  }

  const entityType = formatEntityType(typeArg)
  if (!entityType) {
    console.error(`Error: Invalid type "${typeArg}"`)
    console.error(`Valid types: ${VALID_ENTITY_TYPES.join(', ')}`)
    process.exit(1)
  }

  info(`\n🔍 Resolving: "${query}" (${entityType})`)
  if (author) {
    info(` Author: ${author}`)
  }

  // Set up cache. `||` (not `??`) is deliberate: an empty variable counts as unset.
  // HOME keeps precedence; homedir() covers environments where it is not defined.
  const homeDir = process.env.HOME || homedir()
  const cacheDir = process.env.CHAT_TO_MAP_CACHE_DIR || `${homeDir}/.cache/chat-to-map`
  const cache = new FilesystemCache(`${cacheDir}/requests`)

  // Build config from environment
  const googleApiKey = process.env.GOOGLE_PROGRAMMABLE_SEARCH_API_KEY
  const googleCx = process.env.GOOGLE_PROGRAMMABLE_SEARCH_CX
  const geminiApiKey = process.env.GOOGLE_AI_API_KEY

  const config = {
    wikidata: true,
    openlibrary: entityType === 'book',
    cache,
    // Google search needs both the API key and the engine ID.
    googleSearch: googleApiKey && googleCx ? { apiKey: googleApiKey, cx: googleCx } : undefined,
    aiClassification: geminiApiKey ? { apiKey: geminiApiKey } : undefined
  }

  if (dryRun) {
    info('\n📊 Dry run: would query:')
    info(` - Wikidata API (free)`)
    if (entityType === 'book') {
      info(` - Open Library API (free)`)
    }
    if (config.googleSearch) {
      info(` - Google Programmable Search API`)
    }
    if (config.aiClassification) {
      info(` - Gemini AI for disambiguation`)
    }
    return
  }

  info('')

  // The author-aware lookup is used only for books with an author given.
  let result: ResolvedEntity | null

  if (entityType === 'book' && author) {
    result = await resolveBook(query, author, config)
  } else {
    result = await resolveEntity(query, entityType, config)
  }

  if (result) {
    if (jsonOutput) {
      console.log(JSON.stringify(result, null, 2))
    } else {
      console.log(formatResult(result))
    }
  } else {
    info('✗ Not found')
    info('')
    info('Try:')
    info(' - Check spelling')
    info(' - Use the full title')
    info(' - Add year for disambiguation (e.g., "The Matrix 1999")')
    if (entityType === 'book') {
      info(' - Add author with --author')
    }
    process.exit(1)
  }
}
| 212 | + |
// Run the CLI; report any failure on stderr and exit with a non-zero status.
main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error; without this narrowing a
  // thrown string or object would be reported as "Error: undefined".
  const message = error instanceof Error ? error.message : String(error)
  console.error('Error:', message)
  process.exit(1)
})
0 commit comments