Skip to content

Commit 71a70fb

Browse files
ndbroadbent and claude committed
Add centralized costs module and integrate with CLI commands
Costs module (src/costs/):
- pricing.ts: AI model pricing (Gemini 3, Claude 4.5, GPT-5), embeddings, Google Maps
- calculator.ts: Cost calculation functions for AI, embeddings, geocoding, images
- estimator.ts: Pre-processing cost estimation with confidence ranges
- tracker.ts: CostTracker class for session-based usage accumulation
- types.ts: Type definitions for providers, resources, usage records

CLI integration:
- embed.ts: Use calculateEmbeddingCost() instead of hardcoded pricing
- classify.ts: Show estimated AI costs in dry-run mode
- filter.ts: Use costs module for embedding cost estimation
- place-lookup.ts: Show estimated geocoding costs in dry-run mode
- fetch-image-urls.ts: Show estimated image costs in dry-run mode

All prices in micro-dollars for precision. 140 tests with 100% coverage.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 5a073ea commit 71a70fb

16 files changed

Lines changed: 2853 additions & 21 deletions

src/cli/commands/classify.ts

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,12 @@
66
*/
77

88
import { writeFile } from 'node:fs/promises'
9+
import {
10+
calculateAIInputCost,
11+
calculateAIOutputCost,
12+
ESTIMATION_DEFAULTS,
13+
formatMicrosAsDollars
14+
} from '../../costs'
915
import { type ClassifiedActivity, formatLocation } from '../../types'
1016
import type { CLIArgs } from '../args'
1117
import {
@@ -103,9 +109,26 @@ export async function cmdClassify(args: CLIArgs, logger: Logger): Promise<void>
103109

104110
// Dry run: show stats and exit
105111
if (args.dryRun) {
112+
// Estimate classification cost
113+
// Use default model for estimation (gemini-3-flash-preview)
114+
const estimatedModel = 'gemini-3-flash-preview'
115+
const batchSize = 30
116+
const estimatedBatches = Math.ceil(candidates.length / batchSize)
117+
// Rough estimate: ~100 tokens per candidate input, ~150 tokens per activity output
118+
const estimatedInputTokens = candidates.length * 100
119+
const estimatedOutputTokens = candidates.length * ESTIMATION_DEFAULTS.outputTokensPerActivity
120+
const inputCostMicros = calculateAIInputCost(estimatedModel, estimatedInputTokens)
121+
const outputCostMicros = calculateAIOutputCost(estimatedModel, estimatedOutputTokens)
122+
const totalCostMicros = inputCostMicros + outputCostMicros
123+
106124
logger.log('\n📊 Classification Estimate (dry run)')
107125
logger.log(` Candidates to classify: ${candidates.length}`)
108-
logger.log(` Estimated batches: ${Math.ceil(candidates.length / 30)}`)
126+
logger.log(` Estimated batches: ${estimatedBatches}`)
127+
logger.log(` Model: ${estimatedModel}`)
128+
logger.log(
129+
` Estimated tokens: ${estimatedInputTokens.toLocaleString()} in / ${estimatedOutputTokens.toLocaleString()} out`
130+
)
131+
logger.log(` Estimated cost: ${formatMicrosAsDollars(totalCostMicros)}`)
109132
return
110133
}
111134

src/cli/commands/embed.ts

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,17 @@
66
*/
77

88
import { countTokens } from '../../classifier/tokenizer'
9+
import {
10+
calculateEmbeddingCost,
11+
DEFAULT_EMBEDDING_MODELS,
12+
formatMicrosAsDollars
13+
} from '../../costs'
914
import type { ParsedMessage } from '../../types'
1015
import type { CLIArgs } from '../args'
1116
import { initCommand } from '../helpers'
1217
import type { Logger } from '../logger'
1318
import { stepEmbed } from '../steps/embed'
1419

15-
const EMBEDDING_COST_PER_MILLION_TOKENS = 0.13
1620
const MIN_MESSAGE_LENGTH = 10
1721

1822
function analyzeMessages(messages: readonly ParsedMessage[]): {
@@ -48,7 +52,8 @@ export async function cmdEmbed(args: CLIArgs, logger: Logger): Promise<void> {
4852

4953
// Analyze messages to embed
5054
const { toEmbed, skipped, totalTokens } = analyzeMessages(messages)
51-
const estimatedCost = (totalTokens / 1_000_000) * EMBEDDING_COST_PER_MILLION_TOKENS
55+
const embeddingModel = DEFAULT_EMBEDDING_MODELS.openai
56+
const estimatedCostMicros = calculateEmbeddingCost(embeddingModel, totalTokens)
5257
const batchCount = Math.ceil(toEmbed / 100)
5358

5459
logger.log(`\n📊 Embedding Stats`)
@@ -57,7 +62,8 @@ export async function cmdEmbed(args: CLIArgs, logger: Logger): Promise<void> {
5762
logger.log(` Skipped (too short): ${skipped.toLocaleString()}`)
5863
logger.log(` Total tokens: ${totalTokens.toLocaleString()}`)
5964
logger.log(` API batches: ${batchCount}`)
60-
logger.log(` Estimated cost: $${estimatedCost.toFixed(4)}`)
65+
logger.log(` Model: ${embeddingModel}`)
66+
logger.log(` Estimated cost: ${formatMicrosAsDollars(estimatedCostMicros)}`)
6167

6268
if (args.dryRun) {
6369
logger.log('\n🏃 Dry run - no API calls made')

src/cli/commands/fetch-image-urls.ts

Lines changed: 30 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
*/
1111

1212
import { writeFile } from 'node:fs/promises'
13+
import { calculatePlacesPhotoCost, formatMicrosAsDollars } from '../../costs'
1314
import type { ImageResult } from '../../images/types'
1415
import type { GeocodedActivity } from '../../types'
1516
import type { CLIArgs } from '../args'
@@ -58,6 +59,34 @@ function logStatsSummary(stats: FetchImagesStats, logger: Logger): void {
5859
if (stats.failed > 0) logger.log(` Not found: ${stats.failed}`)
5960
}
6061

62+
function getEnabledImageSources(args: CLIArgs): string[] {
63+
const sources: string[] = []
64+
if (!args.skipMediaLibrary) sources.push('Media Library')
65+
if (!args.skipWikipedia) sources.push('Wikipedia')
66+
if (!args.skipPexels && process.env.PEXELS_API_KEY) sources.push('Pexels')
67+
if (!args.skipPixabay && process.env.PIXABAY_API_KEY) sources.push('Pixabay')
68+
if (!args.skipGooglePlaces && process.env.GOOGLE_MAPS_API_KEY) sources.push('Google Places')
69+
return sources
70+
}
71+
72+
function logDryRunEstimate(activityCount: number, args: CLIArgs, logger: Logger): void {
73+
const sources = getEnabledImageSources(args)
74+
const willUseGooglePlaces = !args.skipGooglePlaces && !!process.env.GOOGLE_MAPS_API_KEY
75+
const estimatedPhotoCostMicros = willUseGooglePlaces ? calculatePlacesPhotoCost(activityCount) : 0
76+
77+
logger.log('\n📊 Image Fetch Estimate (dry run)')
78+
logger.log(` Activities: ${activityCount}`)
79+
logger.log(` Sources: ${sources.join(', ') || 'none'}`)
80+
if (willUseGooglePlaces) {
81+
logger.log(
82+
` Estimated cost (if all from Google): ${formatMicrosAsDollars(estimatedPhotoCostMicros)}`
83+
)
84+
logger.log(` Note: Free sources (Media Library, Pixabay, Pexels) are tried first`)
85+
} else {
86+
logger.log(` Estimated cost: $0.00 (only free sources enabled)`)
87+
}
88+
}
89+
6190
export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<void> {
6291
const { ctx, config } = await initCommandContext('Fetch Image URLs', args, logger)
6392

@@ -76,15 +105,7 @@ export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<
76105

77106
// Dry run: show stats and exit
78107
if (args.dryRun) {
79-
logger.log('\n📊 Image Fetch Estimate (dry run)')
80-
logger.log(` Activities: ${geocodedActivities.length}`)
81-
const sources: string[] = []
82-
if (!args.skipMediaLibrary) sources.push('Media Library')
83-
if (!args.skipWikipedia) sources.push('Wikipedia')
84-
if (!args.skipPexels && process.env.PEXELS_API_KEY) sources.push('Pexels')
85-
if (!args.skipPixabay && process.env.PIXABAY_API_KEY) sources.push('Pixabay')
86-
if (!args.skipGooglePlaces && process.env.GOOGLE_MAPS_API_KEY) sources.push('Google Places')
87-
logger.log(` Sources: ${sources.join(', ') || 'none'}`)
108+
logDryRunEstimate(geocodedActivities.length, args, logger)
88109
return
89110
}
90111

src/cli/commands/filter.ts

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@
77

88
import { writeFile } from 'node:fs/promises'
99
import { countTokens } from '../../classifier/tokenizer'
10+
import {
11+
calculateEmbeddingCost,
12+
DEFAULT_EMBEDDING_MODELS,
13+
formatMicrosAsDollars
14+
} from '../../costs'
1015
import { extractCandidatesByEmbeddings, extractCandidatesByHeuristics } from '../../index'
1116
import type { CandidateMessage, ParsedMessage } from '../../types'
1217
import type { CLIArgs, ExtractionMethod } from '../args'
@@ -30,24 +35,26 @@ interface FilterStats {
3035
embeddingsMatches?: number | undefined
3136
}
3237

33-
const EMBEDDING_COST_PER_MILLION_TOKENS = 0.13
34-
35-
function estimateEmbeddingCost(messages: readonly ParsedMessage[], logger: Logger): void {
38+
function estimateEmbeddingCostForMessages(
39+
messages: readonly ParsedMessage[],
40+
logger: Logger
41+
): void {
3642
const messagesToEmbed = messages.filter((m) => m.content.length > 10)
3743
let totalTokens = 0
3844

3945
for (const msg of messagesToEmbed) {
4046
totalTokens += countTokens(msg.content)
4147
}
4248

43-
const costDollars = (totalTokens / 1_000_000) * EMBEDDING_COST_PER_MILLION_TOKENS
49+
const embeddingModel = DEFAULT_EMBEDDING_MODELS.openai
50+
const costMicros = calculateEmbeddingCost(embeddingModel, totalTokens)
4451
const batchCount = Math.ceil(messagesToEmbed.length / 100)
4552

46-
logger.log('\n📊 Embedding Cost Estimate (text-embedding-3-large)')
53+
logger.log(`\n📊 Embedding Cost Estimate (${embeddingModel})`)
4754
logger.log(` Messages to embed: ${messagesToEmbed.length.toLocaleString()}`)
4855
logger.log(` Total tokens: ${totalTokens.toLocaleString()}`)
4956
logger.log(` API batches: ${batchCount}`)
50-
logger.log(` Estimated cost: $${costDollars.toFixed(4)}`)
57+
logger.log(` Estimated cost: ${formatMicrosAsDollars(costMicros)}`)
5158
}
5259

5360
function formatCandidatesText(output: FilterOutput, logger: Logger, showAll: boolean): void {
@@ -191,7 +198,7 @@ export async function cmdFilter(args: CLIArgs, logger: Logger): Promise<void> {
191198

192199
// Dry run: show cost estimate and exit
193200
if (args.dryRun && (args.method === 'embeddings' || args.method === 'both')) {
194-
estimateEmbeddingCost(parseResult.messages, logger)
201+
estimateEmbeddingCostForMessages(parseResult.messages, logger)
195202
return
196203
}
197204

src/cli/commands/place-lookup.ts

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
*/
77

88
import { writeFile } from 'node:fs/promises'
9+
import { calculatePlacesLookupCost, formatMicrosAsDollars } from '../../costs'
910
import { filterWithCoordinates } from '../../place-lookup/index'
1011
import { formatLocation, type GeocodedActivity } from '../../types'
1112
import type { CLIArgs } from '../args'
@@ -63,11 +64,15 @@ export async function cmdPlaceLookup(args: CLIArgs, logger: Logger): Promise<voi
6364

6465
// Dry run: show stats and exit
6566
if (args.dryRun) {
67+
const withLocation = classifiedActivities.filter((a) => formatLocation(a)).length
68+
// Estimate cost: worst case all activities need place search
69+
const estimatedCostMicros = calculatePlacesLookupCost(classifiedActivities.length)
70+
6671
logger.log('\n📊 Place Lookup Estimate (dry run)')
6772
logger.log(` Activities to look up: ${classifiedActivities.length}`)
68-
const withLocation = classifiedActivities.filter((a) => formatLocation(a)).length
6973
logger.log(` With location info: ${withLocation}`)
7074
logger.log(` Without location: ${classifiedActivities.length - withLocation}`)
75+
logger.log(` Estimated cost (max): ${formatMicrosAsDollars(estimatedCostMicros)}`)
7176
return
7277
}
7378

0 commit comments

Comments
 (0)