DocSpring
diff --git a/src/cli/commands/classify.ts b/src/cli/commands/classify.ts
Lines changed: 24 additions & 1 deletion
diff --git a/src/cli/commands/embed.ts b/src/cli/commands/embed.ts
Lines changed: 9 additions & 3 deletions
diff --git a/src/cli/commands/fetch-image-urls.ts b/src/cli/commands/fetch-image-urls.ts
Lines changed: 30 additions & 9 deletions
diff --git a/src/cli/commands/filter.ts b/src/cli/commands/filter.ts
Lines changed: 14 additions & 7 deletions
diff --git a/src/cli/commands/place-lookup.ts b/src/cli/commands/place-lookup.ts
Lines changed: 6 additions & 1 deletion
@@ -6,6 +6,12 @@
  */
 
 import { writeFile } from 'node:fs/promises'
+import {
+  calculateAIInputCost,
+  calculateAIOutputCost,
+  ESTIMATION_DEFAULTS,
+  formatMicrosAsDollars
+} from '../../costs'
 import { type ClassifiedActivity, formatLocation } from '../../types'
 import type { CLIArgs } from '../args'
 import {
@@ -103,9 +109,26 @@ export async function cmdClassify(args: CLIArgs, logger: Logger): Promise<void>
 
   // Dry run: show stats and exit
   if (args.dryRun) {
+    // Estimate classification cost
+    // Use default model for estimation (gemini-3-flash-preview)
+    const estimatedModel = 'gemini-3-flash-preview'
+    const batchSize = 30
+    const estimatedBatches = Math.ceil(candidates.length / batchSize)
+    // Rough estimate: ~100 tokens per candidate input, ~150 tokens per activity output
+    const estimatedInputTokens = candidates.length * 100
+    const estimatedOutputTokens = candidates.length * ESTIMATION_DEFAULTS.outputTokensPerActivity
+    const inputCostMicros = calculateAIInputCost(estimatedModel, estimatedInputTokens)
+    const outputCostMicros = calculateAIOutputCost(estimatedModel, estimatedOutputTokens)
+    const totalCostMicros = inputCostMicros + outputCostMicros
+
     logger.log('\n📊 Classification Estimate (dry run)')
     logger.log(`   Candidates to classify: ${candidates.length}`)
-    logger.log(`   Estimated batches: ${Math.ceil(candidates.length / 30)}`)
+    logger.log(`   Estimated batches: ${estimatedBatches}`)
+    logger.log(`   Model: ${estimatedModel}`)
+    logger.log(
+      `   Estimated tokens: ${estimatedInputTokens.toLocaleString()} in / ${estimatedOutputTokens.toLocaleString()} out`
+    )
+    logger.log(`   Estimated cost: ${formatMicrosAsDollars(totalCostMicros)}`)
     return
   }
 
 
@@ -6,13 +6,17 @@
  */
 
 import { countTokens } from '../../classifier/tokenizer'
+import {
+  calculateEmbeddingCost,
+  DEFAULT_EMBEDDING_MODELS,
+  formatMicrosAsDollars
+} from '../../costs'
 import type { ParsedMessage } from '../../types'
 import type { CLIArgs } from '../args'
 import { initCommand } from '../helpers'
 import type { Logger } from '../logger'
 import { stepEmbed } from '../steps/embed'
 
-const EMBEDDING_COST_PER_MILLION_TOKENS = 0.13
 const MIN_MESSAGE_LENGTH = 10
 
 function analyzeMessages(messages: readonly ParsedMessage[]): {
@@ -48,7 +52,8 @@ export async function cmdEmbed(args: CLIArgs, logger: Logger): Promise<void> {
 
   // Analyze messages to embed
   const { toEmbed, skipped, totalTokens } = analyzeMessages(messages)
-  const estimatedCost = (totalTokens / 1_000_000) * EMBEDDING_COST_PER_MILLION_TOKENS
+  const embeddingModel = DEFAULT_EMBEDDING_MODELS.openai
+  const estimatedCostMicros = calculateEmbeddingCost(embeddingModel, totalTokens)
   const batchCount = Math.ceil(toEmbed / 100)
 
   logger.log(`\n📊 Embedding Stats`)
@@ -57,7 +62,8 @@ export async function cmdEmbed(args: CLIArgs, logger: Logger): Promise<void> {
   logger.log(`   Skipped (too short): ${skipped.toLocaleString()}`)
   logger.log(`   Total tokens: ${totalTokens.toLocaleString()}`)
   logger.log(`   API batches: ${batchCount}`)
-  logger.log(`   Estimated cost: $${estimatedCost.toFixed(4)}`)
+  logger.log(`   Model: ${embeddingModel}`)
+  logger.log(`   Estimated cost: ${formatMicrosAsDollars(estimatedCostMicros)}`)
 
   if (args.dryRun) {
     logger.log('\n🏃 Dry run - no API calls made')
 
@@ -10,6 +10,7 @@
  */
 
 import { writeFile } from 'node:fs/promises'
+import { calculatePlacesPhotoCost, formatMicrosAsDollars } from '../../costs'
 import type { ImageResult } from '../../images/types'
 import type { GeocodedActivity } from '../../types'
 import type { CLIArgs } from '../args'
@@ -58,6 +59,34 @@ function logStatsSummary(stats: FetchImagesStats, logger: Logger): void {
   if (stats.failed > 0) logger.log(`   Not found: ${stats.failed}`)
 }
 
+function getEnabledImageSources(args: CLIArgs): string[] {
+  const sources: string[] = []
+  if (!args.skipMediaLibrary) sources.push('Media Library')
+  if (!args.skipWikipedia) sources.push('Wikipedia')
+  if (!args.skipPexels && process.env.PEXELS_API_KEY) sources.push('Pexels')
+  if (!args.skipPixabay && process.env.PIXABAY_API_KEY) sources.push('Pixabay')
+  if (!args.skipGooglePlaces && process.env.GOOGLE_MAPS_API_KEY) sources.push('Google Places')
+  return sources
+}
+
+function logDryRunEstimate(activityCount: number, args: CLIArgs, logger: Logger): void {
+  const sources = getEnabledImageSources(args)
+  const willUseGooglePlaces = !args.skipGooglePlaces && !!process.env.GOOGLE_MAPS_API_KEY
+  const estimatedPhotoCostMicros = willUseGooglePlaces ? calculatePlacesPhotoCost(activityCount) : 0
+
+  logger.log('\n📊 Image Fetch Estimate (dry run)')
+  logger.log(`   Activities: ${activityCount}`)
+  logger.log(`   Sources: ${sources.join(', ') || 'none'}`)
+  if (willUseGooglePlaces) {
+    logger.log(
+      `   Estimated cost (if all from Google): ${formatMicrosAsDollars(estimatedPhotoCostMicros)}`
+    )
+    logger.log(`   Note: Free sources (Media Library, Pixabay, Pexels) are tried first`)
+  } else {
+    logger.log(`   Estimated cost: $0.00 (only free sources enabled)`)
+  }
+}
+
 export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<void> {
   const { ctx, config } = await initCommandContext('Fetch Image URLs', args, logger)
 
@@ -76,15 +105,7 @@ export async function cmdFetchImageUrls(args: CLIArgs, logger: Logger): Promise<
 
   // Dry run: show stats and exit
   if (args.dryRun) {
-    logger.log('\n📊 Image Fetch Estimate (dry run)')
-    logger.log(`   Activities: ${geocodedActivities.length}`)
-    const sources: string[] = []
-    if (!args.skipMediaLibrary) sources.push('Media Library')
-    if (!args.skipWikipedia) sources.push('Wikipedia')
-    if (!args.skipPexels && process.env.PEXELS_API_KEY) sources.push('Pexels')
-    if (!args.skipPixabay && process.env.PIXABAY_API_KEY) sources.push('Pixabay')
-    if (!args.skipGooglePlaces && process.env.GOOGLE_MAPS_API_KEY) sources.push('Google Places')
-    logger.log(`   Sources: ${sources.join(', ') || 'none'}`)
+    logDryRunEstimate(geocodedActivities.length, args, logger)
     return
   }
 
 
@@ -7,6 +7,11 @@
 
 import { writeFile } from 'node:fs/promises'
 import { countTokens } from '../../classifier/tokenizer'
+import {
+  calculateEmbeddingCost,
+  DEFAULT_EMBEDDING_MODELS,
+  formatMicrosAsDollars
+} from '../../costs'
 import { extractCandidatesByEmbeddings, extractCandidatesByHeuristics } from '../../index'
 import type { CandidateMessage, ParsedMessage } from '../../types'
 import type { CLIArgs, ExtractionMethod } from '../args'
@@ -30,24 +35,26 @@ interface FilterStats {
   embeddingsMatches?: number | undefined
 }
 
-const EMBEDDING_COST_PER_MILLION_TOKENS = 0.13
-
-function estimateEmbeddingCost(messages: readonly ParsedMessage[], logger: Logger): void {
+function estimateEmbeddingCostForMessages(
+  messages: readonly ParsedMessage[],
+  logger: Logger
+): void {
   const messagesToEmbed = messages.filter((m) => m.content.length > 10)
   let totalTokens = 0
 
   for (const msg of messagesToEmbed) {
     totalTokens += countTokens(msg.content)
   }
 
-  const costDollars = (totalTokens / 1_000_000) * EMBEDDING_COST_PER_MILLION_TOKENS
+  const embeddingModel = DEFAULT_EMBEDDING_MODELS.openai
+  const costMicros = calculateEmbeddingCost(embeddingModel, totalTokens)
   const batchCount = Math.ceil(messagesToEmbed.length / 100)
 
-  logger.log('\n📊 Embedding Cost Estimate (text-embedding-3-large)')
+  logger.log(`\n📊 Embedding Cost Estimate (${embeddingModel})`)
   logger.log(`   Messages to embed: ${messagesToEmbed.length.toLocaleString()}`)
   logger.log(`   Total tokens: ${totalTokens.toLocaleString()}`)
   logger.log(`   API batches: ${batchCount}`)
-  logger.log(`   Estimated cost: $${costDollars.toFixed(4)}`)
+  logger.log(`   Estimated cost: ${formatMicrosAsDollars(costMicros)}`)
 }
 
 function formatCandidatesText(output: FilterOutput, logger: Logger, showAll: boolean): void {
@@ -191,7 +198,7 @@ export async function cmdFilter(args: CLIArgs, logger: Logger): Promise<void> {
 
   // Dry run: show cost estimate and exit
   if (args.dryRun && (args.method === 'embeddings' || args.method === 'both')) {
-    estimateEmbeddingCost(parseResult.messages, logger)
+    estimateEmbeddingCostForMessages(parseResult.messages, logger)
     return
   }
 
 
@@ -6,6 +6,7 @@
  */
 
 import { writeFile } from 'node:fs/promises'
+import { calculatePlacesLookupCost, formatMicrosAsDollars } from '../../costs'
 import { filterWithCoordinates } from '../../place-lookup/index'
 import { formatLocation, type GeocodedActivity } from '../../types'
 import type { CLIArgs } from '../args'
@@ -63,11 +64,15 @@ export async function cmdPlaceLookup(args: CLIArgs, logger: Logger): Promise<voi
 
   // Dry run: show stats and exit
   if (args.dryRun) {
+    const withLocation = classifiedActivities.filter((a) => formatLocation(a)).length
+    // Estimate cost: worst case all activities need place search
+    const estimatedCostMicros = calculatePlacesLookupCost(classifiedActivities.length)
+
     logger.log('\n📊 Place Lookup Estimate (dry run)')
     logger.log(`   Activities to look up: ${classifiedActivities.length}`)
-    const withLocation = classifiedActivities.filter((a) => formatLocation(a)).length
     logger.log(`   With location info: ${withLocation}`)
     logger.log(`   Without location: ${classifiedActivities.length - withLocation}`)
+    logger.log(`   Estimated cost (max): ${formatMicrosAsDollars(estimatedCostMicros)}`)
     return
   }