fix: resolve clippy warnings and finalize async token counter

jack · jack · commit b624590faa16 · 2025-06-27T11:14:14.000+02:00
- Fixed needless-borrow clippy warnings in context.rs
- Added the `blocking` feature to reqwest for backward compatibility
- Moved the demo file to the proper examples directory and registered it as an `[[example]]` in Cargo.toml
- Applied cargo fmt formatting
- All tests pass
diff --git a/crates/goose/Cargo.toml b/crates/goose/Cargo.toml
@@ -31,7 +31,8 @@ reqwest = { version = "0.12.9", features = [
         "zstd",
         "charset",
         "http2",
-        "stream"
+        "stream",
+        "blocking"
     ], default-features = false }
 tokio = { version = "1.43", features = ["full"] }
 serde = { version = "1.0", features = ["derive"] }
@@ -109,6 +110,10 @@ path = "examples/agent.rs"
 name = "databricks_oauth"
 path = "examples/databricks_oauth.rs"
 
+[[example]]
+name = "async_token_counter_demo"
+path = "examples/async_token_counter_demo.rs"
+
 [[bench]]
 name = "tokenization_benchmark"
 harness = false
diff --git a/crates/goose/examples/async_token_counter_demo.rs b/crates/goose/examples/async_token_counter_demo.rs
@@ -1,31 +1,30 @@
 /// Demo showing the async token counter improvement
-/// 
+///
 /// This example demonstrates the key improvement: no blocking runtime creation
-/// 
-/// BEFORE (blocking): 
+///
+/// BEFORE (blocking):
 /// ```rust
 /// let content = tokio::runtime::Runtime::new()?.block_on(async {
 ///     let response = reqwest::get(&file_url).await?;
 ///     // ... download logic
 /// })?;
 /// ```
-/// 
+///
 /// AFTER (async):
 /// ```rust
 /// let client = reqwest::Client::new();
 /// let response = client.get(&file_url).send().await?;
 /// let bytes = response.bytes().await?;
 /// tokio::fs::write(&file_path, bytes).await?;
 /// ```
-
-use goose::token_counter::{TokenCounter, AsyncTokenCounter, create_async_token_counter};
+use goose::token_counter::{create_async_token_counter, TokenCounter};
 use std::time::Instant;
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("🚀 Async Token Counter Demo");
     println!("===========================");
-    
+
     // Test text samples
     let samples = vec![
         "Hello, world!",
@@ -34,70 +33,72 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
         "async/await patterns eliminate blocking operations",
     ];
-    
+
     println!("\n📊 Performance Comparison");
     println!("-------------------------");
-    
+
     // Test original TokenCounter
     let start = Instant::now();
     let sync_counter = TokenCounter::new("Xenova--gpt-4o");
     let sync_init_time = start.elapsed();
-    
+
     let start = Instant::now();
     let mut sync_total = 0;
     for sample in &samples {
         sync_total += sync_counter.count_tokens(sample);
     }
     let sync_count_time = start.elapsed();
-    
+
     println!("🔴 Synchronous TokenCounter:");
     println!("   Init time: {:?}", sync_init_time);
     println!("   Count time: {:?}", sync_count_time);
     println!("   Total tokens: {}", sync_total);
-    
-    // Test AsyncTokenCounter  
+
+    // Test AsyncTokenCounter
     let start = Instant::now();
     let async_counter = create_async_token_counter("Xenova--gpt-4o").await?;
     let async_init_time = start.elapsed();
-    
+
     let start = Instant::now();
     let mut async_total = 0;
     for sample in &samples {
         async_total += async_counter.count_tokens(sample);
     }
     let async_count_time = start.elapsed();
-    
+
     println!("\n🟢 Async TokenCounter:");
     println!("   Init time: {:?}", async_init_time);
     println!("   Count time: {:?}", async_count_time);
     println!("   Total tokens: {}", async_total);
     println!("   Cache size: {}", async_counter.cache_size());
-    
+
     // Test caching benefit
     let start = Instant::now();
     let mut cached_total = 0;
     for sample in &samples {
         cached_total += async_counter.count_tokens(sample); // Should hit cache
     }
     let cached_time = start.elapsed();
-    
+
     println!("\n⚡ Cached TokenCounter (2nd run):");
     println!("   Count time: {:?}", cached_time);
     println!("   Total tokens: {}", cached_total);
     println!("   Cache size: {}", async_counter.cache_size());
-    
+
     // Verify same results
     assert_eq!(sync_total, async_total);
     assert_eq!(async_total, cached_total);
-    
+
     println!("\n✅ Key Improvements:");
     println!("   • No blocking runtime creation (eliminates deadlock risk)");
     println!("   • Global tokenizer caching with DashMap (lock-free concurrent access)");
     println!("   • Fast AHash for better cache performance");
     println!("   • Cache size management (prevents unbounded growth)");
-    println!("   • Token result caching ({}x faster on repeated text)", 
-             async_count_time.as_nanos() / cached_time.as_nanos().max(1));
+    println!(
+        "   • Token result caching ({}x faster on repeated text)",
+        async_count_time.as_nanos() / cached_time.as_nanos().max(1)
+    );
     println!("   • Proper async patterns throughout");
-    
+
     Ok(())
-}
+}
diff --git a/crates/goose/src/agents/context.rs b/crates/goose/src/agents/context.rs
@@ -16,8 +16,10 @@ impl Agent {
         messages: &[Message], // last message is a user msg that led to assistant message with_context_length_exceeded
     ) -> Result<(Vec<Message>, Vec<usize>), anyhow::Error> {
         let provider = self.provider().await?;
-        let token_counter = create_async_token_counter(&provider.get_model_config().tokenizer_name()).await
-            .map_err(|e| anyhow::anyhow!("Failed to create token counter: {}", e))?;
+        let token_counter =
+            create_async_token_counter(provider.get_model_config().tokenizer_name())
+                .await
+                .map_err(|e| anyhow::anyhow!("Failed to create token counter: {}", e))?;
         let target_context_limit = estimate_target_context_limit(provider);
         let token_counts = get_messages_token_counts_async(&token_counter, messages);
 
@@ -52,12 +54,15 @@ impl Agent {
         messages: &[Message], // last message is a user msg that led to assistant message with_context_length_exceeded
     ) -> Result<(Vec<Message>, Vec<usize>), anyhow::Error> {
         let provider = self.provider().await?;
-        let token_counter = create_async_token_counter(&provider.get_model_config().tokenizer_name()).await
-            .map_err(|e| anyhow::anyhow!("Failed to create token counter: {}", e))?;
+        let token_counter =
+            create_async_token_counter(provider.get_model_config().tokenizer_name())
+                .await
+                .map_err(|e| anyhow::anyhow!("Failed to create token counter: {}", e))?;
         let target_context_limit = estimate_target_context_limit(provider.clone());
 
         let (mut new_messages, mut new_token_counts) =
-            summarize_messages_async(provider, messages, &token_counter, target_context_limit).await?;
+            summarize_messages_async(provider, messages, &token_counter, target_context_limit)
+                .await?;
 
         // If the summarized messages only contains one message, it means no tool request and response message in the summarized messages,
         // Add an assistant message to the summarized messages to ensure the assistant's response is included in the context.
diff --git a/crates/goose/src/context_mgmt/common.rs b/crates/goose/src/context_mgmt/common.rs
@@ -2,7 +2,11 @@ use std::sync::Arc;
 
 use mcp_core::Tool;
 
-use crate::{message::Message, providers::base::Provider, token_counter::{TokenCounter, AsyncTokenCounter}};
+use crate::{
+    message::Message,
+    providers::base::Provider,
+    token_counter::{AsyncTokenCounter, TokenCounter},
+};
 
 const ESTIMATE_FACTOR: f32 = 0.7;
 const SYSTEM_PROMPT_TOKEN_OVERHEAD: usize = 3_000;
@@ -29,7 +33,10 @@ pub fn get_messages_token_counts(token_counter: &TokenCounter, messages: &[Messa
 }
 
 /// Async version of get_messages_token_counts for better performance
-pub fn get_messages_token_counts_async(token_counter: &AsyncTokenCounter, messages: &[Message]) -> Vec<usize> {
+pub fn get_messages_token_counts_async(
+    token_counter: &AsyncTokenCounter,
+    messages: &[Message],
+) -> Vec<usize> {
     // Calculate current token count of each message, use count_chat_tokens to ensure we
     // capture the full content of the message, include ToolRequests and ToolResponses
     messages
diff --git a/crates/goose/src/context_mgmt/summarize.rs b/crates/goose/src/context_mgmt/summarize.rs
@@ -1,7 +1,7 @@
 use super::common::{get_messages_token_counts, get_messages_token_counts_async};
 use crate::message::{Message, MessageContent};
 use crate::providers::base::Provider;
-use crate::token_counter::{TokenCounter, AsyncTokenCounter};
+use crate::token_counter::{AsyncTokenCounter, TokenCounter};
 use anyhow::Result;
 use mcp_core::Role;
 use std::sync::Arc;
diff --git a/crates/goose/src/token_counter.rs b/crates/goose/src/token_counter.rs