Skip to content

Commit b31551e

Browse files
authored
fix: support newer Gemini CLI and Copilot session formats (#155)
1 parent 9db43c2 commit b31551e

4 files changed

Lines changed: 213 additions & 41 deletions

File tree

src/analyzers/copilot.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,13 @@ impl CopilotAnalyzer {
5454
#[serde(rename_all = "camelCase")]
5555
struct CopilotChatSession {
5656
version: u32,
57-
requester_username: String,
58-
responder_username: String,
59-
initial_location: String,
57+
#[serde(default)]
58+
requester_username: Option<String>,
59+
#[serde(default)]
60+
responder_username: Option<String>,
61+
#[serde(default)]
62+
initial_location: Option<String>,
63+
#[serde(default)]
6064
requests: Vec<CopilotRequest>,
6165
#[serde(default)]
6266
session_id: Option<String>,

src/analyzers/gemini_cli.rs

Lines changed: 84 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use chrono::{DateTime, Utc};
99
use rayon::prelude::*;
1010
use serde::{Deserialize, Serialize};
1111
use simd_json::prelude::*;
12+
use std::collections::HashMap;
1213
use std::path::{Path, PathBuf};
1314
use walkdir::WalkDir;
1415

@@ -79,6 +80,13 @@ enum GeminiCliMessage {
7980
#[serde(default)]
8081
content: Option<GeminiCliContent>,
8182
},
83+
Warning {
84+
id: String,
85+
#[serde(deserialize_with = "deserialize_utc_timestamp")]
86+
timestamp: DateTime<Utc>,
87+
#[serde(default)]
88+
content: Option<GeminiCliContent>,
89+
},
8290
}
8391

8492
/// A single `Part` from Gemini CLI's multi-modal content. Only `text` is
@@ -245,19 +253,29 @@ fn calculate_gemini_cost(tokens: &GeminiCliTokens, model_name: &str) -> f64 {
245253
input_cost + output_cost + cache_cost
246254
}
247255

248-
// JSON session parsing (not JSONL)
249-
fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>> {
256+
/// Reports whether `path` looks like a Gemini CLI chat session file.
///
/// A path qualifies when all of the following hold:
/// - it is an existing regular file (`Path::is_file`),
/// - its extension is exactly `json` or `jsonl` (the comparison is case-sensitive),
/// - at least one of its ancestor directories is named `chats`.
///
/// The `chats` directory does not have to be the immediate parent, so session
/// files stored in sub-directories below `chats` are accepted as well.
fn is_gemini_cli_chat_path(path: &Path) -> bool {
    if !path.is_file() {
        return false;
    }

    let has_session_extension = matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("json" | "jsonl")
    );
    if !has_session_extension {
        return false;
    }

    // `ancestors()` yields `path` itself first; skip it so only the containing
    // directories are compared against `chats`.
    path.ancestors()
        .skip(1)
        .filter_map(Path::file_name)
        .any(|name| name == "chats")
}
267+
268+
fn messages_from_session(
269+
file_path: &Path,
270+
messages: Vec<GeminiCliMessage>,
271+
) -> Vec<ConversationMessage> {
250272
let project_hash = extract_and_hash_project_id_gemini_cli(file_path);
251273
let file_path_str = file_path.to_string_lossy();
274+
let conversation_hash = hash_text(&file_path.to_string_lossy());
252275
let mut entries = Vec::new();
253276
let mut fallback_session_name: Option<String> = None;
254277

255-
// Parse the complete session JSON
256-
let session: GeminiCliSession =
257-
simd_json::from_slice(&mut std::fs::read_to_string(file_path)?.into_bytes())?;
258-
259-
// Process each message in the session
260-
for message in session.messages {
278+
for message in messages {
261279
match message {
262280
GeminiCliMessage::User {
263281
id: _,
@@ -290,7 +308,7 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
290308
file_path_str,
291309
timestamp.to_rfc3339()
292310
)),
293-
conversation_hash: hash_text(&file_path.to_string_lossy()),
311+
conversation_hash: conversation_hash.clone(),
294312
model: None,
295313
stats: Stats::default(),
296314
role: MessageRole::User,
@@ -309,7 +327,6 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
309327
} => {
310328
let mut stats = extract_tool_stats(&tool_calls);
311329

312-
// Update stats with token information
313330
stats.input_tokens = tokens.input;
314331
stats.output_tokens = tokens.output;
315332
stats.reasoning_tokens = tokens.thoughts;
@@ -330,7 +347,7 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
330347
)),
331348
date: timestamp,
332349
project_hash: project_hash.clone(),
333-
conversation_hash: hash_text(&file_path.to_string_lossy()),
350+
conversation_hash: conversation_hash.clone(),
334351
stats,
335352
role: MessageRole::Assistant,
336353
uuid: None,
@@ -341,7 +358,53 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
341358
}
342359
}
343360

344-
Ok(entries)
361+
entries
362+
}
363+
364+
// JSON session parsing (not JSONL)
365+
fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>> {
366+
let session: GeminiCliSession =
367+
simd_json::from_slice(&mut std::fs::read_to_string(file_path)?.into_bytes())?;
368+
Ok(messages_from_session(file_path, session.messages))
369+
}
370+
371+
fn parse_jsonl_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>> {
372+
let content = std::fs::read_to_string(file_path)?;
373+
let mut message_order = Vec::new();
374+
let mut latest_messages = HashMap::new();
375+
376+
for line in content.lines().filter(|line| !line.trim().is_empty()) {
377+
let mut line_bytes = line.as_bytes().to_vec();
378+
let value: simd_json::OwnedValue = simd_json::from_slice(&mut line_bytes)?;
379+
380+
if value.get("$set").is_some() {
381+
continue;
382+
}
383+
384+
if value.get("type").is_none() || value.get("id").is_none() {
385+
continue;
386+
}
387+
388+
let id = match value.get("id").and_then(|v| v.as_str()) {
389+
Some(id) => id.to_string(),
390+
None => continue,
391+
};
392+
393+
let mut message_bytes = line.as_bytes().to_vec();
394+
let message: GeminiCliMessage = simd_json::from_slice(&mut message_bytes)?;
395+
396+
if !latest_messages.contains_key(&id) {
397+
message_order.push(id.clone());
398+
}
399+
latest_messages.insert(id, message);
400+
}
401+
402+
let messages = message_order
403+
.into_iter()
404+
.filter_map(|id| latest_messages.remove(&id))
405+
.collect();
406+
407+
Ok(messages_from_session(file_path, messages))
345408
}
346409

347410
#[async_trait]
@@ -365,16 +428,9 @@ impl Analyzer for GeminiCliAnalyzer {
365428
let sources = Self::data_dir()
366429
.filter(|d| d.is_dir())
367430
.into_iter()
368-
.flat_map(|tmp_dir| WalkDir::new(tmp_dir).min_depth(3).max_depth(3).into_iter())
431+
.flat_map(|tmp_dir| WalkDir::new(tmp_dir).into_iter())
369432
.filter_map(|e| e.ok())
370-
.filter(|e| {
371-
e.file_type().is_file()
372-
&& e.path().extension().is_some_and(|ext| ext == "json")
373-
&& e.path()
374-
.parent()
375-
.and_then(|p| p.file_name())
376-
.is_some_and(|name| name == "chats")
377-
})
433+
.filter(|e| is_gemini_cli_chat_path(e.path()))
378434
.map(|e| DataSource {
379435
path: e.into_path(),
380436
})
@@ -387,20 +443,16 @@ impl Analyzer for GeminiCliAnalyzer {
387443
Self::data_dir()
388444
.filter(|d| d.is_dir())
389445
.into_iter()
390-
.flat_map(|tmp_dir| WalkDir::new(tmp_dir).min_depth(3).max_depth(3).into_iter())
446+
.flat_map(|tmp_dir| WalkDir::new(tmp_dir).into_iter())
391447
.filter_map(|e| e.ok())
392-
.any(|e| {
393-
e.file_type().is_file()
394-
&& e.path().extension().is_some_and(|ext| ext == "json")
395-
&& e.path()
396-
.parent()
397-
.and_then(|p| p.file_name())
398-
.is_some_and(|name| name == "chats")
399-
})
448+
.any(|e| is_gemini_cli_chat_path(e.path()))
400449
}
401450

402451
fn parse_source(&self, source: &DataSource) -> Result<Vec<ConversationMessage>> {
403-
parse_json_session_file(&source.path)
452+
match source.path.extension().and_then(|ext| ext.to_str()) {
453+
Some("jsonl") => parse_jsonl_session_file(&source.path),
454+
_ => parse_json_session_file(&source.path),
455+
}
404456
}
405457

406458
fn parse_sources_parallel(&self, sources: &[DataSource]) -> Vec<ConversationMessage> {
@@ -419,13 +471,7 @@ impl Analyzer for GeminiCliAnalyzer {
419471
}
420472

421473
fn is_valid_data_path(&self, path: &Path) -> bool {
422-
// Must be a .json file in a "chats" directory
423-
path.is_file()
424-
&& path.extension().is_some_and(|ext| ext == "json")
425-
&& path
426-
.parent()
427-
.and_then(|p| p.file_name())
428-
.is_some_and(|name| name == "chats")
474+
is_gemini_cli_chat_path(path)
429475
}
430476

431477
fn contribution_strategy(&self) -> ContributionStrategy {

src/analyzers/tests/copilot.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,25 @@ fn test_copilot_glob_patterns() {
199199
"VS Code Copilot patterns should not include Copilot CLI event files"
200200
);
201201
}
202+
203+
/// A session file that has no requester/responder/location metadata and an
/// empty `requests` array must parse successfully and yield no messages.
#[test]
fn test_parse_empty_copilot_session_without_optional_metadata_fields() {
    let dir = tempfile::tempdir().unwrap();
    let session_path = dir.path().join("empty-session.json");
    std::fs::write(
        &session_path,
        r#"{
            "version": 3,
            "requests": [],
            "sessionId": "b4c76ee0-d6af-4c61-a85b-2b092ebd86fd",
            "creationDate": 1768224201981,
            "lastMessageDate": 1768224201981,
            "hasPendingEdits": false
        }"#,
    )
    .unwrap();

    let messages =
        parse_copilot_session_file(&session_path).expect("empty sessions should parse cleanly");
    assert!(messages.is_empty());
}

src/analyzers/tests/gemini_cli.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,3 +345,103 @@ async fn test_gemini_cli_issue_137_regression() {
345345
.expect("issue #137 regression: parse_source must accept array-of-parts content");
346346
assert_eq!(parsed.len(), 2);
347347
}
348+
349+
#[tokio::test]
350+
async fn test_gemini_cli_warning_messages_are_ignored() {
351+
let dir = tempdir().unwrap();
352+
let project_dir = dir.path().join("tmp").join("project-warning").join("chats");
353+
let json_content = r#"{
354+
"sessionId": "sess-warning",
355+
"projectHash": "proj-hash",
356+
"startTime": "2026-03-20T08:00:00Z",
357+
"lastUpdated": "2026-03-20T08:05:00Z",
358+
"messages": [
359+
{
360+
"type": "user",
361+
"id": "u-1",
362+
"timestamp": "2026-03-20T08:00:00Z",
363+
"content": [{"text": "run the tests"}]
364+
},
365+
{
366+
"type": "warning",
367+
"id": "w-1",
368+
"timestamp": "2026-03-20T08:00:01Z",
369+
"content": [{"text": "tool output warning"}]
370+
},
371+
{
372+
"type": "gemini",
373+
"id": "g-1",
374+
"timestamp": "2026-03-20T08:00:05Z",
375+
"content": "done",
376+
"model": "gemini-3-flash-preview",
377+
"tokens": {
378+
"input": 10,
379+
"output": 20,
380+
"thoughts": 5,
381+
"cached": 0,
382+
"tool": 0,
383+
"total": 35
384+
}
385+
}
386+
]
387+
}"#;
388+
let session_path = write_session(&project_dir, json_content);
389+
390+
let analyzer = GeminiCliAnalyzer::new();
391+
let source = crate::analyzer::DataSource { path: session_path };
392+
let messages = analyzer
393+
.parse_source(&source)
394+
.expect("warning message types should not break parsing");
395+
396+
assert_eq!(messages.len(), 2);
397+
assert_eq!(messages[0].role, crate::types::MessageRole::User);
398+
assert_eq!(messages[1].role, crate::types::MessageRole::Assistant);
399+
}
400+
401+
#[tokio::test]
402+
async fn test_gemini_cli_jsonl_latest_message_version_wins() {
403+
let dir = tempdir().unwrap();
404+
let session_dir = dir
405+
.path()
406+
.join("tmp")
407+
.join("project-jsonl")
408+
.join("chats")
409+
.join("9e43d548-335e-4ad0-b797-4f8bce36e08c");
410+
std::fs::create_dir_all(&session_dir).unwrap();
411+
let session_path = session_dir.join("06fhku.jsonl");
412+
let jsonl_content = r#"{"sessionId":"sess-jsonl","projectHash":"proj-hash","startTime":"2026-04-28T16:10:11.637Z","lastUpdated":"2026-04-28T16:10:11.637Z","kind":"main"}
413+
{"id":"u-1","timestamp":"2026-04-28T16:11:14.988Z","type":"user","content":[{"text":"inspect this cache design"}]}
414+
{"$set":{"lastUpdated":"2026-04-28T16:11:14.989Z"}}
415+
{"id":"g-1","timestamp":"2026-04-28T16:11:38.569Z","type":"gemini","content":"first draft","thoughts":[],"tokens":{"input":20,"output":30,"cached":0,"thoughts":4,"tool":0,"total":54},"model":"gemini-3-flash-preview"}
416+
{"$set":{"lastUpdated":"2026-04-28T16:11:38.569Z"}}
417+
{"id":"g-1","timestamp":"2026-04-28T16:11:38.569Z","type":"gemini","content":"final draft","thoughts":[],"tokens":{"input":20,"output":30,"cached":0,"thoughts":4,"tool":0,"total":54},"model":"gemini-3-flash-preview","toolCalls":[{"id":"call-1","name":"run_shell_command","args":{"command":"rg cache"},"result":[]}]}
418+
"#;
419+
let mut file = File::create(&session_path).unwrap();
420+
file.write_all(jsonl_content.as_bytes()).unwrap();
421+
422+
let analyzer = GeminiCliAnalyzer::new();
423+
let source = crate::analyzer::DataSource {
424+
path: session_path.clone(),
425+
};
426+
let messages = analyzer
427+
.parse_source(&source)
428+
.expect("jsonl sessions should parse successfully");
429+
430+
assert_eq!(messages.len(), 2);
431+
assert_eq!(messages[0].role, crate::types::MessageRole::User);
432+
assert_eq!(
433+
messages[0].session_name.as_deref(),
434+
Some("inspect this cache design")
435+
);
436+
437+
let assistant = messages
438+
.iter()
439+
.find(|m| m.role == crate::types::MessageRole::Assistant)
440+
.unwrap();
441+
assert_eq!(assistant.stats.input_tokens, 20);
442+
assert_eq!(assistant.stats.output_tokens, 30);
443+
assert_eq!(assistant.stats.reasoning_tokens, 4);
444+
assert_eq!(assistant.stats.tool_calls, 1);
445+
assert_eq!(assistant.stats.terminal_commands, 1);
446+
assert!(analyzer.is_valid_data_path(&session_path));
447+
}

0 commit comments

Comments
 (0)