@@ -9,6 +9,7 @@ use chrono::{DateTime, Utc};
99use rayon:: prelude:: * ;
1010use serde:: { Deserialize , Serialize } ;
1111use simd_json:: prelude:: * ;
12+ use std:: collections:: HashMap ;
1213use std:: path:: { Path , PathBuf } ;
1314use walkdir:: WalkDir ;
1415
@@ -79,6 +80,13 @@ enum GeminiCliMessage {
7980 #[ serde( default ) ]
8081 content : Option < GeminiCliContent > ,
8182 } ,
83+ Warning {
84+ id : String ,
85+ #[ serde( deserialize_with = "deserialize_utc_timestamp" ) ]
86+ timestamp : DateTime < Utc > ,
87+ #[ serde( default ) ]
88+ content : Option < GeminiCliContent > ,
89+ } ,
8290}
8391
8492/// A single `Part` from Gemini CLI's multi-modal content. Only `text` is
@@ -245,19 +253,29 @@ fn calculate_gemini_cost(tokens: &GeminiCliTokens, model_name: &str) -> f64 {
245253 input_cost + output_cost + cache_cost
246254}
247255
248- // JSON session parsing (not JSONL)
249- fn parse_json_session_file ( file_path : & Path ) -> Result < Vec < ConversationMessage > > {
/// Returns `true` when `path` is an existing regular file with a `json` or
/// `jsonl` extension that sits, at any depth, below a directory named `chats`.
///
/// The purely lexical checks (extension, ancestor names) run first so that the
/// filesystem is only queried through `Path::is_file` for paths that already
/// look like session files. The result is the same as evaluating the three
/// conditions in any other order.
fn is_gemini_cli_chat_path(path: &Path) -> bool {
    let has_session_extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| ext == "json" || ext == "jsonl");
    if !has_session_extension {
        return false;
    }

    // `ancestors()` yields the path itself first; `skip(1)` restricts the
    // search to its containing directories.
    let under_chats_dir = path
        .ancestors()
        .skip(1)
        .any(|ancestor| ancestor.file_name().is_some_and(|name| name == "chats"));

    under_chats_dir && path.is_file()
}
267+
268+ fn messages_from_session (
269+ file_path : & Path ,
270+ messages : Vec < GeminiCliMessage > ,
271+ ) -> Vec < ConversationMessage > {
250272 let project_hash = extract_and_hash_project_id_gemini_cli ( file_path) ;
251273 let file_path_str = file_path. to_string_lossy ( ) ;
274+ let conversation_hash = hash_text ( & file_path. to_string_lossy ( ) ) ;
252275 let mut entries = Vec :: new ( ) ;
253276 let mut fallback_session_name: Option < String > = None ;
254277
255- // Parse the complete session JSON
256- let session: GeminiCliSession =
257- simd_json:: from_slice ( & mut std:: fs:: read_to_string ( file_path) ?. into_bytes ( ) ) ?;
258-
259- // Process each message in the session
260- for message in session. messages {
278+ for message in messages {
261279 match message {
262280 GeminiCliMessage :: User {
263281 id : _,
@@ -290,7 +308,7 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
290308 file_path_str,
291309 timestamp. to_rfc3339( )
292310 ) ) ,
293- conversation_hash : hash_text ( & file_path . to_string_lossy ( ) ) ,
311+ conversation_hash : conversation_hash . clone ( ) ,
294312 model : None ,
295313 stats : Stats :: default ( ) ,
296314 role : MessageRole :: User ,
@@ -309,7 +327,6 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
309327 } => {
310328 let mut stats = extract_tool_stats ( & tool_calls) ;
311329
312- // Update stats with token information
313330 stats. input_tokens = tokens. input ;
314331 stats. output_tokens = tokens. output ;
315332 stats. reasoning_tokens = tokens. thoughts ;
@@ -330,7 +347,7 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
330347 ) ) ,
331348 date : timestamp,
332349 project_hash : project_hash. clone ( ) ,
333- conversation_hash : hash_text ( & file_path . to_string_lossy ( ) ) ,
350+ conversation_hash : conversation_hash . clone ( ) ,
334351 stats,
335352 role : MessageRole :: Assistant ,
336353 uuid : None ,
@@ -341,7 +358,53 @@ fn parse_json_session_file(file_path: &Path) -> Result<Vec<ConversationMessage>>
341358 }
342359 }
343360
344- Ok ( entries)
361+ entries
362+ }
363+
364+ // JSON session parsing (not JSONL)
365+ fn parse_json_session_file ( file_path : & Path ) -> Result < Vec < ConversationMessage > > {
366+ let session: GeminiCliSession =
367+ simd_json:: from_slice ( & mut std:: fs:: read_to_string ( file_path) ?. into_bytes ( ) ) ?;
368+ Ok ( messages_from_session ( file_path, session. messages ) )
369+ }
370+
371+ fn parse_jsonl_session_file ( file_path : & Path ) -> Result < Vec < ConversationMessage > > {
372+ let content = std:: fs:: read_to_string ( file_path) ?;
373+ let mut message_order = Vec :: new ( ) ;
374+ let mut latest_messages = HashMap :: new ( ) ;
375+
376+ for line in content. lines ( ) . filter ( |line| !line. trim ( ) . is_empty ( ) ) {
377+ let mut line_bytes = line. as_bytes ( ) . to_vec ( ) ;
378+ let value: simd_json:: OwnedValue = simd_json:: from_slice ( & mut line_bytes) ?;
379+
380+ if value. get ( "$set" ) . is_some ( ) {
381+ continue ;
382+ }
383+
384+ if value. get ( "type" ) . is_none ( ) || value. get ( "id" ) . is_none ( ) {
385+ continue ;
386+ }
387+
388+ let id = match value. get ( "id" ) . and_then ( |v| v. as_str ( ) ) {
389+ Some ( id) => id. to_string ( ) ,
390+ None => continue ,
391+ } ;
392+
393+ let mut message_bytes = line. as_bytes ( ) . to_vec ( ) ;
394+ let message: GeminiCliMessage = simd_json:: from_slice ( & mut message_bytes) ?;
395+
396+ if !latest_messages. contains_key ( & id) {
397+ message_order. push ( id. clone ( ) ) ;
398+ }
399+ latest_messages. insert ( id, message) ;
400+ }
401+
402+ let messages = message_order
403+ . into_iter ( )
404+ . filter_map ( |id| latest_messages. remove ( & id) )
405+ . collect ( ) ;
406+
407+ Ok ( messages_from_session ( file_path, messages) )
345408}
346409
347410#[ async_trait]
@@ -365,16 +428,9 @@ impl Analyzer for GeminiCliAnalyzer {
365428 let sources = Self :: data_dir ( )
366429 . filter ( |d| d. is_dir ( ) )
367430 . into_iter ( )
368- . flat_map ( |tmp_dir| WalkDir :: new ( tmp_dir) . min_depth ( 3 ) . max_depth ( 3 ) . into_iter ( ) )
431+ . flat_map ( |tmp_dir| WalkDir :: new ( tmp_dir) . into_iter ( ) )
369432 . filter_map ( |e| e. ok ( ) )
370- . filter ( |e| {
371- e. file_type ( ) . is_file ( )
372- && e. path ( ) . extension ( ) . is_some_and ( |ext| ext == "json" )
373- && e. path ( )
374- . parent ( )
375- . and_then ( |p| p. file_name ( ) )
376- . is_some_and ( |name| name == "chats" )
377- } )
433+ . filter ( |e| is_gemini_cli_chat_path ( e. path ( ) ) )
378434 . map ( |e| DataSource {
379435 path : e. into_path ( ) ,
380436 } )
@@ -387,20 +443,16 @@ impl Analyzer for GeminiCliAnalyzer {
387443 Self :: data_dir ( )
388444 . filter ( |d| d. is_dir ( ) )
389445 . into_iter ( )
390- . flat_map ( |tmp_dir| WalkDir :: new ( tmp_dir) . min_depth ( 3 ) . max_depth ( 3 ) . into_iter ( ) )
446+ . flat_map ( |tmp_dir| WalkDir :: new ( tmp_dir) . into_iter ( ) )
391447 . filter_map ( |e| e. ok ( ) )
392- . any ( |e| {
393- e. file_type ( ) . is_file ( )
394- && e. path ( ) . extension ( ) . is_some_and ( |ext| ext == "json" )
395- && e. path ( )
396- . parent ( )
397- . and_then ( |p| p. file_name ( ) )
398- . is_some_and ( |name| name == "chats" )
399- } )
448+ . any ( |e| is_gemini_cli_chat_path ( e. path ( ) ) )
400449 }
401450
402451 fn parse_source ( & self , source : & DataSource ) -> Result < Vec < ConversationMessage > > {
403- parse_json_session_file ( & source. path )
452+ match source. path . extension ( ) . and_then ( |ext| ext. to_str ( ) ) {
453+ Some ( "jsonl" ) => parse_jsonl_session_file ( & source. path ) ,
454+ _ => parse_json_session_file ( & source. path ) ,
455+ }
404456 }
405457
406458 fn parse_sources_parallel ( & self , sources : & [ DataSource ] ) -> Vec < ConversationMessage > {
@@ -419,13 +471,7 @@ impl Analyzer for GeminiCliAnalyzer {
419471 }
420472
421473 fn is_valid_data_path ( & self , path : & Path ) -> bool {
422- // Must be a .json file in a "chats" directory
423- path. is_file ( )
424- && path. extension ( ) . is_some_and ( |ext| ext == "json" )
425- && path
426- . parent ( )
427- . and_then ( |p| p. file_name ( ) )
428- . is_some_and ( |name| name == "chats" )
474+ is_gemini_cli_chat_path ( path)
429475 }
430476
431477 fn contribution_strategy ( & self ) -> ContributionStrategy {
0 commit comments