@@ -8,9 +8,14 @@ use anyhow::{Context, Result};
88use async_trait:: async_trait;
99use chrono:: { DateTime , Utc } ;
1010use serde:: { Deserialize , Serialize } ;
11+ use std:: collections:: HashSet ;
1112
1213use crate :: sniff:: reader:: LogEntry ;
1314
/// Maximum number of log lines embedded in a single prompt.
const MAX_PROMPT_LINES: usize = 200;
/// Upper bound on total log characters (including joining newlines) placed in a prompt.
const MAX_PROMPT_CHARS: usize = 16_000;
/// Per-line character cap; longer lines are truncated with a marker.
const MAX_LINE_CHARS: usize = 500;
18+
1419/// Summary produced by AI analysis of log entries
1520#[ derive( Debug , Clone , Serialize , Deserialize ) ]
1621pub struct LogSummary {
@@ -69,6 +74,17 @@ pub struct OpenAiAnalyzer {
6974}
7075
7176impl OpenAiAnalyzer {
77+ fn push_selected_index (
78+ selected_indices : & mut Vec < usize > ,
79+ seen : & mut HashSet < usize > ,
80+ idx : usize ,
81+ total_entries : usize ,
82+ ) {
83+ if idx < total_entries && seen. insert ( idx) {
84+ selected_indices. push ( idx) ;
85+ }
86+ }
87+
7288 pub fn new ( api_url : String , api_key : Option < String > , model : String ) -> Self {
7389 Self {
7490 api_url,
@@ -79,8 +95,21 @@ impl OpenAiAnalyzer {
7995 }
8096
8197 fn build_prompt ( entries : & [ LogEntry ] ) -> String {
82- let lines: Vec < & str > = entries. iter ( ) . map ( |e| e. line . as_str ( ) ) . collect ( ) ;
83- let log_block = lines. join ( "\n " ) ;
98+ let prompt_entries = Self :: select_prompt_entries ( entries) ;
99+ let included_count = prompt_entries. len ( ) ;
100+ let included_chars: usize = prompt_entries. iter ( ) . map ( |line| line. len ( ) ) . sum ( ) ;
101+ let was_truncated = included_count < entries. len ( ) ;
102+ let truncation_note = if was_truncated {
103+ format ! (
104+ "Only {} of {} entries are included below to keep the request bounded. \
105+ Prioritize the included lines when identifying anomalies, but keep the full batch size in mind.\n ",
106+ included_count,
107+ entries. len( )
108+ )
109+ } else {
110+ String :: new ( )
111+ } ;
112+ let log_block = prompt_entries. join ( "\n " ) ;
84113
85114 format ! (
86115 "Analyze these log entries and provide a JSON response with:\n \
@@ -90,9 +119,107 @@ impl OpenAiAnalyzer {
90119 4. \" key_events\" : Array of important events (max 5)\n \
91120 5. \" anomalies\" : Array of objects with \" description\" , \" severity\" (Low/Medium/High/Critical), \" sample_line\" \n \n \
92121 Respond ONLY with valid JSON, no markdown.\n \n \
93- Log entries:\n {}", log_block
122+ Batch metadata:\n \
123+ - total_entries: {}\n \
124+ - included_entries: {}\n \
125+ - included_characters: {}\n \
126+ {}\
127+ Log entries:\n {}",
128+ entries. len( ) ,
129+ included_count,
130+ included_chars,
131+ truncation_note,
132+ log_block
94133 )
95134 }
135+
    /// Choose which log lines go into the prompt, bounded by
    /// `MAX_PROMPT_LINES` lines and `MAX_PROMPT_CHARS` characters.
    ///
    /// Selection passes, in priority order:
    /// 1. every line matching `is_priority_line` (errors, warnings, security
    ///    keywords), wherever it occurs in the batch;
    /// 2. the most recent `MAX_PROMPT_LINES` entries;
    /// 3. if the line budget still is not filled, an evenly strided sample
    ///    across the whole batch.
    /// Indices are then sorted so the output preserves original log order,
    /// and lines are emitted until either budget is exhausted.
    fn select_prompt_entries(entries: &[LogEntry]) -> Vec<String> {
        if entries.is_empty() {
            return Vec::new();
        }

        let mut selected_indices = Vec::new();
        let mut seen = HashSet::new();

        // Pass 1: always keep high-signal lines wherever they occur.
        for (idx, entry) in entries.iter().enumerate() {
            if Self::is_priority_line(&entry.line) {
                Self::push_selected_index(&mut selected_indices, &mut seen, idx, entries.len());
            }
        }

        // Pass 2: keep the most recent window of entries.
        let recent_window_start = entries.len().saturating_sub(MAX_PROMPT_LINES);
        for idx in recent_window_start..entries.len() {
            Self::push_selected_index(&mut selected_indices, &mut seen, idx, entries.len());
        }

        // Pass 3: for batches smaller than the window won't fill, sample the
        // batch at an even stride until the line budget is reached.
        if selected_indices.len() < MAX_PROMPT_LINES {
            let stride = (entries.len() / MAX_PROMPT_LINES.max(1)).max(1);
            let mut idx = 0;
            while idx < entries.len() && selected_indices.len() < MAX_PROMPT_LINES {
                Self::push_selected_index(&mut selected_indices, &mut seen, idx, entries.len());
                idx += stride;
            }
        }

        // Restore chronological order regardless of which pass selected a line.
        selected_indices.sort_unstable();

        let mut prompt_entries = Vec::new();
        let mut total_chars = 0;

        for idx in selected_indices {
            // Hard cap on line count (pass 1 may have selected more than the
            // budget when the batch is error-heavy).
            if prompt_entries.len() >= MAX_PROMPT_LINES {
                break;
            }

            let line = Self::truncate_line(&entries[idx].line);
            // Budget accounting includes the '\n' separator that join() will
            // add between lines (but not before the first line).
            let next_chars = if prompt_entries.is_empty() {
                line.len()
            } else {
                total_chars + 1 + line.len()
            };

            if next_chars > MAX_PROMPT_CHARS {
                break;
            }

            total_chars = next_chars;
            prompt_entries.push(line);
        }

        // Never return an empty log block: fall back to the latest entry
        // (e.g. when a single enormous line blew the character budget).
        if prompt_entries.is_empty() {
            prompt_entries.push(Self::truncate_line(&entries[entries.len() - 1].line));
        }

        prompt_entries
    }
195+
196+ fn is_priority_line ( line : & str ) -> bool {
197+ let lower = line. to_ascii_lowercase ( ) ;
198+ [
199+ "error" ,
200+ "warn" ,
201+ "fatal" ,
202+ "panic" ,
203+ "exception" ,
204+ "denied" ,
205+ "unauthorized" ,
206+ "failed" ,
207+ "timeout" ,
208+ "attack" ,
209+ "anomaly" ,
210+ ]
211+ . iter ( )
212+ . any ( |pattern| lower. contains ( pattern) )
213+ }
214+
215+ fn truncate_line ( line : & str ) -> String {
216+ let truncated: String = line. chars ( ) . take ( MAX_LINE_CHARS ) . collect ( ) ;
217+ if truncated. len ( ) == line. len ( ) {
218+ truncated
219+ } else {
220+ format ! ( "{}...[truncated]" , truncated)
221+ }
222+ }
96223}
97224
98225/// Response structure from the LLM
@@ -262,10 +389,11 @@ impl LogAnalyzer for OpenAiAnalyzer {
262389 let source_id = & entries[ 0 ] . source_id ;
263390
264391 log:: debug!(
265- "Sending {} entries to AI API (model: {}, url: {})" ,
392+ "Sending {} entries to AI API (model: {}, url: {}, prompt_chars: {} )" ,
266393 entries. len( ) ,
267394 self . model,
268- self . api_url
395+ self . api_url,
396+ prompt. len( )
269397 ) ;
270398 log:: trace!( "Prompt:\n {}" , prompt) ;
271399
@@ -492,6 +620,58 @@ mod tests {
492620 assert ! ( prompt. contains( "JSON" ) ) ;
493621 }
494622
    /// Prompts built from oversized batches must cap the included lines at
    /// `MAX_PROMPT_LINES`, report accurate batch metadata, and carry the
    /// truncation notice while keeping the most recent entries.
    #[test]
    fn test_build_prompt_limits_included_entries() {
        // 250 plain INFO lines: none are priority, so only the recent window
        // (the last 200) should survive selection.
        let entries: Vec<LogEntry> = (0..250)
            .map(|i| LogEntry {
                source_id: "test-source".into(),
                timestamp: Utc::now(),
                line: format!("INFO line {}", i),
                metadata: HashMap::new(),
            })
            .collect();

        let prompt = OpenAiAnalyzer::build_prompt(&entries);

        assert!(prompt.contains("- total_entries: 250"));
        assert!(prompt.contains("- included_entries: 200"));
        assert!(prompt.contains("Only 200 of 250 entries are included below"));
        // Newest line kept; the oldest overflow line dropped.
        assert!(prompt.contains("INFO line 249"));
        assert!(!prompt.contains("INFO line 0"));
    }
642+
    /// A priority (error) line outside the recent window must still be
    /// selected, and the overall selection must stay at the line cap.
    #[test]
    fn test_select_prompt_entries_preserves_priority_lines() {
        let mut entries: Vec<LogEntry> = (0..260)
            .map(|i| LogEntry {
                source_id: "test-source".into(),
                timestamp: Utc::now(),
                line: format!("INFO line {}", i),
                metadata: HashMap::new(),
            })
            .collect();
        // Index 10 falls before the 200-entry recent window (which starts at
        // index 60), so only the priority pass can keep it.
        entries[10].line = "ERROR: early failure".into();

        let selected = OpenAiAnalyzer::select_prompt_entries(&entries);

        assert_eq!(selected.len(), 200);
        assert!(selected
            .iter()
            .any(|line| line.contains("ERROR: early failure")));
    }
662+
    /// Lines longer than `MAX_LINE_CHARS` are cut and tagged with the
    /// truncation marker (so the result is slightly longer than the raw cap).
    #[test]
    fn test_select_prompt_entries_truncates_long_lines() {
        let long_line = "x".repeat(MAX_LINE_CHARS + 50);
        let entries = make_entries(&[&long_line]);

        let selected = OpenAiAnalyzer::select_prompt_entries(&entries);

        assert_eq!(selected.len(), 1);
        assert!(selected[0].ends_with("...[truncated]"));
        // Cap chars + the appended marker exceeds MAX_LINE_CHARS bytes.
        assert!(selected[0].len() > MAX_LINE_CHARS);
    }
674+
495675 #[ test]
496676 fn test_parse_llm_response_valid ( ) {
497677 let entries = make_entries ( & [ "test line" ] ) ;
0 commit comments