1818 DD_USE_VPC ,
1919 GOV_STRING ,
2020)
21- from steps .common import add_service_tag , is_cloudtrail , merge_dicts , parse_event_source
21+ from steps .common import (
22+ add_service_tag ,
23+ is_cloudtrail ,
24+ is_vpc_flowlog ,
25+ merge_dicts ,
26+ parse_event_source ,
27+ )
2228
2329
2430class S3EventDataStore :
@@ -63,6 +69,7 @@ def handle(self, event):
6369 add_service_tag (self .metadata )
6470
6571 self ._extract_data ()
72+
6673 yield from self ._get_structured_lines_for_s3_handler ()
6774
6875 def _extract_event (self , event ):
@@ -178,6 +185,9 @@ def _extract_cloudtrail_logs(self):
178185 self .logger .debug ("Unable to parse cloudtrail log: %s" % e )
179186
180187 def _extract_other_logs (self ):
188+ # VPC flow logs have a header line that should be skipped
189+ skip_first_line = is_vpc_flowlog (self .data_store .key )
190+
181191 # Check if using multiline log regex pattern
182192 # and determine whether line or pattern separated logs
183193 if self .multiline_regex_start_pattern and self .multiline_regex_pattern :
@@ -197,7 +207,9 @@ def _extract_other_logs(self):
197207 )
198208 self .data_store .data = self .data_store .data .splitlines ()
199209
200- for line in self .data_store .data :
210+ for i , line in enumerate (self .data_store .data ):
211+ if skip_first_line and i == 0 :
212+ continue
201213 yield self ._format_event (line )
202214
203215 else :
@@ -206,7 +218,10 @@ def _extract_other_logs(self):
206218 #
207219 # https://docs.python.org/3/library/stdtypes.html#str.splitlines
208220 # https://docs.python.org/3/library/stdtypes.html#bytes.splitlines
209- for line in self .data_store .data .splitlines ():
221+ for i , line in enumerate (self .data_store .data .splitlines ()):
222+ if skip_first_line and i == 0 :
223+ continue
224+
210225 line = line .decode ("utf-8" , errors = "ignore" ).strip ()
211226 if len (line ) == 0 :
212227 continue
0 commit comments