Skip to content

Commit aa9ab60

Browse files
authored
Merge pull request #1395 from stratosphereips/alya/use_the_label_from_given_labeled_zeek_logs
use the label from given labeled zeek logs
2 parents d63a11a + 23cad7d commit aa9ab60

15 files changed

Lines changed: 716 additions & 585 deletions

File tree

modules/flowalerts/ssl.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ async def check_non_ssl_port_443_conns(
220220
self.keep_track_of_ssl_flow(flow, key)
221221
return False
222222

223+
flow.starttime = float(flow.starttime)
224+
223225
# in seconds
224226
five_mins = 5 * 60
225227

modules/flowmldetection/flowmldetection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,8 @@ def detect(self, x_flow) -> Optional[numpy.ndarray]:
316316
"endtime",
317317
"bytes",
318318
"flow_source",
319+
"ground_truth_label", # todo now we can use them
320+
"detailed_ground_truth_label",
319321
]
320322
for field in fields_to_drop:
321323
try:

modules/http_analyzer/http_analyzer.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,8 @@ async def check_non_http_port_80_conns(
485485
we discard the evidence. if not found, we check for future 5 mins
486486
of matching zeek flows that were detected as http by zeek.
487487
if found, we don't set an evidence; if not found, we set an evidence
488-
:kwarg timeout_reached: did we wait 5 mins in future and in the
489-
past for the http of the given flow to arrive or not?
488+
:kwarg timeout_reached: did we wait 5 mins in future AND in the
489+
past for the http of the given flow to arrive?
490490
"""
491491
if not self.is_tcp_established_port_80_non_empty_flow(flow):
492492
# we're not interested in that flow
@@ -500,6 +500,7 @@ async def check_non_http_port_80_conns(
500500
self.keep_track_of_http_flow(flow, key)
501501
return False
502502

503+
flow.starttime = float(flow.starttime)
503504
# in seconds
504505
five_mins = 5 * 60
505506

@@ -522,7 +523,6 @@ async def check_non_http_port_80_conns(
522523

523524
if matching_http_flows:
524525
# awesome! discard evidence. FP dodged.
525-
# clear these timestamps as we dont need them anymore?
526526
return False
527527

528528
# reaching here means we looked in the past 5 mins and
@@ -538,8 +538,8 @@ async def check_non_http_port_80_conns(
538538
# within that time?
539539
await self.wait_for_new_flows_or_timeout(five_mins)
540540
# we can safely await here without blocking the main thread because
541-
# once the above await returns, this function will never sleep
542-
# again, it'll either set the evidence or discard it
541+
# once we run this func with timeout_reached=True, this function will
542+
# never sleep again, it'll either set the evidence or discard it
543543
await self.check_non_http_port_80_conns(
544544
twid, flow, timeout_reached=True
545545
)
@@ -548,7 +548,9 @@ async def check_non_http_port_80_conns(
548548
async def wait_for_new_flows_or_timeout(self, timeout: float):
549549
"""
550550
waits for new incoming flows, but interrupts the wait if profiler
551-
stop sending new flows within within the timeout period.
551+
stops sending new flows within the timeout period.
552+
because that means no more flows are coming during the wait period,
553+
no need to wait.
552554
553555
:param timeout: the maximum time to wait before resuming execution.
554556
"""
@@ -568,6 +570,7 @@ async def will_slips_have_new_incoming_flows():
568570
await asyncio.wait_for(
569571
will_slips_have_new_incoming_flows(), timeout
570572
)
573+
571574
except asyncio.TimeoutError:
572575
pass # timeout reached
573576

@@ -669,3 +672,5 @@ async def main(self):
669672
twid = msg["twid"]
670673
flow = self.classifier.convert_to_flow_obj(msg["flow"])
671674
self.create_task(self.check_non_http_port_80_conns, twid, flow)
675+
676+
self.remove_old_entries_from_http_recognized_flows()

modules/ip_info/ip_info.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,13 @@ def get_vendor_offline(self, mac_addr, profileid):
232232

233233
def get_vendor(self, mac_addr: str, profileid: str) -> dict:
234234
"""
235-
Returns the vendor info of a MAC address and stores it in slips db
235+
Returns the vendor info of a MAC address and stores it in slips db
236236
either from an offline or an online database
237237
"""
238+
if not utils.is_ignored_ip(profileid.split("_")[-1]):
239+
# don't try to get the MAC vendor of profiles whose IP isn't an ignored one, the MAC
240+
# here is irrelevant (might be the gateway's)
241+
return False
238242

239243
if (
240244
"ff:ff:ff:ff:ff:ff" in mac_addr.lower()

slips_files/common/slips_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,8 @@ def convert_format(self, ts, required_format: str):
310310
return datetime_obj.astimezone(tz=self.local_tz).isoformat()
311311
elif required_format == "unixtimestamp":
312312
return datetime_obj.timestamp()
313-
else:
314-
return datetime_obj.strftime(required_format)
313+
314+
return datetime_obj.strftime(required_format)
315315

316316
def get_local_timezone(self):
317317
"""

slips_files/core/flows/zeek.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,17 @@ class Conn:
3434
sbytes: int
3535
dbytes: int
3636

37-
smac: str
38-
dmac: str
39-
4037
state: str
4138
history: str
39+
40+
smac: str = ""
41+
dmac: str = ""
42+
43+
# this is for when you give flows labeled by the netflow labeler
44+
# https://github.com/stratosphereips/netflowlabeler
45+
ground_truth_label: str = ""
46+
detailed_ground_truth_label: str = ""
47+
4248
type_: str = "conn"
4349
dir_: str = "->"
4450

@@ -74,6 +80,8 @@ class DNS:
7480

7581
answers: List[str]
7682
TTLs: str
83+
ground_truth_label: str = ""
84+
detailed_ground_truth_label: str = ""
7785

7886
type_: str = "dns"
7987

@@ -108,6 +116,9 @@ class HTTP:
108116
resp_mime_types: str
109117
resp_fuids: str
110118

119+
ground_truth_label: str = ""
120+
detailed_ground_truth_label: str = ""
121+
111122
type_: str = "http"
112123

113124
def __post_init__(self) -> None:
@@ -143,6 +154,9 @@ class SSL:
143154
ja3s: str
144155
is_DoH: str
145156

157+
ground_truth_label: str = ""
158+
detailed_ground_truth_label: str = ""
159+
146160
type_: str = "ssl"
147161

148162

@@ -168,29 +182,35 @@ class SSH:
168182
host_key_alg: str
169183
host_key: str
170184

185+
ground_truth_label: str = ""
186+
detailed_ground_truth_label: str = ""
187+
171188
type_: str = "ssh"
172189

173190

174191
@dataclass
175192
class DHCP:
176193
starttime: float
177194
uids: List[str]
178-
saddr: str
179-
daddr: str
180-
181195
client_addr: str
182196
server_addr: str
183197
host_name: str
184198

185199
smac: str # this is the client mac
186200
requested_addr: str
187201

202+
ground_truth_label: str = ""
203+
detailed_ground_truth_label: str = ""
204+
188205
type_: str = "dhcp"
189206

190207
def __post_init__(self) -> None:
191208
# Some zeek flows don't have saddr or daddr,
192209
# seen in dhcp.log and notice.log use the mac
193210
# address instead
211+
self.saddr = self.client_addr
212+
self.daddr = self.server_addr
213+
# if the client_addr is empty, use the mac address
194214
if not self.saddr and not self.daddr:
195215
self.saddr = self.smac
196216

@@ -203,6 +223,10 @@ class FTP:
203223
daddr: str
204224

205225
used_port: int
226+
227+
ground_truth_label: str = ""
228+
detailed_ground_truth_label: str = ""
229+
206230
type_: str = "ftp"
207231

208232

@@ -214,6 +238,10 @@ class SMTP:
214238
daddr: str
215239

216240
last_reply: str
241+
242+
ground_truth_label: str = ""
243+
detailed_ground_truth_label: str = ""
244+
217245
type_: str = "smtp"
218246

219247

@@ -230,6 +258,9 @@ class Tunnel:
230258
tunnel_type: str
231259
action: str
232260

261+
ground_truth_label: str = ""
262+
detailed_ground_truth_label: str = ""
263+
233264
type_: str = "tunnel"
234265

235266

@@ -250,6 +281,10 @@ class Notice:
250281

251282
# TODO srsly what is this?
252283
dst: str
284+
285+
ground_truth_label: str = ""
286+
detailed_ground_truth_label: str = ""
287+
253288
# every evidence needs a uid, notice.log flows don't have one by
254289
# default, slips adds one to them to be able to deal with it.
255290
type_: str = "notice"
@@ -293,6 +328,9 @@ class Files:
293328
tx_hosts: List[str]
294329
rx_hosts: List[str]
295330

331+
ground_truth_label: str = ""
332+
detailed_ground_truth_label: str = ""
333+
296334
type_: str = "files"
297335

298336
def __post_init__(self) -> None:
@@ -338,6 +376,9 @@ class ARP:
338376
dpkts: str = ""
339377
appproto: str = ""
340378

379+
ground_truth_label: str = ""
380+
detailed_ground_truth_label: str = ""
381+
341382
type_: str = "arp"
342383

343384

@@ -346,13 +387,18 @@ class Software:
346387
starttime: str
347388
uid: str
348389
saddr: str
349-
daddr: str
390+
sport: int
350391

351392
software: str
352393

353394
unparsed_version: str
354395
version_major: str
355396
version_minor: str
397+
# software log lines don't have daddr
398+
daddr: str = ""
399+
ground_truth_label: str = ""
400+
detailed_ground_truth_label: str = ""
401+
356402
type_: str = "software"
357403

358404
def __post_init__(self) -> None:

slips_files/core/input.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,11 @@ def cache_nxt_line_in_file(self, filename: str):
271271
if not file_handle:
272272
return False
273273

274-
# Only read the next line if the previous line from this file was sent to profiler
274+
# Only read the next line if the previous line from this file was sent
275+
# to profiler
275276
if filename in self.cache_lines:
276-
# We have still something to send, do not read the next line from this file
277+
# We still have something to send, do not read the next line from
278+
# this file
277279
return False
278280

279281
# We don't have any waiting line for this file, so proceed
@@ -286,14 +288,21 @@ def cache_nxt_line_in_file(self, filename: str):
286288
return False
287289

288290
# Did the file end?
289-
if not zeek_line or zeek_line.startswith("#"):
291+
if not zeek_line or zeek_line.startswith("#close"):
290292
# We reached the end of one of the files that we were reading.
291293
# Wait for more lines to come from another file
292294
return False
293295

294-
timestamp, nline = self.get_ts_from_line(zeek_line)
295-
if not timestamp:
296-
return False
296+
if zeek_line.startswith("#fields"):
297+
# this line contains the zeek fields, we want to cache it and
298+
# send it to the profiler normally
299+
nline = zeek_line
300+
# to send the line as early as possible
301+
timestamp = -1
302+
else:
303+
timestamp, nline = self.get_ts_from_line(zeek_line)
304+
if not timestamp:
305+
return False
297306

298307
self.file_time[filename] = timestamp
299308
# Store the line in the cache

0 commit comments

Comments
 (0)