Skip to content

Commit 4d23a91

Browse files
authored
Merge pull request #1410 from stratosphereips/alya/support_reading_labeled_logs
Support reading labeled logs
2 parents bb5d977 + b952f03 commit 4d23a91

7 files changed

Lines changed: 77 additions & 82 deletions

File tree

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
strategy:
2727
matrix:
2828
test_file:
29-
- test_inputProc.py
29+
- test_input.py
3030
- test_main.py
3131
- test_conn.py
3232
- test_downloaded_file.py

slips/main.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -286,22 +286,18 @@ def get_input_file_type(self, given_path):
286286
elif "CSV" in cmd_result and os.path.isfile(given_path):
287287
input_type = "binetflow"
288288
elif "directory" in cmd_result and os.path.isdir(given_path):
289-
from slips_files.core.input import SUPPORTED_LOGFILES
290-
291289
for log_file in os.listdir(given_path):
292290
# if there is at least 1 supported log file inside the
293291
# given directory, start slips normally
294292
# otherwise, stop slips
295-
if log_file.replace(".log", "") in SUPPORTED_LOGFILES:
293+
if not utils.is_ignored_zeek_log_file(log_file):
296294
input_type = "zeek_folder"
297295
break
298296
else:
299-
# zeek dir filled with unsupported logs
300-
# or .labeled logs that slips can't read.
301297
print(
302298
f"Log files in {given_path} are not supported \n"
303299
f"Make sure all log files inside the given "
304-
f"directory end with .log .. Stopping."
300+
f"directory end with .log or .log.labeled .. Stopping."
305301
)
306302
sys.exit(-1)
307303
else:

slips_files/common/slips_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from dataclasses import is_dataclass, asdict
2929
from enum import Enum
3030

31+
from slips_files.core.supported_logfiles import SUPPORTED_LOGFILES
32+
3133
IS_IN_A_DOCKER_CONTAINER = os.environ.get("IS_IN_A_DOCKER_CONTAINER", False)
3234

3335

@@ -289,6 +291,23 @@ def drop_root_privs(self):
289291
os.setresuid(sudo_uid, sudo_uid, -1)
290292
return
291293

294+
def is_ignored_zeek_log_file(self, filepath: str) -> bool:
295+
"""
296+
Returns True if the given file should be ignored, i.e. it does not end
297+
with .log or .log.labeled, or its base name is not in SUPPORTED_LOGFILES
298+
:param filepath: a zeek log file
299+
"""
300+
if not (
301+
filepath.endswith(".log") or filepath.endswith(".log.labeled")
302+
):
303+
return True
304+
305+
filename = os.path.basename(filepath)
306+
# remove all extensions from filename
307+
while "." in filename:
308+
filename = filename.rsplit(".", 1)[0]
309+
return filename not in SUPPORTED_LOGFILES
310+
292311
def convert_format(self, ts, required_format: str):
293312
"""
294313
Detects and converts the given ts to the given format

slips_files/core/input.py

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
# along with this program; if not, write to the Free Software
2727
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
2828
# Contact: eldraco@gmail.com, sebastian.garcia@agents.fel.cvut.cz, stratosphere@aic.fel.cvut.cz
29-
from pathlib import Path
3029
from re import split
3130

3231
from watchdog.observers import Observer
@@ -38,23 +37,7 @@
3837
from slips_files.common.slips_utils import utils
3938
import multiprocessing
4039
from slips_files.core.helpers.filemonitor import FileEventHandler
41-
42-
SUPPORTED_LOGFILES = (
43-
"conn",
44-
"dns",
45-
"http",
46-
"ssl",
47-
"ssh",
48-
"dhcp",
49-
"ftp",
50-
"smtp",
51-
"tunnel",
52-
"notice",
53-
"files",
54-
"arp",
55-
"software",
56-
"weird",
57-
)
40+
from slips_files.core.supported_logfiles import SUPPORTED_LOGFILES
5841

5942

6043
class Input(ICore):
@@ -198,14 +181,6 @@ def check_if_time_to_del_rotated_files(self):
198181
pass
199182
self.to_be_deleted = []
200183

201-
def is_ignored_file(self, filepath: str) -> bool:
202-
"""
203-
Ignore zeek log files that we don't use
204-
:param filepath: full path to a zeek log file
205-
"""
206-
filename_without_ext = Path(filepath).stem
207-
return filename_without_ext not in SUPPORTED_LOGFILES
208-
209184
def get_file_handle(self, filename):
210185
# Update which files we know about
211186
try:
@@ -373,7 +348,7 @@ def read_zeek_files(self) -> int:
373348
# Go to all the files generated by Zeek and read 1
374349
# line from each of them
375350
for filename in self.zeek_files:
376-
if self.is_ignored_file(filename):
351+
if utils.is_ignored_zeek_log_file(filename):
377352
continue
378353

379354
# reads 1 line from the given file and cache it
@@ -464,7 +439,7 @@ def read_zeek_folder(self):
464439
full_path = os.path.join(self.given_path, file)
465440

466441
# exclude ignored files from the total flows to be processed
467-
if self.is_ignored_file(full_path):
442+
if utils.is_ignored_zeek_log_file(full_path):
468443
continue
469444

470445
if not growing_zeek_dir:
@@ -602,14 +577,15 @@ def handle_zeek_log_file(self):
602577
and conn.log flows given to slips through CYST unix socket.
603578
"""
604579
if (
605-
not self.given_path.endswith(".log")
606-
or self.is_ignored_file(self.given_path)
607-
) and "cyst" not in self.given_path.lower():
580+
utils.is_ignored_zeek_log_file(self.given_path)
581+
and "cyst" not in self.given_path.lower()
582+
):
608583
# unsupported file
609584
return False
610585

611586
if os.path.exists(self.given_path):
612-
# in case of CYST flows, the given path is 'cyst' and there's no way to get the total flows
587+
# in case of CYST flows, the given path is 'cyst' and there's no
588+
# way to get the total flows
613589
self.is_zeek_tabs = self.is_zeek_tabs_file(self.given_path)
614590
total_flows = self.get_flows_number(self.given_path)
615591
self.db.set_input_metadata({"total_flows": total_flows})
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
SUPPORTED_LOGFILES = (
2+
"conn",
3+
"dns",
4+
"http",
5+
"ssl",
6+
"ssh",
7+
"dhcp",
8+
"ftp",
9+
"smtp",
10+
"tunnel",
11+
"notice",
12+
"files",
13+
"arp",
14+
"software",
15+
"weird",
16+
)
Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -333,49 +333,6 @@ def test_give_profiler(line, input_type, expected_line, expected_input_type):
333333
assert line_sent["input_type"] == expected_input_type
334334

335335

336-
@pytest.mark.parametrize(
337-
"filepath, expected_result",
338-
[ # Testcase 1: Supported file
339-
("path/to/conn.log", False),
340-
# Testcase 2: Supported file
341-
("path/to/dns.log", False),
342-
# Testcase 3: Supported file
343-
("path/to/http.log", False),
344-
# Testcase 4: Supported file
345-
("path/to/ssl.log", False),
346-
# Testcase 5: Supported file
347-
("path/to/ssh.log", False),
348-
# Testcase 6: Supported file
349-
("path/to/dhcp.log", False),
350-
# Testcase 7: Supported file
351-
("path/to/ftp.log", False),
352-
# Testcase 8: Supported file
353-
("path/to/smtp.log", False),
354-
# Testcase 9: Supported file
355-
("path/to/tunnel.log", False),
356-
# Testcase 10: Supported file
357-
("path/to/notice.log", False),
358-
# Testcase 11: Supported file
359-
("path/to/files.log", False),
360-
# Testcase 12: Supported file
361-
("path/to/arp.log", False),
362-
# Testcase 13: Supported file
363-
("path/to/software.log", False),
364-
# Testcase 14: Supported file
365-
("path/to/weird.log", False),
366-
# Testcase 15: Unsupported file
367-
("path/to/unsupported.log", True),
368-
],
369-
)
370-
def test_is_ignored_file(filepath, expected_result):
371-
"""
372-
Test that the is_ignored_file method correctly
373-
identifies ignored Zeek log files.
374-
"""
375-
input_process = ModuleFactory().create_input_obj("", "zeek_log_file")
376-
assert input_process.is_ignored_file(filepath) == expected_result
377-
378-
379336
def test_get_file_handle_existing_file():
380337
"""
381338
Test that the get_file_handle method correctly

tests/test_slips_utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,37 @@ def test_get_sha256_hash_from_nonexistent_file():
2727
utils.get_sha256_hash_of_file_contents("nonexistent_file.txt")
2828

2929

30+
@pytest.mark.parametrize(
31+
"filepath, expected_result",
32+
[ # Testcase 1: Supported file
33+
("path/to/conn.log", False),
34+
("path/to/dns.log", False),
35+
("path/to/http.log", False),
36+
("path/to/ssl.log", False),
37+
("path/to/ssh.log", False),
38+
("path/to/dhcp.log", False),
39+
("path/to/ftp.log", False),
40+
("path/to/smtp.log", False),
41+
("path/to/tunnel.log", False),
42+
("path/to/notice.log", False),
43+
("path/to/files.log", False),
44+
("path/to/arp.log", False),
45+
("path/to/software.log", False),
46+
("path/to/software.log.labeled", False),
47+
("path/to/weird.log", False),
48+
("path/to/software.log.labeled.something", True),
49+
("path/to/unsupported.log", True),
50+
],
51+
)
52+
def test_is_ignored_zeek_log_file(filepath, expected_result):
53+
"""
54+
Test that the is_ignored_file method correctly
55+
identifies ignored Zeek log files.
56+
"""
57+
utils = ModuleFactory().create_utils_obj()
58+
assert utils.is_ignored_zeek_log_file(filepath) == expected_result
59+
60+
3061
def test_get_sha256_hash_permission_error():
3162
utils = ModuleFactory().create_utils_obj()
3263
with patch("builtins.open", side_effect=PermissionError):

0 commit comments

Comments
 (0)