Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ $ docker compose build --build-arg uid=1000
|`unittests/scans/<parser_dir>/{many_vulns,no_vuln,one_vuln}.json` | Sample files containing meaningful data for unit tests. The minimal set.
|`unittests/tools/test_<parser_name>_parser.py` | Unit tests of the parser.
|`dojo/settings/settings.dist.py` | If you want to use a modern hashcode based deduplication algorithm
|`docs/content/en/connecting_your_tools/parsers/<file/api>/<parser_file>.md` | Documentation, what kind of file format is required and how it should be obtained
|`docs/content/en/connecting_your_tools/parsers/<file/api>/<parser_file>.md` | Documentation, what kind of file format is required and how it should be obtained


## Factory contract

Expand All @@ -57,6 +57,7 @@ Parsers are loaded dynamicaly with a factory pattern. To have your parser loaded
3. `def get_description_for_scan_types(self, scan_type):` This function return a string used to provide some text in the UI (long description)
4. `def get_findings(self, file, test)` This function return a list of findings
6. If your parser have more than 1 scan_type (for detailled mode) you **MUST** implement `def set_mode(self, mode)` method
7. The parser instance is re-used over all imports performed for this scan_type, so do not store any data at class level

Example:

Expand Down Expand Up @@ -145,7 +146,7 @@ Very bad example:
Various file formats are handled through libraries. In order to keep DefectDojo slim and also don't extend the attack surface, keep the number of libraries used minimal and take other parsers as an example.

#### defusedXML in favour of lxml
As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library.
As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library.

### Not all attributes are mandatory

Expand Down Expand Up @@ -366,4 +367,3 @@ Please add a new .md file in [`docs/content/en/connecting_your_tools/parsers`] w
* A link to the scanner itself - (e.g. GitHub or vendor link)

Here is an example of a completed Parser documentation page: [https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md](https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md)

20 changes: 10 additions & 10 deletions dojo/tools/ms_defender/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ class MSDefenderParser:

"""Import from MSDefender findings"""

def __init__(self):
self.findings = []

def get_scan_types(self):
return ["MSDefender Parser"]

Expand All @@ -24,11 +21,12 @@ def get_description_for_scan_types(self, scan_type):
return ("MSDefender findings can be retrieved using the REST API")

def get_findings(self, file, test):
findings = []
if str(file.name).endswith(".json"):
vulnerabilityfile = json.load(file)
vulnerabilitydata = vulnerabilityfile["value"]
for vulnerability in vulnerabilitydata:
self.process_json(vulnerability)
findings.append(self.process_json(vulnerability))
elif str(file.name).endswith(".zip"):
if str(file.__class__) == "<class '_io.TextIOWrapper'>":
input_zip = zipfile.ZipFile(file.name, "r")
Expand All @@ -51,27 +49,29 @@ def get_findings(self, file, test):
vulnerabilities = []
machines = {}
for vulnerabilityfile in vulnerabilityfiles:
logger.debug("Loading vulnerabilitiy file: %s", vulnerabilityfile)
output = json.loads(zipdata[vulnerabilityfile].decode("ascii"))["value"]
for data in output:
vulnerabilities.append(data)
for machinefile in machinefiles:
logger.debug("Loading machine file: %s", vulnerabilityfile)
output = json.loads(zipdata[machinefile].decode("ascii"))["value"]
for data in output:
machines[data.get("id")] = data
for vulnerability in vulnerabilities:
try:
machine = machines.get(vulnerability["machineId"], None)
if machine is not None:
self.process_zip(vulnerability, machine)
findings.append(self.process_json_with_machine_info(vulnerability, machine))
else:
logger.debug("fallback to process without machine: no machine id")
self.process_json(vulnerability)
findings.append(self.process_json(vulnerability))
except (IndexError, KeyError):
logger.exception("fallback to process without machine: exception")
self.process_json(vulnerability)
else:
return []
return self.findings
return findings

def process_json(self, vulnerability):
description = ""
Expand All @@ -95,10 +95,10 @@ def process_json(self, vulnerability):
if vulnerability["cveId"] is not None:
finding.unsaved_vulnerability_ids = []
finding.unsaved_vulnerability_ids.append(vulnerability["cveId"])
self.findings.append(finding)
finding.unsaved_endpoints = []
return finding

def process_zip(self, vulnerability, machine):
def process_json_with_machine_info(self, vulnerability, machine):
description = ""
description += "cveId: " + str(vulnerability.get("cveId", "")) + "\n"
description += "machineId: " + str(vulnerability.get("machineId", "")) + "\n"
Expand Down Expand Up @@ -142,14 +142,14 @@ def process_zip(self, vulnerability, machine):
if "cveId" in vulnerability:
finding.unsaved_vulnerability_ids = []
finding.unsaved_vulnerability_ids.append(vulnerability["cveId"])
self.findings.append(finding)
finding.unsaved_endpoints = []
if "computerDnsName" in machine and machine["computerDnsName"] is not None:
finding.unsaved_endpoints.append(Endpoint(host=str(machine["computerDnsName"]).replace(" ", "").replace("(", "_").replace(")", "_")))
if "lastIpAddress" in machine and machine["lastIpAddress"] is not None:
finding.unsaved_endpoints.append(Endpoint(host=str(machine["lastIpAddress"])))
if "lastExternalIpAddress" in machine and machine["lastExternalIpAddress"] is not None:
finding.unsaved_endpoints.append(Endpoint(host=str(machine["lastExternalIpAddress"])))
return finding

def severity_check(self, severity_input):
if severity_input in {"Informational", "Low", "Medium", "High", "Critical"}:
Expand Down
16 changes: 16 additions & 0 deletions unittests/tools/test_ms_defender_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@ def test_parser_defender_zip(self):
endpoint.clean()
self.assertEqual("1.1.1.1", finding.unsaved_endpoints[0].host)

def test_parser_defender_zip_repeated(self):
"""
It was found that the defender parser was caching findings across different runs of the parser.
This test might be a good default test for any parser to make sure nothing is cached.
"""
testfile = (get_unit_tests_scans_path("ms_defender") / "defender.zip").open(encoding="utf-8")
parser = MSDefenderParser()
findings = parser.get_findings(testfile, Test())
testfile.close()
self.assertEqual(4, len(findings))

testfile_repeated = (get_unit_tests_scans_path("ms_defender") / "defender.zip").open(encoding="utf-8")
findings_repeated = parser.get_findings(testfile, Test())
testfile_repeated.close()
self.assertEqual(4, len(findings_repeated))

def test_parser_defender_wrong_machines_zip(self):
testfile = (get_unit_tests_scans_path("ms_defender") / "defender_wrong_machines.zip").open(encoding="utf-8")
parser = MSDefenderParser()
Expand Down