Skip to content

Commit 20af84e

Browse files
cleanup, test fixes
1 parent 39c7f2c commit 20af84e

5 files changed

Lines changed: 67 additions & 47 deletions

File tree

bbot/core/config/models.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,18 @@ class BaseModuleConfig(BaseModel):
108108

109109
model_config = STRICT
110110

111-
batch_size: Optional[int] = None
112-
module_threads: Optional[int] = None
113-
module_timeout: Optional[int] = None
111+
batch_size: Optional[int] = Field(
112+
default=None,
113+
description="The number of events to process in a single batch (only applies to batch modules)",
114+
)
115+
module_threads: Optional[int] = Field(
116+
default=None,
117+
description="How many event handlers to run in parallel",
118+
)
119+
module_timeout: Optional[int] = Field(
120+
default=None,
121+
description="Max time in seconds to spend handling each event or batch of events",
122+
)
114123

115124

116125
class BBOTConfig(BaseSettings):

bbot/scanner/preset/args.py

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
import re
21
import yaml
32
import logging
43
import argparse
54

65
from bbot.errors import *
7-
from bbot.core.config.merge import dotted_get, dotted_set
8-
from bbot.core.helpers.misc import chain_lists, get_closest_match, get_keys_in_dot_syntax
6+
from bbot.core.config.merge import dotted_set
7+
from bbot.core.helpers.misc import chain_lists
98

109

1110
def _parse_cli_value(raw: str):
@@ -14,8 +13,12 @@ def _parse_cli_value(raw: str):
1413
1514
YAML safe_load handles `true`/`false`/`null`/ints/floats and quoted strings
1615
the way users expect when they write `web.spider_distance=2` or
17-
`modules.stdout.event_fields='[type, data]'`.
16+
`modules.stdout.event_fields='[type, data]'`. An empty RHS (`-c key=`) is
17+
treated as an empty string rather than None — matching the "clear this
18+
value" intent users normally have.
1819
"""
20+
if raw == "":
21+
return ""
1922
try:
2023
return yaml.safe_load(raw)
2124
except yaml.YAMLError:
@@ -47,19 +50,7 @@ def parse_dotted_cli(entries):
4750
log = logging.getLogger("bbot.presets.args")
4851

4952

50-
universal_module_options = {
51-
"batch_size": "The number of events to process in a single batch (only applies to batch modules)",
52-
"module_threads": "How many event handlers to run in parallel",
53-
"module_timeout": "Max time in seconds to spend handling each event or batch of events",
54-
}
55-
56-
5753
class BBOTArgs:
58-
# module config options to exclude from validation
59-
exclude_from_validation = re.compile(
60-
r".*modules\.[a-z0-9_]+\.(?:" + "|".join(universal_module_options.keys()) + ")$"
61-
)
62-
6354
scan_examples = [
6455
(
6556
"Subdomains",
@@ -491,16 +482,16 @@ def sanitize_args(self):
491482
self.parsed.preset += ["fast"]
492483

493484
def validate(self):
494-
# validate config options
495-
sentinel = object()
496-
all_options = set(get_keys_in_dot_syntax(self.preset.core.default_config))
497-
for c in self.parsed.config:
498-
c = c.split("=")[0].strip()
499-
v = dotted_get(self.preset.core.default_config, c, default=sentinel)
500-
# if option isn't in the default config
501-
if v is sentinel:
502-
# skip if it's excluded from validation
503-
if self.exclude_from_validation.match(c):
504-
continue
505-
# otherwise, ensure it exists as a module option
506-
raise ValidationError(get_closest_match(c, all_options, msg="config option"))
485+
"""
486+
Validate the CLI `-c key=value` arguments against the composite
487+
preset schema. Catches typos like `bbot -c modules.shoudn.api_key=x`
488+
with a closest-match suggestion.
489+
"""
490+
from .validate import validate_preset
491+
492+
if not self.parsed.config:
493+
return
494+
cli_dict = parse_dotted_cli(self.parsed.config)
495+
errs = validate_preset({"config": cli_dict}, module_loader=self.preset.module_loader)
496+
if errs:
497+
raise ValidationError("\n".join(str(e) for e in errs))

bbot/scanner/preset/validate.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from pydantic import ValidationError
2828

29-
from bbot.core.helpers.misc import get_closest_match
29+
from bbot.core.helpers.misc import get_closest_match, get_keys_in_dot_syntax
3030

3131

3232
log = logging.getLogger("bbot.presets.validate")
@@ -72,18 +72,25 @@ def _classify_loc(loc: tuple) -> tuple[str, str]:
7272
return ("preset", ".".join(parts))
7373

7474

75-
def _format_msg(err: dict, known_modules: set | None = None) -> str:
75+
def _format_msg(err: dict, known_modules: set | None = None, known_paths: set | None = None) -> str:
7676
kind = err["type"]
7777
input_value = err.get("input")
7878
loc = err["loc"]
7979
field = str(loc[-1]) if loc else ""
8080
path = ".".join(str(p) for p in loc)
8181

8282
if kind == "extra_forbidden":
83-
# Special-case unknown module name (config.modules.<bad>) so users get
84-
# a suggestion rather than "Unknown option".
83+
# Special-case unknown module name (config.modules.<bad>) so users get
84+
# a suggestion drawn from the set of known module names.
8585
if len(loc) == 3 and loc[0] == "config" and loc[1] == "modules":
8686
return get_closest_match(field, known_modules or set(), msg="module")
87+
# For everything else, suggest from the known dotted-path universe
88+
# (`web.spier_distance` → `web.spider_distance`).
89+
if known_paths:
90+
# strip the leading "config." prefix when matching, since
91+
# default_config dotted paths don't include it
92+
lookup_path = ".".join(str(p) for p in loc[1:]) if loc and loc[0] == "config" else path
93+
return get_closest_match(lookup_path, known_paths, msg="config option")
8794
msg = f"Unknown option: {field!r}"
8895
if isinstance(input_value, (str, int, bool, float)):
8996
msg += f" (value: {input_value!r})"
@@ -114,11 +121,19 @@ def _format_msg(err: dict, known_modules: set | None = None) -> str:
114121
return err["msg"] if err.get("msg") else f"validation error at {path}"
115122

116123

117-
def _format_errors(exc: ValidationError, known_modules: set | None = None) -> list[PresetValidationError]:
124+
def _format_errors(
125+
exc: ValidationError,
126+
known_modules: set | None = None,
127+
known_paths: set | None = None,
128+
) -> list[PresetValidationError]:
118129
out: list[PresetValidationError] = []
119130
for err in exc.errors():
120131
where, path = _classify_loc(err["loc"])
121-
out.append(PresetValidationError(where=where, path=path, message=_format_msg(err, known_modules)))
132+
out.append(
133+
PresetValidationError(
134+
where=where, path=path, message=_format_msg(err, known_modules, known_paths)
135+
)
136+
)
122137
return out
123138

124139

@@ -168,14 +183,17 @@ def validate_preset(preset_dict: Any, module_loader=None) -> list[PresetValidati
168183

169184
errors: list[PresetValidationError] = []
170185
known_modules = set(module_loader.all_module_choices)
186+
# Universe of valid dotted config paths, used for "did you mean ...?"
187+
# suggestions on unknown global-config keys.
188+
known_paths = set(get_keys_in_dot_syntax(module_loader.core.default_config))
171189

172190
# Validate against the composite schema (rebuilt automatically if new
173191
# module_dirs were just preloaded above). Closest-match suggestions
174-
# for unknown module names are produced inside the formatter.
192+
# for unknown module names + config options are produced inside the formatter.
175193
try:
176194
module_loader.validation_schema.model_validate(preset_dict)
177195
except ValidationError as e:
178-
errors.extend(_format_errors(e, known_modules=known_modules))
196+
errors.extend(_format_errors(e, known_modules=known_modules, known_paths=known_paths))
179197

180198
# Module names listed in top-level `modules`/`output_modules`/`exclude_modules`
181199
# aren't covered by the composite schema (they're a list of strings, not a

bbot/scripts/docs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,12 @@ def update_individual_module_options():
182182
assert len(bbot_output_module_table.splitlines()) > 10
183183
update_md_files("BBOT OUTPUT MODULES", bbot_output_module_table)
184184

185-
# BBOT universal module options
186-
from bbot.scanner.preset.args import universal_module_options
185+
# BBOT universal module options (sourced from BaseModuleConfig)
186+
from bbot.core.config.models import BaseModuleConfig
187187

188188
universal_module_options_table = ""
189-
for option, description in universal_module_options.items():
190-
universal_module_options_table += f"**{option}**: {description}\n"
189+
for name, field in BaseModuleConfig.model_fields.items():
190+
universal_module_options_table += f"**{name}**: {field.description}\n"
191191
update_md_files("BBOT UNIVERSAL MODULE OPTIONS", universal_module_options_table)
192192

193193
# BBOT module options

bbot/test/test_step_1/test_cli.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -466,13 +466,15 @@ def test_cli_config_validation(monkeypatch, caplog):
466466
monkeypatch.setattr(sys, "exit", lambda *args, **kwargs: True)
467467
monkeypatch.setattr(os, "_exit", lambda *args, **kwargs: True)
468468

469-
# incorrect module option
469+
# incorrect module name nested under modules.* — surfaces as an unknown
470+
# module with a closest-match suggestion (more useful than the legacy
471+
# "Could not find config option ..." phrasing)
470472
caplog.clear()
471473
assert not caplog.text
472474
monkeypatch.setattr("sys.argv", ["bbot", "-c", "modules.ipnegibhor.num_bits=4"])
473475
cli.main()
474-
assert 'Could not find config option "modules.ipnegibhor.num_bits"' in caplog.text
475-
assert 'Did you mean "modules.ipneighbor.num_bits"?' in caplog.text
476+
assert 'Could not find module "ipnegibhor"' in caplog.text
477+
assert 'Did you mean "ipneighbor"?' in caplog.text
476478

477479
# incorrect global option
478480
caplog.clear()

0 commit comments

Comments
 (0)