diff --git a/graph_db/mixins/recon/js_recon_mixin.py b/graph_db/mixins/recon/js_recon_mixin.py index 35c1336e..692c849a 100644 --- a/graph_db/mixins/recon/js_recon_mixin.py +++ b/graph_db/mixins/recon/js_recon_mixin.py @@ -166,15 +166,37 @@ def _link_to_file(session, node_id: str, node_label: str, rel_type: str, source_ return False if rel_type not in ('HAS_JS_FINDING', 'HAS_SECRET', 'HAS_ENDPOINT'): return False - session.run( + result = session.run( f""" MATCH (file:JsReconFinding {{id: $fid, finding_type: 'js_file'}}) MATCH (n:{node_label} {{id: $nid}}) - MERGE (file)-[:{rel_type}]->(n) + MERGE (file)-[r:{rel_type}]->(n) + RETURN count(r) AS linked """, fid=file_node_id, nid=node_id ) - return True + record = result.single() + linked = record.get("linked", 0) if record else 0 + return int(linked) > 0 + + def _link_endpoint_to_file(session, source_url: str, path: str, method: str, baseurl: str) -> bool: + """Link a JS file to an Endpoint using the Endpoint canonical identity.""" + file_node_id = file_node_ids.get(source_url) + if not file_node_id: + return False + result = session.run( + """ + MATCH (file:JsReconFinding {id: $fid, finding_type: 'js_file'}) + MATCH (n:Endpoint {path: $path, method: $method, baseurl: $baseurl, user_id: $uid, project_id: $pid}) + MERGE (file)-[r:HAS_ENDPOINT]->(n) + RETURN count(r) AS linked + """, + fid=file_node_id, path=path, method=method, baseurl=baseurl, + uid=user_id, pid=project_id, + ) + record = result.single() + linked = record.get("linked", 0) if record else 0 + return int(linked) > 0 # --- 1. JsReconFinding nodes (non-secret findings) --- finding_types = [ @@ -549,6 +571,9 @@ def _link_to_file(session, node_id: str, node_label: str, rel_type: str, source_ created_endpoints = set() for ep in js_recon_data.get("endpoints", []): try: + if ep.get("validation_status") != "hittable": + continue + path = ep.get("path", "") method = ep.get("method", "GET") source_js = ep.get("source_js", "") @@ -569,29 +594,49 @@ def _link_to_file(session, node_id: str, node_label: str, rel_type: str, source_ created_endpoints.add(ep_key) effective_baseurl = base_url or 'upload' + id_hash = hashlib.sha256(f"{effective_baseurl}:{method}:{path}".encode()).hexdigest()[:16] + node_id = f"endpoint-{user_id}-{project_id}-js-{id_hash}" - session.run( + result = session.run( """ MERGE (e:Endpoint {path: $path, method: $method, baseurl: $baseurl, user_id: $uid, project_id: $pid}) - ON CREATE SET e.source = 'js_recon', + ON CREATE SET e.id = COALESCE(e.id, $id), + e.source = 'js_recon', e.category = $category, e.full_url = $full_url, + e.status_code = $status_code, + e.resolved_url = $resolved_url, + e.validation_status = $validation_status, e.endpoint_type = $ep_type, + e._js_recon_created = true, e.updated_at = datetime() - ON MATCH SET e.js_recon_source = true, + ON MATCH SET e.id = COALESCE(e.id, $id), + e.js_recon_source = true, e.endpoint_type = COALESCE(e.endpoint_type, $ep_type), e.full_url = COALESCE(e.full_url, $full_url), + e.status_code = COALESCE($status_code, e.status_code), + e.resolved_url = CASE WHEN $resolved_url <> '' THEN $resolved_url ELSE e.resolved_url END, + e.validation_status = COALESCE($validation_status, e.validation_status), e.updated_at = datetime() + WITH e, COALESCE(e._js_recon_created, false) AS created + REMOVE e._js_recon_created + RETURN created AS created """, path=path, method=method, baseurl=effective_baseurl, uid=user_id, pid=project_id, + id=node_id, category=ep.get("category", "endpoint"), full_url=ep.get("full_url", ""), + status_code=ep.get("status_code"), + resolved_url=ep.get("resolved_url", ""), + validation_status=ep.get("validation_status"), ep_type=ep.get("type", "rest"), ) - stats["endpoints_created"] += 1 + record = result.single() + if record and bool(record.get("created", False)): + stats["endpoints_created"] += 1 - if _link_to_file(session, node_id, 'Endpoint', 'HAS_ENDPOINT', source_js): + if _link_endpoint_to_file(session, source_js, path, method, effective_baseurl): stats["relationships_created"] += 1 except Exception as e: diff --git a/recon/main_recon_modules/js_recon.py b/recon/main_recon_modules/js_recon.py index 97829d72..4707b265 100644 --- a/recon/main_recon_modules/js_recon.py +++ b/recon/main_recon_modules/js_recon.py @@ -20,7 +20,7 @@ import requests from pathlib import Path from concurrent.futures import ThreadPoolExecutor, as_completed -from urllib.parse import urlparse +from urllib.parse import urljoin, urlparse from typing import Optional from recon.helpers.js_recon.patterns import ( @@ -66,6 +66,93 @@ _MAX_JS_FILE_SIZE = 5 * 1024 * 1024 +def _parse_endpoint_validation_headers(header_lines: list) -> dict: + """Parse user-provided endpoint validation headers.""" + headers = { + 'User-Agent': 'Mozilla/5.0 (compatible; RedAmon-JsRecon/1.0)', + } + + for line in header_lines: + if not isinstance(line, str): + continue + + line = line.strip() + if not line or ':' not in line: + continue + + name, value = line.split(':', 1) + name = name.strip() + value = value.strip() + if not name or not value: + continue + + headers[name] = value + + return headers + + +def _resolve_endpoint_candidate_url(endpoint: dict) -> str: + """Resolve an endpoint candidate to an absolute URL suitable for validation.""" + candidate = endpoint.get('full_url') or endpoint.get('path') or '' + if not isinstance(candidate, str): + return '' + + candidate = candidate.strip() + if not candidate: + return '' + + if candidate.startswith(('http://', 'https://')): + return candidate + + source_js = endpoint.get('source_js') or '' + source_scheme = '' + if isinstance(source_js, str) and source_js: + source_scheme = urlparse(source_js).scheme + + if candidate.startswith('//'): + scheme = source_scheme if source_scheme in ('http', 'https') else 'https' + return f'{scheme}:{candidate}' + + if not isinstance(source_js, str) or urlparse(source_js).scheme not in ('http', 'https'): + return '' + + return urljoin(source_js, candidate) + + +def _url_origin(url: str) -> str: + """Return a normalized URL origin, or an empty string when unavailable.""" + if not isinstance(url, str): + return '' + try: + parsed = urlparse(url) + if parsed.scheme in ('http', 'https') and parsed.netloc: + return f"{parsed.scheme}://{parsed.netloc}".lower() + except Exception: + pass + return '' + + +def _endpoint_probe_method(extracted_method: str) -> str: + """Choose a non-mutating method for endpoint validation probes.""" + method = extracted_method.upper() if isinstance(extracted_method, str) else 'GET' + if method in ('HEAD', 'OPTIONS'): + return method + if method == 'GET': + return 'GET' + return 'OPTIONS' + + +def _endpoint_probe_headers(endpoint: dict, resolved_url: str, custom_header_lines: list) -> dict: + """Use custom validation headers only for same-origin endpoint probes.""" + default_headers = _parse_endpoint_validation_headers([]) + custom_headers = _parse_endpoint_validation_headers(custom_header_lines) + source_origin = _url_origin(endpoint.get('source_js', '')) + target_origin = _url_origin(resolved_url) + if source_origin and source_origin == target_origin: + return custom_headers + return default_headers + + def _is_js_url(url: str) -> bool: """Check if a URL points to a JavaScript file.""" try: @@ -538,6 +625,104 @@ def _validate_one(secret: dict) -> None: return secrets +def _validate_extracted_endpoints(endpoints: list, settings: dict, request_func=None) -> list: + """Validate extracted endpoints with lightweight non-following HTTP probes.""" + if not settings.get('JS_RECON_VALIDATE_ENDPOINTS', True): + for endpoint in endpoints: + endpoint['validation_status'] = 'unvalidated' + endpoint['validation_error'] = 'validation_disabled' + return endpoints + + def _clamp_int(value, default: int, minimum: int, maximum: int) -> int: + try: + value = int(value) + except (TypeError, ValueError): + value = default + return max(minimum, min(value, maximum)) + + def _configured_status_codes(value) -> set: + default_codes = [200, 201, 204, 301, 302, 307, 308, 401, 403, 405] + if value is None: + value = default_codes + elif isinstance(value, str): + value = re.split(r'[\s,]+', value) + + codes = set() + try: + iterator = iter(value) + except TypeError: + iterator = iter([value]) + + for code in iterator: + try: + codes.add(int(code)) + except (TypeError, ValueError): + continue + + return codes or set(default_codes) + + accepted_statuses = _configured_status_codes( + settings.get('JS_RECON_ENDPOINT_ACCEPT_STATUS') + ) + custom_header_lines = settings.get('JS_RECON_ENDPOINT_CUSTOM_HEADERS', []) + timeout = _clamp_int(settings.get('JS_RECON_VALIDATION_TIMEOUT', 5), 5, 1, 30) + workers = _clamp_int(settings.get('JS_RECON_CONCURRENCY', 10), 10, 1, 20) + requester = request_func or requests.request + + def _validate_one(endpoint: dict) -> None: + candidate = endpoint.get('full_url') or endpoint.get('path') or '' + if isinstance(candidate, str) and candidate.strip().lower().startswith(('ws://', 'wss://')): + endpoint['validation_status'] = 'unvalidated' + endpoint['validation_error'] = 'unsupported_scheme' + return + + method = endpoint.get('method') or 'GET' + if not isinstance(method, str): + method = 'GET' + method = method.upper() + if endpoint.get('type') == 'websocket' or method in ('WS', 'WSS'): + endpoint['validation_status'] = 'unvalidated' + endpoint['validation_error'] = 'unsupported_scheme' + return + + resolved_url = _resolve_endpoint_candidate_url(endpoint) + if not resolved_url: + endpoint['validation_status'] = 'unvalidated' + endpoint['validation_error'] = 'unresolved_url' + return + + endpoint['resolved_url'] = resolved_url + probe_method = _endpoint_probe_method(method) + headers = _endpoint_probe_headers(endpoint, resolved_url, custom_header_lines) + + try: + response = requester( + probe_method, + resolved_url, + timeout=timeout, + headers=headers, + allow_redirects=False, + ) + status_code = int(getattr(response, 'status_code', 0)) + endpoint['status_code'] = status_code + endpoint['validation_status'] = ( + 'hittable' if status_code in accepted_statuses else 'not_hittable' + ) + endpoint['validation_error'] = '' + except requests.Timeout: + endpoint['validation_status'] = 'not_hittable' + endpoint['validation_error'] = 'timeout' + except Exception as exc: + endpoint['validation_status'] = 'not_hittable' + endpoint['validation_error'] = type(exc).__name__ + + if endpoints: + with ThreadPoolExecutor(max_workers=workers) as ex: + list(ex.map(_validate_one, endpoints)) + + return endpoints + + def _extract_subdomains( endpoints: list, root_domain: str, @@ -607,11 +792,18 @@ def _build_summary(results: dict) -> dict: validated['unvalidated'] += 1 filtered_stats = results.get('_filtered_stats', {}) + endpoint_validation = {'hittable': 0, 'not_hittable': 0, 'unvalidated': 0} + + for endpoint in results.get('endpoints', []): + vstatus = endpoint.get('validation_status', 'unvalidated') + endpoint_validation[vstatus] = endpoint_validation.get(vstatus, 0) + 1 return { 'secrets_by_severity': severity_counts, 'secrets_by_type': type_counts, 'validated_keys': validated, + 'endpoint_validation': endpoint_validation, + 'endpoints_hittable': endpoint_validation.get('hittable', 0), 'false_positives_filtered': filtered_stats, 'false_positives_filtered_total': sum(filtered_stats.values()), 'dependency_confusion_count': len(results.get('dependencies', [])), @@ -688,6 +880,9 @@ def run_js_recon(combined_result: dict, settings: dict) -> dict: ("JS_RECON_CUSTOM_FRAMEWORKS", "Filtering"), ("JS_RECON_VALIDATE_KEYS", "Validation"), ("JS_RECON_VALIDATION_TIMEOUT", "Validation"), + ("JS_RECON_VALIDATE_ENDPOINTS", "Endpoint validation"), + ("JS_RECON_ENDPOINT_ACCEPT_STATUS", "Endpoint validation"), + ("JS_RECON_ENDPOINT_CUSTOM_HEADERS", "Endpoint validation"), ("JS_RECON_AI_SDK_DETECTION_ENABLED", "AI surface"), ], ) @@ -742,6 +937,16 @@ def run_js_recon(combined_result: dict, settings: dict) -> dict: print(f"[*][JsRecon] Validating {len(results['secrets'])} discovered secrets...") results['secrets'] = _validate_secrets(results['secrets'], settings) + # 6. Validate endpoints + if results.get('endpoints'): + print(f"[*][JsRecon] Validating {len(results['endpoints'])} discovered endpoints...") + results['endpoints'] = _validate_extracted_endpoints(results['endpoints'], settings) + hittable_count = sum( + 1 for endpoint in results['endpoints'] + if endpoint.get('validation_status') == 'hittable' + ) + print(f"[+][JsRecon] Endpoint validation: {hittable_count} hittable") + # 6. Subdomain feedback loop root_domain = combined_result.get('domain', '') known_subs = set() diff --git a/recon/project_settings.py b/recon/project_settings.py index 90023dfd..bf4cfbf7 100644 --- a/recon/project_settings.py +++ b/recon/project_settings.py @@ -407,6 +407,9 @@ 'JS_RECON_VALIDATE_KEYS': True, 'JS_RECON_VALIDATION_TIMEOUT': 5, 'JS_RECON_EXTRACT_ENDPOINTS': True, + 'JS_RECON_VALIDATE_ENDPOINTS': True, + 'JS_RECON_ENDPOINT_ACCEPT_STATUS': [200, 201, 204, 301, 302, 307, 308, 401, 403, 405], + 'JS_RECON_ENDPOINT_CUSTOM_HEADERS': [], 'JS_RECON_REGEX_PATTERNS': True, 'JS_RECON_SOURCE_MAPS': True, 'JS_RECON_DEPENDENCY_CHECK': True, @@ -1026,6 +1029,9 @@ def fetch_project_settings(project_id: str, webapp_url: str) -> dict[str, Any]: settings['JS_RECON_CUSTOM_PACKAGES'] = project.get('jsReconCustomPackages', DEFAULT_SETTINGS['JS_RECON_CUSTOM_PACKAGES']) settings['JS_RECON_CUSTOM_ENDPOINT_KEYWORDS'] = project.get('jsReconCustomEndpointKeywords', DEFAULT_SETTINGS['JS_RECON_CUSTOM_ENDPOINT_KEYWORDS']) settings['JS_RECON_CUSTOM_FRAMEWORKS'] = project.get('jsReconCustomFrameworks', DEFAULT_SETTINGS['JS_RECON_CUSTOM_FRAMEWORKS']) + settings['JS_RECON_VALIDATE_ENDPOINTS'] = project.get('jsReconValidateEndpoints', DEFAULT_SETTINGS['JS_RECON_VALIDATE_ENDPOINTS']) + settings['JS_RECON_ENDPOINT_ACCEPT_STATUS'] = project.get('jsReconEndpointAcceptStatus') or DEFAULT_SETTINGS['JS_RECON_ENDPOINT_ACCEPT_STATUS'] + settings['JS_RECON_ENDPOINT_CUSTOM_HEADERS'] = project.get('jsReconEndpointCustomHeaders', DEFAULT_SETTINGS['JS_RECON_ENDPOINT_CUSTOM_HEADERS']) settings['JS_RECON_AI_SDK_DETECTION_ENABLED'] = project.get('jsReconAiSdkDetectionEnabled', DEFAULT_SETTINGS['JS_RECON_AI_SDK_DETECTION_ENABLED']) # FFuf Directory Fuzzer diff --git a/recon/tests/test_js_recon.py b/recon/tests/test_js_recon.py index b71a97ed..9ef8c71d 100644 --- a/recon/tests/test_js_recon.py +++ b/recon/tests/test_js_recon.py @@ -32,6 +32,8 @@ def _load_module(name, filepath): dependency = _load_module('recon.helpers.js_recon.dependency', os.path.join(BASE, 'helpers/js_recon/dependency.py')) endpoints_mod = _load_module('recon.helpers.js_recon.endpoints', os.path.join(BASE, 'helpers/js_recon/endpoints.py')) framework = _load_module('recon.helpers.js_recon.framework', os.path.join(BASE, 'helpers/js_recon/framework.py')) +ai_signal_catalog = _load_module('recon.helpers.ai_signal_catalog', os.path.join(BASE, 'helpers/ai_signal_catalog.py')) +js_recon = _load_module('recon.main_recon_modules.js_recon', os.path.join(BASE, 'main_recon_modules/js_recon.py')) def _scan(js, url='test.js', **kwargs): @@ -864,6 +866,203 @@ def test_disabled(self): self.assertEqual(endpoints_mod.extract_endpoints([], {'JS_RECON_EXTRACT_ENDPOINTS': False}), []) +class TestEndpointValidationHelpers(unittest.TestCase): + + def test_parse_endpoint_validation_headers_accepts_auth_and_cookie(self): + headers = js_recon._parse_endpoint_validation_headers([ + 'Cookie: session=abc123', + 'Authorization: Bearer token', + '', + 'malformed', + object(), + 'X-Blank: ', + ]) + + self.assertEqual(headers['Cookie'], 'session=abc123') + self.assertEqual(headers['Authorization'], 'Bearer token') + self.assertEqual(headers['User-Agent'], 'Mozilla/5.0 (compatible; RedAmon-JsRecon/1.0)') + self.assertNotIn('X-Blank', headers) + self.assertNotIn('malformed', headers) + + def test_resolve_endpoint_candidate_url_resolves_relative_from_source_js(self): + url = js_recon._resolve_endpoint_candidate_url({ + 'path': '/api/users', + 'source_js': 'https://app.example.com/static/main.js', + }) + + self.assertEqual(url, 'https://app.example.com/api/users') + + def test_resolve_endpoint_candidate_url_preserves_absolute_full_url(self): + url = js_recon._resolve_endpoint_candidate_url({ + 'full_url': 'https://api.example.com/v1/users', + 'source_js': 'https://app.example.com/static/main.js', + }) + + self.assertEqual(url, 'https://api.example.com/v1/users') + + def test_resolve_endpoint_candidate_url_returns_empty_for_uploaded_relative(self): + url = js_recon._resolve_endpoint_candidate_url({ + 'path': '/api/users', + 'source_js': 'upload://bundle.js', + }) + + self.assertEqual(url, '') + + def test_validate_extracted_endpoints_marks_accepted_status_hittable(self): + calls = [] + + class Response: + status_code = 403 + + def request_func(method, url, **kwargs): + calls.append((method, url, kwargs)) + return Response() + + endpoints = [{ + 'path': '/admin', + 'source_js': 'https://app.example.com/static/main.js', + 'method': 'post', + }] + + validated = js_recon._validate_extracted_endpoints(endpoints, { + 'JS_RECON_VALIDATE_ENDPOINTS': True, + 'JS_RECON_ENDPOINT_CUSTOM_HEADERS': ['Cookie: session=abc123'], + }, request_func=request_func) + + self.assertIs(validated, endpoints) + self.assertEqual(validated[0]['validation_status'], 'hittable') + self.assertEqual(validated[0]['status_code'], 403) + self.assertEqual(validated[0]['resolved_url'], 'https://app.example.com/admin') + self.assertEqual(validated[0]['validation_error'], '') + self.assertEqual(calls[0][0], 'OPTIONS') + self.assertEqual(calls[0][1], 'https://app.example.com/admin') + self.assertEqual(calls[0][2]['headers']['Cookie'], 'session=abc123') + self.assertFalse(calls[0][2]['allow_redirects']) + + def test_validate_extracted_endpoints_does_not_replay_mutating_methods(self): + calls = [] + + class Response: + status_code = 204 + + endpoints = [{ + 'path': '/api/user/123', + 'source_js': 'https://app.example.com/static/main.js', + 'method': 'DELETE', + }] + + validated = js_recon._validate_extracted_endpoints( + endpoints, + {'JS_RECON_VALIDATE_ENDPOINTS': True}, + request_func=lambda method, url, **kwargs: calls.append((method, url, kwargs)) or Response(), + ) + + self.assertEqual(validated[0]['validation_status'], 'hittable') + self.assertEqual(calls[0][0], 'OPTIONS') + + def test_validate_extracted_endpoints_does_not_send_custom_headers_cross_origin(self): + calls = [] + + class Response: + status_code = 200 + + endpoints = [{ + 'full_url': 'https://analytics.vendor.example/collect', + 'source_js': 'https://app.example.com/static/main.js', + 'method': 'GET', + }] + + js_recon._validate_extracted_endpoints( + endpoints, + { + 'JS_RECON_VALIDATE_ENDPOINTS': True, + 'JS_RECON_ENDPOINT_CUSTOM_HEADERS': [ + 'Cookie: session=abc123', + 'Authorization: Bearer token', + ], + }, + request_func=lambda method, url, **kwargs: calls.append((method, url, kwargs)) or Response(), + ) + + headers = calls[0][2]['headers'] + self.assertNotIn('Cookie', headers) + self.assertNotIn('Authorization', headers) + self.assertEqual(headers['User-Agent'], 'Mozilla/5.0 (compatible; RedAmon-JsRecon/1.0)') + + def test_validate_extracted_endpoints_keeps_404_as_not_hittable(self): + class Response: + status_code = 404 + + endpoints = [{ + 'full_url': 'https://api.example.com/missing', + 'method': 'GET', + }] + + validated = js_recon._validate_extracted_endpoints( + endpoints, + {'JS_RECON_VALIDATE_ENDPOINTS': True}, + request_func=lambda *args, **kwargs: Response(), + ) + + self.assertEqual(len(validated), 1) + self.assertEqual(validated[0]['full_url'], 'https://api.example.com/missing') + self.assertEqual(validated[0]['validation_status'], 'not_hittable') + self.assertEqual(validated[0]['status_code'], 404) + self.assertEqual(validated[0]['resolved_url'], 'https://api.example.com/missing') + self.assertEqual(validated[0]['validation_error'], '') + + def test_validate_extracted_endpoints_uploaded_relative_is_unvalidated(self): + calls = [] + endpoints = [{ + 'path': '/api/users', + 'source_js': 'upload://bundle.js', + }] + + validated = js_recon._validate_extracted_endpoints( + endpoints, + {'JS_RECON_VALIDATE_ENDPOINTS': True}, + request_func=lambda *args, **kwargs: calls.append((args, kwargs)), + ) + + self.assertEqual(validated[0]['validation_status'], 'unvalidated') + self.assertEqual(validated[0]['validation_error'], 'unresolved_url') + self.assertEqual(calls, []) + + def test_validate_extracted_endpoints_relative_websocket_is_unvalidated(self): + calls = [] + endpoints = [{ + 'method': 'WS', + 'type': 'websocket', + 'path': '/ws', + 'source_js': 'https://app.example.com/app.js', + }] + + validated = js_recon._validate_extracted_endpoints( + endpoints, + {'JS_RECON_VALIDATE_ENDPOINTS': True}, + request_func=lambda *args, **kwargs: calls.append((args, kwargs)), + ) + + self.assertEqual(validated[0]['validation_status'], 'unvalidated') + self.assertEqual(validated[0]['validation_error'], 'unsupported_scheme') + self.assertEqual(calls, []) + + def test_validate_extracted_endpoints_disabled_marks_unvalidated(self): + calls = [] + endpoints = [{'full_url': 'https://api.example.com/users'}] + + validated = js_recon._validate_extracted_endpoints( + endpoints, + {'JS_RECON_VALIDATE_ENDPOINTS': False}, + request_func=lambda *args, **kwargs: calls.append((args, kwargs)), + ) + + self.assertIs(validated, endpoints) + self.assertEqual(validated[0]['validation_status'], 'unvalidated') + self.assertEqual(validated[0]['validation_error'], 'validation_disabled') + self.assertEqual(calls, []) + + # ============================================================ # FRAMEWORK MODULE TESTS # ============================================================ diff --git a/tests/test_js_recon_graph_ingestion.py b/tests/test_js_recon_graph_ingestion.py new file mode 100644 index 00000000..3a511b2a --- /dev/null +++ b/tests/test_js_recon_graph_ingestion.py @@ -0,0 +1,163 @@ +import hashlib +import os +import sys +import unittest +from unittest.mock import MagicMock + + +_REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _REPO not in sys.path: + sys.path.insert(0, _REPO) + +sys.modules.setdefault("neo4j", MagicMock()) +sys.modules.setdefault("dotenv", MagicMock()) + +from graph_db.mixins.recon.js_recon_mixin import JsReconMixin + + +class FakeSession: + def __init__(self, endpoint_created=True, endpoint_linked=True): + self.calls = [] + self.endpoint_created = endpoint_created + self.endpoint_linked = endpoint_linked + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def run(self, query, **kwargs): + self.calls.append((query, kwargs)) + if "RETURN created AS created" in query: + return FakeResult({"created": self.endpoint_created}) + if "RETURN count(r) AS linked" in query: + return FakeResult({"linked": 1 if self.endpoint_linked else 0}) + return FakeResult({"enriched": 0}) + + +class FakeResult: + def __init__(self, record): + self.record = record + + def single(self): + return self.record + + +class FakeDriver: + def __init__(self, endpoint_created=True, endpoint_linked=True): + self.session_obj = FakeSession( + endpoint_created=endpoint_created, + endpoint_linked=endpoint_linked, + ) + + def session(self): + return self.session_obj + + +class GraphClient(JsReconMixin): + def __init__(self, endpoint_created=True, endpoint_linked=True): + self.driver = FakeDriver( + endpoint_created=endpoint_created, + endpoint_linked=endpoint_linked, + ) + + +class TestJsReconGraphIngestion(unittest.TestCase): + def test_only_hittable_endpoints_are_ingested_with_validation_metadata_and_id(self): + client = GraphClient() + recon_data = { + "domain": "example.com", + "js_recon": { + "scan_metadata": {"scan_timestamp": "2026-05-28T00:00:00Z"}, + "endpoints": [ + { + "path": "/api/live", + "method": "POST", + "source_js": "https://example.com/app.js", + "base_url": "https://example.com", + "full_url": "https://example.com/api/live", + "validation_status": "hittable", + "status_code": 200, + "resolved_url": "https://example.com/api/live", + }, + { + "path": "/api/dead", + "method": "GET", + "source_js": "https://example.com/app.js", + "base_url": "https://example.com", + "validation_status": "not_hittable", + }, + { + "path": "/api/unknown", + "method": "GET", + "source_js": "https://example.com/app.js", + "base_url": "https://example.com", + }, + ], + }, + } + + stats = client.update_graph_from_js_recon(recon_data, "u1", "p1") + + endpoint_calls = [ + kwargs for query, kwargs in client.driver.session_obj.calls + if "MERGE (e:Endpoint" in query + ] + self.assertEqual(stats["endpoints_created"], 1) + self.assertEqual(len(endpoint_calls), 1) + + expected_hash = hashlib.sha256( + "https://example.com:POST:/api/live".encode() + ).hexdigest()[:16] + self.assertEqual(endpoint_calls[0]["id"], f"endpoint-u1-p1-js-{expected_hash}") + self.assertEqual(endpoint_calls[0]["validation_status"], "hittable") + self.assertEqual(endpoint_calls[0]["status_code"], 200) + self.assertEqual(endpoint_calls[0]["resolved_url"], "https://example.com/api/live") + link_calls = [ + (query, kwargs) for query, kwargs in client.driver.session_obj.calls + if "MERGE (file)-[r:HAS_ENDPOINT]->(n)" in query + ] + self.assertEqual(len(link_calls), 1) + self.assertIn("MATCH (n:Endpoint {path: $path, method: $method, baseurl: $baseurl", link_calls[0][0]) + self.assertNotIn("MATCH (n:Endpoint {id: $nid})", link_calls[0][0]) + self.assertEqual(link_calls[0][1]["path"], "/api/live") + self.assertEqual(link_calls[0][1]["method"], "POST") + self.assertEqual(link_calls[0][1]["baseurl"], "https://example.com") + self.assertEqual(stats["errors"], []) + + def test_existing_endpoint_and_unmatched_file_link_do_not_increment_counts(self): + client = GraphClient(endpoint_created=False, endpoint_linked=False) + recon_data = { + "domain": "example.com", + "js_recon": { + "scan_metadata": {"scan_timestamp": "2026-05-28T00:00:00Z"}, + "endpoints": [ + { + "path": "/api/live", + "method": "POST", + "source_js": "https://example.com/app.js", + "base_url": "https://example.com", + "full_url": "https://example.com/api/live", + "validation_status": "hittable", + "status_code": 200, + "resolved_url": "https://example.com/api/live", + }, + ], + }, + } + + stats = client.update_graph_from_js_recon(recon_data, "u1", "p1") + + self.assertEqual(stats["endpoints_created"], 0) + self.assertEqual(stats["relationships_created"], 1) + link_calls = [ + (query, kwargs) for query, kwargs in client.driver.session_obj.calls + if "MERGE (file)-[r:HAS_ENDPOINT]->(n)" in query + ] + self.assertEqual(len(link_calls), 1) + self.assertEqual(stats["errors"], []) + + +if __name__ == "__main__": + unittest.main() diff --git a/webapp/prisma/migrations/20260528190000_add_js_recon_endpoint_validation/migration.sql b/webapp/prisma/migrations/20260528190000_add_js_recon_endpoint_validation/migration.sql new file mode 100644 index 00000000..ea8ae455 --- /dev/null +++ b/webapp/prisma/migrations/20260528190000_add_js_recon_endpoint_validation/migration.sql @@ -0,0 +1,3 @@ +ALTER TABLE "projects" ADD COLUMN "js_recon_validate_endpoints" BOOLEAN NOT NULL DEFAULT true; +ALTER TABLE "projects" ADD COLUMN "js_recon_endpoint_accept_status" INTEGER[] DEFAULT ARRAY[200, 201, 204, 301, 302, 307, 308, 401, 403, 405]::INTEGER[]; +ALTER TABLE "projects" ADD COLUMN "js_recon_endpoint_custom_headers" TEXT[] DEFAULT ARRAY[]::TEXT[]; diff --git a/webapp/prisma/schema.prisma b/webapp/prisma/schema.prisma index 28364c98..2f697de8 100644 --- a/webapp/prisma/schema.prisma +++ b/webapp/prisma/schema.prisma @@ -490,6 +490,9 @@ model Project { jsReconValidateKeys Boolean @default(true) @map("js_recon_validate_keys") jsReconValidationTimeout Int @default(5) @map("js_recon_validation_timeout") jsReconExtractEndpoints Boolean @default(true) @map("js_recon_extract_endpoints") + jsReconValidateEndpoints Boolean @default(true) @map("js_recon_validate_endpoints") + jsReconEndpointAcceptStatus Int[] @default([200, 201, 204, 301, 302, 307, 308, 401, 403, 405]) @map("js_recon_endpoint_accept_status") + jsReconEndpointCustomHeaders String[] @default([]) @map("js_recon_endpoint_custom_headers") jsReconRegexPatterns Boolean @default(true) @map("js_recon_regex_patterns") jsReconSourceMaps Boolean @default(true) @map("js_recon_source_maps") jsReconDependencyCheck Boolean @default(true) @map("js_recon_dependency_check") diff --git a/webapp/src/app/api/projects/route.ts b/webapp/src/app/api/projects/route.ts index 4cf41868..25ddb90b 100644 --- a/webapp/src/app/api/projects/route.ts +++ b/webapp/src/app/api/projects/route.ts @@ -157,6 +157,7 @@ export async function POST(request: NextRequest) { 'nucleiTags', 'nucleiExcludeTags', 'httpxPaths', 'httpxCustomHeaders', 'httpxMatchCodes', 'httpxFilterCodes', 'katanaExcludePatterns', 'katanaCustomHeaders', + 'jsReconEndpointCustomHeaders', 'gauProviders', 'gauBlacklistExtensions', 'gauYearRange', 'kiterunnerWordlists', 'kiterunnerHeaders', 'kiterunnerBruteforceMethods', 'roeExcludedHosts', 'roeExcludedHostReasons', 'roeTimeWindowDays', @@ -170,6 +171,7 @@ export async function POST(request: NextRequest) { } // Int[] fields: ensure elements are numbers, not strings const INT_ARRAY_FIELDS = [ + 'jsReconEndpointAcceptStatus', 'gauVerifyAcceptStatus', 'kiterunnerIgnoreStatus', 'kiterunnerMatchStatus', ] for (const key of INT_ARRAY_FIELDS) { diff --git a/webapp/src/components/projects/ProjectForm/sections/JsReconSection.tsx b/webapp/src/components/projects/ProjectForm/sections/JsReconSection.tsx index a047edd7..e9d35a0e 100644 --- a/webapp/src/components/projects/ProjectForm/sections/JsReconSection.tsx +++ b/webapp/src/components/projects/ProjectForm/sections/JsReconSection.tsx @@ -251,6 +251,8 @@ The example above is ready to copy-paste into a .json file.`, }, ] +const DEFAULT_ENDPOINT_ACCEPT_STATUS = [200, 201, 204, 301, 302, 307, 308, 401, 403, 405] + interface JsReconSectionProps { data: FormData updateField: (field: K, value: FormData[K]) => void @@ -572,6 +574,55 @@ export function JsReconSection({ data, updateField, projectId, mode, onRun }: Js onChange={(checked) => updateField('jsReconExtractEndpoints' as any, checked)} /> +
+
+ Validate Extracted Endpoints +

Probe endpoints extracted from JS before graph write. Headers below apply only to endpoint probes.

+
+ updateField('jsReconValidateEndpoints' as any, checked)} + /> +
+ {((data as any).jsReconValidateEndpoints ?? true) && ( +
+
+ + updateField( + 'jsReconEndpointAcceptStatus' as any, + e.target.value + .split(',') + .map((value) => value.trim()) + .filter(Boolean) + .map((value) => Number(value)) + .filter(Number.isFinite) as any, + )} + /> + Comma-separated HTTP status codes accepted as live endpoints +
+
+ +