Skip to content

Commit cba9d95

Browse files
committed
working krr scans
1 parent fdebc23 commit cba9d95

1 file changed

Lines changed: 104 additions & 100 deletions

File tree

  • playbooks/robusta_playbooks

playbooks/robusta_playbooks/krr.py

Lines changed: 104 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
KRR_MEMORY_LIMIT: str = os.getenv("KRR_MEMORY_LIMIT", "2Gi")
3939
KRR_MEMORY_REQUEST: str = os.getenv("KRR_MEMORY_REQUEST", "2Gi")
4040
KRR_STRATEGY: str = os.getenv("KRR_STRATEGY", "simple")
41+
KRR_PUBLISH_URL: str = os.getenv("KRR_PUBLISH_URL", f"http://{RELEASE_NAME}-runner.{INSTALLATION_NAMESPACE}.svc.{CLUSTER_DOMAIN}/api/trigger")
4142

4243

4344
SeverityType = Literal["CRITICAL", "WARNING", "OK", "GOOD", "UNKNOWN"]
@@ -300,10 +301,103 @@ def _generate_prometheus_secrets(prom_config: PrometheusConfig) -> List[KRRSecre
300301
class ProcessScanParams(ActionParams):
    """
    Parameters accepted by the ``process_scan`` action.

    scan_type: kind of scan being reported; ``process_scan`` compares it against "krr".
    result: raw scan payload; ``process_scan`` unpacks it into ``KRRResponse(**result)``.
    scan_id: identifier of the scan; also used to derive the job name ``krr-job-<scan_id>``.
    start_time: scan start timestamp, forwarded unchanged into the scan report.
    """

    scan_type: str
    result: Any
    scan_id: str
    start_time: datetime
306+
304307
@action
def process_scan(event: ExecutionBaseEvent, params: ProcessScanParams):
    """
    Turn a published scan result into a KRR report finding.

    Only scans whose ``scan_type`` is "krr" (case-insensitive) are handled; any
    other type is logged and ignored. When ``params.result`` cannot be loaded
    into a ``KRRResponse``, the error and the raw result are logged, a
    ``scan_updated`` event with ``ScanState.FAILED`` is emitted and no finding
    is created. Otherwise a ``KRRScanReportBlock`` holding one ``ScanReportRow``
    per scanned object is attached to a new "KRR Report" finding on ``event``.

    :param event: execution event used to emit the failure event / add the finding.
    :param params: scan type, raw result, scan id and scan start time.
    """
    # NOTE: `.lower` without parentheses is a bound method and never equals "krr",
    # which made every scan (including KRR ones) bail out here.
    if params.scan_type.lower() != "krr":
        logging.warning(f"Processing scans not supported for type: {params.scan_type}")
        return

    metadata: Dict[str, Any] = {
        "job": {
            "name": f"krr-job-{params.scan_id}",
            "namespace": INSTALLATION_NAMESPACE,
        },
    }

    try:
        krr_scan = KRRResponse(**params.result)
    except Exception as e:
        if isinstance(e, json.JSONDecodeError):
            logging.exception("*KRR scan job failed. Expecting json result.*")
        elif isinstance(e, TypeError):
            # `**params.result` raises TypeError when the result is not a mapping.
            logging.exception(f"*KRR scan job failed.\n Error from KRR pod:\n {e}.*")
        else:
            logging.exception(f"*KRR scan job unexpected error.*\n {e}")

        logging.error(f"Logs: {params.result}")
        event.emit_event(
            "scan_updated",
            scan_id=params.scan_id,
            metadata=metadata,
            state=ScanState.FAILED,
            type=ScanType.KRR,
            start_time=params.start_time,
        )
        return

    metadata["strategy"] = krr_scan.strategy.dict() if krr_scan.strategy else None
    metadata["description"] = krr_scan.description
    metadata["errors"] = krr_scan.errors
    metadata["config"] = krr_scan.config
    metadata["cluster_summary"] = krr_scan.clusterSummary

    scan_block = KRRScanReportBlock(
        title="KRR scan",
        scan_id=params.scan_id,
        type=ScanType.KRR,
        start_time=params.start_time,
        end_time=datetime.now(),
        score=krr_scan.score,
        metadata=metadata,
        results=[
            ScanReportRow(
                scan_id=params.scan_id,
                priority=scan.priority,
                scan_type=ScanType.KRR,
                namespace=scan.object.namespace,
                name=scan.object.name,
                kind=scan.object.kind,
                container=scan.object.container,
                content=[
                    _krr_resource_content(krr_scan, scan, resource)
                    for resource in krr_scan.resources
                ],
            )
            for scan in krr_scan.scans
        ],
        config=params.json(indent=4),
    )

    finding = Finding(
        title="KRR Report",
        source=FindingSource.MANUAL,
        aggregation_key="KrrReport",
        finding_type=FindingType.REPORT,
        failure=False,
    )
    finding.add_enrichment([scan_block], annotations={EnrichmentAnnotation.SCAN: True})
    event.add_finding(finding)


def _krr_resource_content(krr_scan: Any, scan: Any, resource: Any) -> Dict[str, Any]:
    """Build the report entry describing one resource of one scanned object."""
    allocations = scan.object.allocations
    recommended = scan.recommended
    # Single lookup; a missing/empty metric is reported as an empty dict.
    metric = scan.metrics.get(resource)
    return {
        "resource": resource,
        "allocated": {
            "request": allocations["requests"][resource],
            "limit": allocations["limits"][resource],
        },
        "recommended": {
            "request": recommended.requests[resource].value,
            "limit": recommended.limits[resource].value,
        },
        "priority": {
            "request": recommended.requests[resource].priority,
            "limit": recommended.limits[resource].priority,
        },
        "info": recommended.info.get(resource),
        "metric": metric.dict() if metric else {},
        "description": krr_scan.description,
        "strategy": krr_scan.strategy.dict() if krr_scan.strategy else None,
        "warnings": scan.object.warnings,
        "current_pod_count": scan.object.current_pod_count,
    }
400+
307401

308402
@action
309403
def krr_scan(event: ExecutionBaseEvent, params: KRRParams):
@@ -319,7 +413,9 @@ def krr_scan(event: ExecutionBaseEvent, params: KRRParams):
319413
subject = event.get_subject()
320414
args_sanitized = format_event_templated_string(subject, params.args_sanitized)
321415

322-
python_command = f"python krr.py {params.strategy} {args_sanitized} {additional_flags} "
416+
publish_scan_args = f"--publish_scan_url={KRR_PUBLISH_URL} --scan_id={scan_id} --start_time=\"{datetime.now()}\""
417+
418+
python_command = f"python krr.py {params.strategy} {publish_scan_args} {args_sanitized} {additional_flags} "
323419
verbose_str = "-v" if params.krr_verbose else ""
324420
python_command += f"--max-workers {params.max_workers} {verbose_str} -f json --width 2048"
325421

@@ -346,10 +442,7 @@ def krr_scan(event: ExecutionBaseEvent, params: KRRParams):
346442
python_command += " " + _generate_additional_env_args(krr_secrets)
347443

348444
logging.info(f"krr command '{python_command}'")
349-
env_var.append(EnvVar(
350-
name="ROBUSTA_URL",
351-
value=f"http://{RELEASE_NAME}.{INSTALLATION_NAMESPACE}.svc.{CLUSTER_DOMAIN}:5000/api/trigger",
352-
))
445+
353446
resources = ResourceRequirements(
354447
limits={
355448
"memory": (str(KRR_MEMORY_LIMIT)),
@@ -375,7 +468,6 @@ def krr_scan(event: ExecutionBaseEvent, params: KRRParams):
375468
)
376469

377470
start_time = datetime.now()
378-
logs = None
379471
job_name = f"krr-job-{scan_id}"
380472
metadata: Dict[str, Any] = {
381473
"job": {
@@ -398,7 +490,7 @@ def update_state(state: ScanState) -> None:
398490

399491
try:
400492
krr_pod_labels = {"app": "krr.robusta.dev"}
401-
logs = RobustaJob.run_simple_job_spec(
493+
RobustaJob.run_simple_job_spec(
402494
spec,
403495
job_name,
404496
params.timeout,
@@ -407,97 +499,9 @@ def update_state(state: ScanState) -> None:
407499
ttl_seconds_after_finished=43200, # 12 hours
408500
delete_job_post_execution=False,
409501
process_name=False,
410-
finalizers=["robusta.dev/krr-job-output"],
411502
custom_pod_labels=krr_pod_labels,
412503
)
413-
414-
# NOTE: We need to remove the logs before the json result
415-
end_logs_string = "Result collected, displaying..." # This is the last line shown in the logs
416-
returning_result = logs.find(end_logs_string)
417-
if returning_result != -1:
418-
logs = logs[returning_result + len(end_logs_string) :]
419-
420-
# Sometimes we get warnings from the pod before the json result, so we need to remove them
421-
if "{" not in logs:
422-
raise json.JSONDecodeError("Failed to find json result in logs", "", 0)
423-
logs = logs[logs.find("{") :]
424-
425-
krr_response = json.loads(logs)
426-
krr_scan = KRRResponse(**krr_response)
427-
428-
except Exception as e:
429-
if isinstance(e, json.JSONDecodeError):
430-
logging.exception("*KRR scan job failed. Expecting json result.*")
431-
elif isinstance(e, ValidationError):
432-
logging.exception("*KRR scan job failed. Result format issue.*")
433-
elif str(e) == "Failed to reach wait condition":
434-
logging.exception(f"*KRR scan job failed. The job wait condition timed out ({params.timeout}s)*")
435-
else:
436-
logging.exception(f"*KRR scan job unexpected error.*\n {e}")
437-
438-
logging.error(f"Logs: {logs}")
504+
except Exception:
505+
logging.exception(f"Error while executing krr job: {job_name}")
439506
update_state(ScanState.FAILED)
440-
return
441-
else:
442-
metadata["strategy"] = krr_scan.strategy.dict() if krr_scan.strategy else None
443-
metadata["description"] = krr_scan.description
444-
metadata["errors"] = krr_scan.errors
445-
metadata["config"] = krr_scan.config
446-
metadata["cluster_summary"] = krr_scan.clusterSummary
447-
448-
scan_block = KRRScanReportBlock(
449-
title="KRR scan",
450-
scan_id=scan_id,
451-
type=ScanType.KRR,
452-
start_time=start_time,
453-
end_time=datetime.now(),
454-
score=krr_scan.score,
455-
metadata=metadata,
456-
results=[
457-
ScanReportRow(
458-
scan_id=scan_id,
459-
priority=scan.priority,
460-
scan_type=ScanType.KRR,
461-
namespace=scan.object.namespace,
462-
name=scan.object.name,
463-
kind=scan.object.kind,
464-
container=scan.object.container,
465-
content=[
466-
{
467-
"resource": resource,
468-
"allocated": {
469-
"request": scan.object.allocations["requests"][resource],
470-
"limit": scan.object.allocations["limits"][resource],
471-
},
472-
"recommended": {
473-
"request": scan.recommended.requests[resource].value,
474-
"limit": scan.recommended.limits[resource].value,
475-
},
476-
"priority": {
477-
"request": scan.recommended.requests[resource].priority,
478-
"limit": scan.recommended.limits[resource].priority,
479-
},
480-
"info": scan.recommended.info.get(resource),
481-
"metric": scan.metrics.get(resource).dict() if scan.metrics.get(resource) else {},
482-
"description": krr_scan.description,
483-
"strategy": krr_scan.strategy.dict() if krr_scan.strategy else None,
484-
"warnings": scan.object.warnings,
485-
"current_pod_count": scan.object.current_pod_count,
486-
}
487-
for resource in krr_scan.resources
488-
],
489-
)
490-
for scan in krr_scan.scans
491-
],
492-
config=params.json(indent=4),
493-
)
494-
495-
finding = Finding(
496-
title="KRR Report",
497-
source=FindingSource.MANUAL,
498-
aggregation_key="KrrReport",
499-
finding_type=FindingType.REPORT,
500-
failure=False,
501-
)
502-
finding.add_enrichment([scan_block], annotations={EnrichmentAnnotation.SCAN: True})
503-
event.add_finding(finding)
507+
return

0 commit comments

Comments
 (0)