diff --git a/playbooks/robusta_playbooks/popeye.py b/playbooks/robusta_playbooks/popeye.py
index 6a2f7186f..0a83823c0 100644
--- a/playbooks/robusta_playbooks/popeye.py
+++ b/playbooks/robusta_playbooks/popeye.py
@@ -179,8 +179,10 @@ def update_state(state: ScanState) -> None:
             delete_job_post_execution=False,
             process_name=False,
         )
+
+        logs = clean_up_k8s_logs_from_job_output(logs)
         scan = json.loads(logs)
-        popeye_scan = PopeyeReport(**scan["popeye"])
+        scan_report = PopeyeReport(**scan["popeye"])
     except Exception as e:
         if isinstance(e, JSONDecodeError):
             logging.exception(f"*Popeye scan job failed. Expecting json result.*\n\n Result:\n{logs}")
@@ -201,14 +203,14 @@ def update_state(state: ScanState) -> None:
         type=ScanType.POPEYE,
         start_time=start_time,
         end_time=datetime.now(),
-        score=popeye_scan.score,
+        score=scan_report.score,
         metadata=metadata,
         results=[],
         config=f"{params.args} \n\n {params.spinach}",
     )
 
     scan_issues: List[ScanReportRow] = []
-    for section in popeye_scan.sanitizers or []:
+    for section in scan_report.sanitizers or []:
         kind = section.sanitizer
         issues_dict: Dict[str, List[Issue]] = section.issues or {}
         for resource, issuesList in issues_dict.items():
@@ -239,3 +241,24 @@ def update_state(state: ScanState) -> None:
     )
     finding.add_enrichment([scan_block], annotations={EnrichmentAnnotation.SCAN: True})
     event.add_finding(finding)
+
+
+def clean_up_k8s_logs_from_job_output(logs: str) -> str:
+    """Remove any log messages prepended to job output by k8s.
+
+    Everything before the first line that starts with "{" is discarded and the rest of the
+    output is returned unchanged. An empty string is returned when no line starts with "{".
+    """
+    # This would ideally be handled inside RobustaJob.run_simple_job_spec, but the general
+    # job output processing code does not assume JSON output, so identifying spurious text
+    # would be problematic.
+    # Note this code is not able to correctly handle log messages containing endlines. Doing
+    # so would be impossible in some cases (like "{" following an endline) and/or
+    # require meticulous parsing of k8s log messages.
+    if logs.startswith("{"):
+        return logs
+    # Assume every line not looking like JSON is log information added by k8s. A line starts
+    # with "{" only at the very beginning of the output or right after an endline, so one
+    # search finds the JSON start without re-slicing the output for every skipped line.
+    json_start = logs.find("\n{")
+    return "" if json_start == -1 else logs[json_start + 1 :]
diff --git a/tests/test_popey_job_log_cleanup.py b/tests/test_popey_job_log_cleanup.py
new file mode 100644
index 000000000..a0b0abbc7
--- /dev/null
+++ b/tests/test_popey_job_log_cleanup.py
@@ -0,0 +1,41 @@
+import pytest
+
+from playbooks.robusta_playbooks.popeye import clean_up_k8s_logs_from_job_output
+
+
+@pytest.mark.parametrize(
+    "logs,expected_output",
+    [
+        ("", ""),
+        ("{}", "{}"),
+        ("{}\nextra text", "{}\nextra text"),
+        ('{"popeye": []}', '{"popeye": []}'),
+        ('{"popeye": []}\n{}', '{"popeye": []}\n{}'),
+        ('{"invalid_json": {[{', '{"invalid_json": {[{'),
+        (" \t\n", ""),
+        ("no json\nanywhere", ""),
+        ('log with {braces}\n{"x": 1}', '{"x": 1}'),
+        ('\nlog line 1\n\nweird log\n{"x": 3}', '{"x": 3}'),
+        (
+            "xxx Waited for 3.14159s due to client-side throttling, not priority and fairness, blah\n"
+            "\n"
+            '{"data": []}',
+            '{"data": []}',
+        ),
+        (
+            "xxx Waited for 21.37s - request: some text here\n" "\n" '{"x": 123}',
+            '{"x": 123}',
+        ),
+        (
+            "xxx Waited for 666s due to client-side throttling, not priority and fairness, blah\n"
+            "\n"
+            "aaa Waited for 777s - request: some text here\n"
+            "\n"
+            "bad logging message\n"
+            '{"p": "q"}',
+            '{"p": "q"}',
+        ),
+    ],
+)
+def test_clean_up_k8s_logs_from_job_output(logs, expected_output):
+    assert clean_up_k8s_logs_from_job_output(logs) == expected_output