Skip to content

Commit

Permalink
Handle k8s-prepended log messages in Popeye job scan results (#1468)
Browse files Browse the repository at this point in the history
* Handle k8s-prepended log messages in Popeye job scan results

* simplify clean_up_k8s_logs_from_job_output
  • Loading branch information
Robert Szefler authored Jun 21, 2024
1 parent 57be0b5 commit 3d72712
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 3 deletions.
26 changes: 23 additions & 3 deletions playbooks/robusta_playbooks/popeye.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,10 @@ def update_state(state: ScanState) -> None:
delete_job_post_execution=False,
process_name=False,
)

logs = clean_up_k8s_logs_from_job_output(logs)
scan = json.loads(logs)
popeye_scan = PopeyeReport(**scan["popeye"])
scan_report = PopeyeReport(**scan["popeye"])
except Exception as e:
if isinstance(e, JSONDecodeError):
logging.exception(f"*Popeye scan job failed. Expecting json result.*\n\n Result:\n{logs}")
Expand All @@ -201,14 +203,14 @@ def update_state(state: ScanState) -> None:
type=ScanType.POPEYE,
start_time=start_time,
end_time=datetime.now(),
score=popeye_scan.score,
score=scan_report.score,
metadata=metadata,
results=[],
config=f"{params.args} \n\n {params.spinach}",
)

scan_issues: List[ScanReportRow] = []
for section in popeye_scan.sanitizers or []:
for section in scan_report.sanitizers or []:
kind = section.sanitizer
issues_dict: Dict[str, List[Issue]] = section.issues or {}
for resource, issuesList in issues_dict.items():
Expand Down Expand Up @@ -239,3 +241,21 @@ def update_state(state: ScanState) -> None:
)
finding.add_enrichment([scan_block], annotations={EnrichmentAnnotation.SCAN: True})
event.add_finding(finding)


def clean_up_k8s_logs_from_job_output(logs: str) -> str:
    """Remove any log messages prepended to job output by k8s.

    Leading lines are dropped one at a time until a line starting with "{" is
    found; everything from that line onwards is returned unchanged (it is not
    validated as JSON here).

    Args:
        logs: Raw text captured from the job, possibly with non-JSON lines
            in front of the JSON document.

    Returns:
        The input starting at the first line that begins with "{", or an
        empty string if no line begins with "{".
    """
    # This would ideally be handled inside RobustaJob.run_simple_job_spec, but the general
    # job output processing code does not assume JSON output, so identifying spurious text
    # would be problematic.
    # Note this code is not able to correctly handle log messages containing endlines. Doing
    # so would be impossible in some cases (like "{" following an endline) and/or
    # require meticulous parsing of k8s log messages.
    start = 0
    total = len(logs)
    # Walk an offset forward instead of re-slicing the string for every skipped
    # line, so the remaining output is copied only once, at the end.
    while start < total and not logs.startswith("{", start):
        # Assume every line not looking like JSON is log information added by k8s
        endline_pos = logs.find("\n", start)
        if endline_pos == -1:
            # The last line does not look like JSON either: nothing usable is left.
            return ""
        start = endline_pos + 1
    return logs[start:]
41 changes: 41 additions & 0 deletions tests/test_popey_job_log_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from unittest import mock

import pytest

from playbooks.robusta_playbooks.popeye import clean_up_k8s_logs_from_job_output


@pytest.mark.parametrize(
    "job_output,expected_output",
    [
        # Output that is empty or already starts with "{" is returned untouched,
        # even when trailing text follows or the JSON itself is malformed.
        ("", ""),
        ("{}", "{}"),
        ("{}\nextra text", "{}\nextra text"),
        ('{"popeye": []}', '{"popeye": []}'),
        ('{"popeye": []}\n{}', '{"popeye": []}\n{}'),
        ('{"invalid_json": {[{', '{"invalid_json": {[{'),
        # Whitespace-only output contains no line starting with "{".
        (" \t\n", ""),
        # Leading blank lines and log lines are dropped up to the JSON line.
        ('\nlog line 1\n\nweird log\n{"x": 3}', '{"x": 3}'),
        (
            "xxx Waited for 3.14159s due to client-side throttling, not priority and fairness, blah\n"
            "\n"
            '{"data": []}',
            '{"data": []}',
        ),
        (
            "xxx Waited for 21.37s - request: some text here\n" "\n" '{"x": 123}',
            '{"x": 123}',
        ),
        (
            "xxx Waited for 666s due to client-side throttling, not priority and fairness, blah\n"
            "\n"
            "aaa Waited for 777s - request: some text here\n"
            "\n"
            "bad logging message\n"
            '{"p": "q"}',
            '{"p": "q"}',
        ),
    ],
)
def test_clean_up_k8s_logs_from_job_output(job_output, expected_output):
    """Check that lines preceding the first "{"-prefixed line are stripped."""
    # The parameter is named job_output rather than input to avoid shadowing the builtin.
    assert clean_up_k8s_logs_from_job_output(job_output) == expected_output

0 comments on commit 3d72712

Please sign in to comment.