def codeql_sanity_check(sarif_string: str) -> None:
    """
    Check if the CodeQL database actually contains user code.

    The check reads the metric results that CodeQL's 'Summary' queries
    (``cpp/summary/lines-of-code`` and ``cpp/summary/lines-of-user-code``)
    store under ``run["properties"]["metricResults"]`` of every run in the
    SARIF document. The check is purely advisory: it only emits log messages
    and never fails because the optional metrics are absent.

    - user LoC == 0 -> warning (CodeQL most likely did not observe the build)
    - user LoC > 0  -> info message with the line counts
    - metrics absent (no property bag, no ``metricResults``, or no user-LoC
      entry) -> debug message, and the run is skipped

    :param sarif_string: Raw SARIF document (JSON text) produced by CodeQL.
    :raises ValueError: If the input is empty/blank or the SARIF version does
        not match ``SARIF_VERSION``.
    :raises json.JSONDecodeError: If the input is not valid JSON.
    """
    missing_metadata_msg = "CodeQL sanity check failed because required metadata was not found in the SARIF file. Make sure to include the 'Summary' queries if you want this check."

    if not sarif_string.strip():
        raise ValueError("Empty input / no JSON string.")

    sarif_data = json.loads(sarif_string)

    if sarif_data["version"] != SARIF_VERSION:
        raise ValueError(f"SARIF version {sarif_data['version']} is not supported.")

    for run in sarif_data["runs"]:
        # The property bag is optional, so a run without it must not raise.
        metrics = (run.get("properties") or {}).get("metricResults")
        if metrics is None:
            logging.debug(missing_metadata_msg)
            continue

        # Reset per run so a run never reports values left over from a previous one.
        loc = None
        uloc = None
        for metric in metrics:
            rule_id = metric.get("ruleId")
            if rule_id == "cpp/summary/lines-of-code":
                loc = int(metric["value"])
            elif rule_id == "cpp/summary/lines-of-user-code":
                uloc = int(metric["value"])

        # Only a subset of the Summary queries may be configured; without the
        # user-code metric there is nothing to check for this run.
        if uloc is None:
            logging.debug(missing_metadata_msg)
            continue

        if uloc == 0:
            logging.warning(
                "The CodeQL database contains no user code. That usually means CodeQL did not process the build script as intended and will lead to EMPTY RESULTS."
            )
        elif loc is None:
            logging.info(f"CodeQL picked up {uloc} lines of user-written code.")
        else:
            logging.info(f"CodeQL picked up {loc} LoC, {uloc} of which are considered user-written code.")