Skip to content

Commit

Permalink
refactor(sast): add sanity checks in main SAST routine
Browse files Browse the repository at this point in the history
  • Loading branch information
sphl committed Oct 27, 2023
1 parent b25651a commit d1518d3
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 68 deletions.
6 changes: 5 additions & 1 deletion sast-fuzz/static_analysis/sast/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ scoring:
tools: 1.0
tools:
flawfinder:
sanity_checks: 'always' # Options: always, cmake, none
path: 'python2 /opt/flawfinder-2.0.19/flawfinder.py'
checks:
- '--falsepositive'
- '--minlevel=3'
- '--neverignore'
semgrep:
sanity_checks: 'always' # Options: always, cmake, none
path: '/usr/local/bin/semgrep'
checks:
- 'r/c.lang.security.double-free.double-free'
Expand All @@ -27,6 +29,7 @@ tools:
- 'r/c.lang.security.use-after-free.use-after-free'
num_threads: 8
infer:
sanity_checks: 'always' # Options: always, cmake, none
path: '/opt/infer-1.1.0/bin/infer'
checks:
- '--no-default-checkers'
Expand All @@ -41,7 +44,7 @@ tools:
- '--uninit'
num_threads: 8
codeql:
sanity_checks: 'cmake' # Options: always, cmake, none
sanity_checks: 'always' # Options: always, cmake, none
lib_path: '/opt/codeql-2.12.0/lib'
path: '/opt/codeql-2.12.0/cli/codeql'
checks:
Expand Down Expand Up @@ -264,6 +267,7 @@ tools:
- '%LIBRARY_PATH%/cpp/ql/src/Summary/LinesOfUserCode.ql'
num_threads: 8
clang_scan:
sanity_checks: 'always' # Options: always, cmake, none
path: '/opt/llvm-12.0.0/build/bin/scan-build'
checks:
- '-disable-checker core.CallAndMessage'
Expand Down
29 changes: 12 additions & 17 deletions sast-fuzz/static_analysis/sast/src/sfa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,11 @@

import yaml


# fmt: off
class SanityChecks(Enum):
"""
Options of when to run sanity checks.
"""

ALWAYS = auto(); CMAKE = auto(); NONE = auto()
# fmt: on


# Weights for the "flags" and "tools" parts of the score; each falls back to 0.5 when omitted.
ScoreWeights = namedtuple("ScoreWeights", ("flags", "tools"), defaults=(0.5, 0.5))

# SAST tool configuration
SASTToolConfig = namedtuple(
"SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=[SanityChecks.NONE, "", "", -1]
"SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=["", "", "", -1]
)


Expand Down Expand Up @@ -70,27 +59,33 @@ def from_yaml(cls, file: Path) -> "AppConfig":
return cls(
ScoreWeights(config["scoring"]["weights"]["flags"], config["scoring"]["weights"]["tools"]),
flawfinder=SASTToolConfig(
SanityChecks.NONE, config["tools"]["flawfinder"]["path"], config["tools"]["flawfinder"]["checks"], -1
config["tools"]["flawfinder"]["sanity_checks"],
config["tools"]["flawfinder"]["path"],
config["tools"]["flawfinder"]["checks"],
-1,
),
semgrep=SASTToolConfig(
SanityChecks.NONE,
config["tools"]["semgrep"]["sanity_checks"],
config["tools"]["semgrep"]["path"],
config["tools"]["semgrep"]["checks"],
config["tools"]["semgrep"]["num_threads"],
),
infer=SASTToolConfig(
SanityChecks.NONE,
config["tools"]["infer"]["sanity_checks"],
config["tools"]["infer"]["path"],
config["tools"]["infer"]["checks"],
config["tools"]["infer"]["num_threads"],
),
codeql=SASTToolConfig(
SanityChecks[config["tools"]["codeql"]["sanity_checks"].upper()],
config["tools"]["codeql"]["sanity_checks"],
config["tools"]["codeql"]["path"],
codeql_checks,
config["tools"]["codeql"]["num_threads"],
),
clang_scan=SASTToolConfig(
SanityChecks.NONE, config["tools"]["clang_scan"]["path"], config["tools"]["clang_scan"]["checks"], -1
config["tools"]["clang_scan"]["sanity_checks"],
config["tools"]["clang_scan"]["path"],
config["tools"]["clang_scan"]["checks"],
-1,
),
)
125 changes: 75 additions & 50 deletions sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from tempfile import TemporaryDirectory
from typing import Callable, ClassVar, Dict, Optional

from sfa import SanityChecks, SASTToolConfig
from sfa import SASTToolConfig
from sfa.analysis import SASTFlag, SASTFlags
from sfa.utils.fs import copy_dir, find_files
from sfa.utils.proc import run_shell_command
Expand Down Expand Up @@ -57,12 +57,11 @@ def is_cmake_project(subject_dir: Path) -> bool:
return (subject_dir / "CMakeLists.txt").exists()


def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] = None) -> SASTFlags:
def default_sarif_checks(string: str) -> Dict:
"""
Convert SARIF data into our SAST flag format.
Run default checks on SARIF string.
:param string:
:param sanity_check:
:return:
"""
if len(string.strip()) == 0:
Expand All @@ -73,8 +72,17 @@ def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] =
if sarif_data["version"] != SARIF_VERSION:
raise ValueError(f"SARIF version {sarif_data['version']} is not supported.")

if sanity_check is not None:
sanity_check(sarif_data)
return sarif_data


def convert_sarif(string: str) -> SASTFlags:
"""
Convert SARIF data into our SAST flag format.
:param string:
:return:
"""
sarif_data = json.loads(string)

flags = SASTFlags()

Expand Down Expand Up @@ -129,6 +137,16 @@ def _analyze(self, working_dir: Path) -> str:
"""
pass

@abstractmethod
def _sanity_checks(self, string: str) -> None:
"""
Run sanity checks on SAST tool output.
:param string:
:return:
"""
pass

@abstractmethod
def _format(self, string: str) -> SASTFlags:
"""
Expand All @@ -141,7 +159,7 @@ def _format(self, string: str) -> SASTFlags:

def run(self) -> SASTFlags:
"""
Setup target program, run SAST tool, and format output.
Setup target program, run SAST tool (+ sanity checks), and format output.
:return:
"""
Expand All @@ -150,6 +168,11 @@ def run(self) -> SASTFlags:
working_dir = self._setup(Path(temp_dir))
flags = self._analyze(working_dir)

if self._config.sanity_checks == "always" or (
self._config.sanity_checks == "cmake" and self._is_cmake_project
):
self._sanity_checks(flags)

return self._format(flags)

except Exception as ex:
Expand All @@ -172,6 +195,9 @@ def _analyze(self, working_dir: Path) -> str:
f"{self._config.path} --dataonly --sarif {' '.join(self._config.checks)} {working_dir}"
)

def _sanity_checks(self, string: str) -> None:
    """
    Apply the default SARIF checks to the tool output.

    :param string: SARIF output produced by ``_analyze``.
    :return:
    """
    # Only the validation matters here; the parsed SARIF data is discarded.
    _ = default_sarif_checks(string)

def _format(self, string: str) -> SASTFlags:
    """
    Turn the tool's SARIF output into SAST flags.

    :param string: SARIF output produced by ``_analyze``.
    :return: Flags obtained from ``convert_sarif``.
    """
    sast_flags = convert_sarif(string)
    return sast_flags

Expand All @@ -189,6 +215,9 @@ def _analyze(self, working_dir: Path) -> str:
f"{self._config.path} scan --quiet --sarif --jobs {self._config.num_threads} {' '.join([f'--config {check}' for check in self._config.checks])} {working_dir}"
)

def _sanity_checks(self, string: str) -> None:
    """
    Apply the default SARIF checks to the tool output.

    :param string: SARIF output produced by ``_analyze``.
    :return:
    """
    # The parsed SARIF dictionary is not needed; the call is made for its checks only.
    _ = default_sarif_checks(string)

def _format(self, string: str) -> SASTFlags:
    """
    Turn the tool's SARIF output into SAST flags.

    :param string: SARIF output produced by ``_analyze``.
    :return: Flags obtained from ``convert_sarif``.
    """
    converted = convert_sarif(string)
    return converted

Expand Down Expand Up @@ -220,6 +249,9 @@ def _analyze(self, working_dir: Path) -> str:
# By default, Infer writes the results into the 'report.json' file once the analysis is complete.
return (working_dir / "report.json").read_text()

def _sanity_checks(self, string: str) -> None:
pass

def _format(self, string: str) -> SASTFlags:
flags = SASTFlags()

Expand All @@ -241,39 +273,6 @@ class CodeQLRunner(SASTToolRunner):
CodeQL runner.
"""

def _sanity_check(self, sarif_data: Dict) -> None:
"""
Run sanity checks on CodeQL output.
:param sarif_data:
:return:
"""
n_runs = len(sarif_data["runs"])

if n_runs == 0:
raise ValueError("No CodeQL execution runs found.")

# Let's take the last executed SAST run for the sanity check
run = sarif_data["runs"][n_runs - 1]
metrics = run["properties"].get("metricResults")

if metrics is None:
raise ValueError("No CodeQL metrics data found in SARIF file.")

loc = 0
user_loc = 0

for m in metrics:
if m["ruleId"] == "cpp/summary/lines-of-code":
loc = int(m["value"])
if m["ruleId"] == "cpp/summary/lines-of-user-code":
user_loc = int(m["value"])

logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}")

if user_loc == 0:
raise ValueError("No user C/C++ source code found in the CodeQL database.")

def _setup(self, temp_dir: Path) -> Path:
result_dir = temp_dir / "codeql_res"

Expand Down Expand Up @@ -303,18 +302,37 @@ def _analyze(self, working_dir: Path) -> str:

return result_file.read_text()

def _format(self, string: str) -> SASTFlags:
run_sc = False
def _sanity_checks(self, string: str) -> None:
sarif_data = default_sarif_checks(string)

if self._config.sanity_checks == SanityChecks.ALWAYS or (
self._config.sanity_checks == SanityChecks.CMAKE and self._is_cmake_project
):
run_sc = True
n_runs = len(sarif_data["runs"])

if not run_sc:
return convert_sarif(string)
else:
return convert_sarif(string, self._sanity_check)
if n_runs == 0:
raise ValueError("No CodeQL execution runs found.")

# Let's take the last executed SAST run for the sanity check
run = sarif_data["runs"][n_runs - 1]
metrics = run["properties"].get("metricResults")

if metrics is None:
raise ValueError("No CodeQL metrics data found in SARIF file.")

loc = 0
user_loc = 0

for m in metrics:
if m["ruleId"] == "cpp/summary/lines-of-code":
loc = int(m["value"])
if m["ruleId"] == "cpp/summary/lines-of-user-code":
user_loc = int(m["value"])

logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}")

if user_loc == 0:
raise ValueError("No user C/C++ source code found in the CodeQL database.")

def _format(self, string: str) -> SASTFlags:
    """
    Turn the SARIF result file content into SAST flags.

    :param string: SARIF output produced by ``_analyze``.
    :return: Flags obtained from ``convert_sarif``.
    """
    sast_flags = convert_sarif(string)
    return sast_flags


class ClangScanRunner(SASTToolRunner):
Expand All @@ -340,6 +358,10 @@ def _analyze(self, working_dir: Path) -> str:
# (JSON string) of each file as one line to the return string.
return os.linesep.join(map(lambda file: json.dumps(json.loads(file.read_text()), indent=None), result_files))

def _sanity_checks(self, string: str) -> None:
    """
    Run the default SARIF checks on every report contained in the tool output.

    :param string: Tool output holding one SARIF document per line, as assembled by ``_analyze``.
    :return:
    """
    sarif_lines = string.split(os.linesep)

    for line in sarif_lines:
        default_sarif_checks(line)

def _format(self, string: str) -> SASTFlags:
nested_flags = map(convert_sarif, string.split(os.linesep))

Expand Down Expand Up @@ -369,6 +391,9 @@ def _setup(self, temp_dir: Path) -> Path:
def _analyze(self, working_dir: Path) -> str:
    """
    Read the report file located in the working directory.

    :param working_dir: Directory containing the file named by ``self._report_name``.
    :return: Text content of the report file.
    """
    report_file = working_dir / self._report_name
    return report_file.read_text()

def _sanity_checks(self, string: str) -> None:
pass

def _format(self, string: str) -> SASTFlags:
flags = SASTFlags()

Expand Down

0 comments on commit d1518d3

Please sign in to comment.