Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(sast): add sanity checks in main SAST routine #22

Merged
merged 1 commit into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion sast-fuzz/static_analysis/sast/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ scoring:
tools: 1.0
tools:
flawfinder:
sanity_checks: 'always' # Options: always, cmake, none
path: 'python2 /opt/flawfinder-2.0.19/flawfinder.py'
checks:
- '--falsepositive'
- '--minlevel=3'
- '--neverignore'
semgrep:
sanity_checks: 'always' # Options: always, cmake, none
path: '/usr/local/bin/semgrep'
checks:
- 'r/c.lang.security.double-free.double-free'
Expand All @@ -27,6 +29,7 @@ tools:
- 'r/c.lang.security.use-after-free.use-after-free'
num_threads: 8
infer:
sanity_checks: 'always' # Options: always, cmake, none
path: '/opt/infer-1.1.0/bin/infer'
checks:
- '--no-default-checkers'
Expand All @@ -41,7 +44,7 @@ tools:
- '--uninit'
num_threads: 8
codeql:
sanity_checks: 'cmake' # Options: always, cmake, none
sanity_checks: 'always' # Options: always, cmake, none
lib_path: '/opt/codeql-2.12.0/lib'
path: '/opt/codeql-2.12.0/cli/codeql'
checks:
Expand Down Expand Up @@ -264,6 +267,7 @@ tools:
- '%LIBRARY_PATH%/cpp/ql/src/Summary/LinesOfUserCode.ql'
num_threads: 8
clang_scan:
sanity_checks: 'always' # Options: always, cmake, none
path: '/opt/llvm-12.0.0/build/bin/scan-build'
checks:
- '-disable-checker core.CallAndMessage'
Expand Down
29 changes: 12 additions & 17 deletions sast-fuzz/static_analysis/sast/src/sfa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,11 @@

import yaml


# fmt: off
class SanityChecks(Enum):
    """
    Options of when to run sanity checks.

    Members are looked up by upper-cased name from a tool's 'sanity_checks' setting in the config file
    (options: always, cmake, none).
    """

    # Run the sanity checks on every analysis.
    ALWAYS = auto()
    # Run the sanity checks only if the subject is a CMake project.
    CMAKE = auto()
    # Never run the sanity checks.
    NONE = auto()
# fmt: on


ScoreWeights = namedtuple("ScoreWeights", ["flags", "tools"], defaults=[0.5, 0.5])

# SAST tool configuration
SASTToolConfig = namedtuple(
"SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=[SanityChecks.NONE, "", "", -1]
"SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=["", "", "", -1]
)


Expand Down Expand Up @@ -70,27 +59,33 @@ def from_yaml(cls, file: Path) -> "AppConfig":
return cls(
ScoreWeights(config["scoring"]["weights"]["flags"], config["scoring"]["weights"]["tools"]),
flawfinder=SASTToolConfig(
SanityChecks.NONE, config["tools"]["flawfinder"]["path"], config["tools"]["flawfinder"]["checks"], -1
config["tools"]["flawfinder"]["sanity_checks"],
config["tools"]["flawfinder"]["path"],
config["tools"]["flawfinder"]["checks"],
-1,
),
semgrep=SASTToolConfig(
SanityChecks.NONE,
config["tools"]["semgrep"]["sanity_checks"],
config["tools"]["semgrep"]["path"],
config["tools"]["semgrep"]["checks"],
config["tools"]["semgrep"]["num_threads"],
),
infer=SASTToolConfig(
SanityChecks.NONE,
config["tools"]["infer"]["sanity_checks"],
config["tools"]["infer"]["path"],
config["tools"]["infer"]["checks"],
config["tools"]["infer"]["num_threads"],
),
codeql=SASTToolConfig(
SanityChecks[config["tools"]["codeql"]["sanity_checks"].upper()],
config["tools"]["codeql"]["sanity_checks"],
config["tools"]["codeql"]["path"],
codeql_checks,
config["tools"]["codeql"]["num_threads"],
),
clang_scan=SASTToolConfig(
SanityChecks.NONE, config["tools"]["clang_scan"]["path"], config["tools"]["clang_scan"]["checks"], -1
config["tools"]["clang_scan"]["sanity_checks"],
config["tools"]["clang_scan"]["path"],
config["tools"]["clang_scan"]["checks"],
-1,
),
)
125 changes: 75 additions & 50 deletions sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from tempfile import TemporaryDirectory
from typing import Callable, ClassVar, Dict, Optional

from sfa import SanityChecks, SASTToolConfig
from sfa import SASTToolConfig
from sfa.analysis import SASTFlag, SASTFlags
from sfa.utils.fs import copy_dir, find_files
from sfa.utils.proc import run_shell_command
Expand Down Expand Up @@ -57,12 +57,11 @@ def is_cmake_project(subject_dir: Path) -> bool:
return (subject_dir / "CMakeLists.txt").exists()


def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] = None) -> SASTFlags:
def default_sarif_checks(string: str) -> Dict:
"""
Convert SARIF data into our SAST flag format.
Run default checks on SARIF string.

:param string:
:param sanity_check:
:return:
"""
if len(string.strip()) == 0:
Expand All @@ -73,8 +72,17 @@ def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] =
if sarif_data["version"] != SARIF_VERSION:
raise ValueError(f"SARIF version {sarif_data['version']} is not supported.")

if sanity_check is not None:
sanity_check(sarif_data)
return sarif_data


def convert_sarif(string: str) -> SASTFlags:
"""
Convert SARIF data into our SAST flag format.

:param string:
:return:
"""
sarif_data = json.loads(string)

flags = SASTFlags()

Expand Down Expand Up @@ -129,6 +137,16 @@ def _analyze(self, working_dir: Path) -> str:
"""
pass

@abstractmethod
def _sanity_checks(self, string: str) -> None:
    """
    Run sanity checks on SAST tool output.

    Called by run() on the raw string returned by _analyze(), before _format(), when the tool's
    'sanity_checks' setting is "always", or is "cmake" and self._is_cmake_project is set.
    The implementations visible in this file report a failed check by raising (e.g. ValueError).

    :param string: Raw tool output as returned by _analyze().
    :return: None.
    """
    pass

@abstractmethod
def _format(self, string: str) -> SASTFlags:
"""
Expand All @@ -141,7 +159,7 @@ def _format(self, string: str) -> SASTFlags:

def run(self) -> SASTFlags:
"""
Setup target program, run SAST tool, and format output.
Setup target program, run SAST tool (+ sanity checks), and format output.

:return:
"""
Expand All @@ -150,6 +168,11 @@ def run(self) -> SASTFlags:
working_dir = self._setup(Path(temp_dir))
flags = self._analyze(working_dir)

if self._config.sanity_checks == "always" or (
self._config.sanity_checks == "cmake" and self._is_cmake_project
):
self._sanity_checks(flags)

return self._format(flags)

except Exception as ex:
Expand All @@ -172,6 +195,9 @@ def _analyze(self, working_dir: Path) -> str:
f"{self._config.path} --dataonly --sarif {' '.join(self._config.checks)} {working_dir}"
)

def _sanity_checks(self, string: str) -> None:
    """
    Run the default SARIF checks on the tool output.

    :param string: Raw SARIF output as returned by _analyze().
    :return: None; default_sarif_checks() raises if a check fails.
    """
    default_sarif_checks(string)

def _format(self, string: str) -> SASTFlags:
    """
    Convert the SARIF tool output into our SAST flag format.

    :param string: Raw SARIF output as returned by _analyze().
    :return: SAST flags produced by convert_sarif().
    """
    return convert_sarif(string)

Expand All @@ -189,6 +215,9 @@ def _analyze(self, working_dir: Path) -> str:
f"{self._config.path} scan --quiet --sarif --jobs {self._config.num_threads} {' '.join([f'--config {check}' for check in self._config.checks])} {working_dir}"
)

def _sanity_checks(self, string: str) -> None:
    """
    Run the default SARIF checks on the tool output.

    :param string: Raw SARIF output as returned by _analyze().
    :return: None; default_sarif_checks() raises if a check fails.
    """
    default_sarif_checks(string)

def _format(self, string: str) -> SASTFlags:
    """
    Convert the SARIF tool output into our SAST flag format.

    :param string: Raw SARIF output as returned by _analyze().
    :return: SAST flags produced by convert_sarif().
    """
    return convert_sarif(string)

Expand Down Expand Up @@ -220,6 +249,9 @@ def _analyze(self, working_dir: Path) -> str:
# By default, Infer writes the results into the 'report.json' file once the analysis is complete.
return (working_dir / "report.json").read_text()

def _sanity_checks(self, string: str) -> None:
    """
    Sanity-check hook; no checks are implemented for this tool.

    The tool output is accepted as-is, so the 'sanity_checks' config setting has no effect here.

    :param string: Raw tool output as returned by _analyze() (unused).
    :return: None.
    """
    pass

def _format(self, string: str) -> SASTFlags:
flags = SASTFlags()

Expand All @@ -241,39 +273,6 @@ class CodeQLRunner(SASTToolRunner):
CodeQL runner.
"""

def _sanity_check(self, sarif_data: Dict) -> None:
"""
Run sanity checks on CodeQL output.

:param sarif_data:
:return:
"""
n_runs = len(sarif_data["runs"])

if n_runs == 0:
raise ValueError("No CodeQL execution runs found.")

# Let's take the last executed SAST run for the sanity check
run = sarif_data["runs"][n_runs - 1]
metrics = run["properties"].get("metricResults")

if metrics is None:
raise ValueError("No CodeQL metrics data found in SARIF file.")

loc = 0
user_loc = 0

for m in metrics:
if m["ruleId"] == "cpp/summary/lines-of-code":
loc = int(m["value"])
if m["ruleId"] == "cpp/summary/lines-of-user-code":
user_loc = int(m["value"])

logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}")

if user_loc == 0:
raise ValueError("No user C/C++ source code found in the CodeQL database.")

def _setup(self, temp_dir: Path) -> Path:
result_dir = temp_dir / "codeql_res"

Expand Down Expand Up @@ -303,18 +302,37 @@ def _analyze(self, working_dir: Path) -> str:

return result_file.read_text()

def _sanity_checks(self, string: str) -> None:
    """
    Run sanity checks on CodeQL output.

    After the default SARIF checks, inspects the last run of the SARIF document and requires its
    'metricResults' to report a non-zero number of lines of user code. A missing 'runs' or 'properties'
    entry is reported as ValueError (like every other failed check) instead of surfacing as a bare KeyError.

    :param string: Raw SARIF output of CodeQL as returned by _analyze().
    :return: None.
    :raises ValueError: If there are no runs, no metrics data, or no user code in the CodeQL database
        (in addition to whatever default_sarif_checks() raises).
    """
    sarif_data = default_sarif_checks(string)

    runs = sarif_data.get("runs") or []

    if not runs:
        raise ValueError("No CodeQL execution runs found.")

    # Let's take the last executed SAST run for the sanity check
    properties = runs[-1].get("properties") or {}
    metrics = properties.get("metricResults")

    if metrics is None:
        raise ValueError("No CodeQL metrics data found in SARIF file.")

    loc = 0
    user_loc = 0

    for m in metrics:
        rule_id = m.get("ruleId")

        if rule_id == "cpp/summary/lines-of-code":
            loc = int(m["value"])
        elif rule_id == "cpp/summary/lines-of-user-code":
            user_loc = int(m["value"])

    logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}")

    # Zero lines of user code means CodeQL did not see any of the subject's sources.
    if user_loc == 0:
        raise ValueError("No user C/C++ source code found in the CodeQL database.")

def _format(self, string: str) -> SASTFlags:
    """
    Convert the CodeQL SARIF output into our SAST flag format.

    :param string: Raw SARIF output of CodeQL as returned by _analyze().
    :return: SAST flags produced by convert_sarif().
    """
    return convert_sarif(string)


class ClangScanRunner(SASTToolRunner):
Expand All @@ -340,6 +358,10 @@ def _analyze(self, working_dir: Path) -> str:
# (JSON string) of each file as one line to the return string.
return os.linesep.join(map(lambda file: json.dumps(json.loads(file.read_text()), indent=None), result_files))

def _sanity_checks(self, string: str) -> None:
    """
    Run the default SARIF checks on every SARIF document of the tool output.

    :param string: Tool output holding one SARIF document (JSON string) per line, as built by _analyze().
    :return: None; default_sarif_checks() raises if a check fails.
    """
    for sarif_str in string.split(os.linesep):
        default_sarif_checks(sarif_str)

def _format(self, string: str) -> SASTFlags:
nested_flags = map(convert_sarif, string.split(os.linesep))

Expand Down Expand Up @@ -369,6 +391,9 @@ def _setup(self, temp_dir: Path) -> Path:
def _analyze(self, working_dir: Path) -> str:
    """
    Read the report file from the working directory.

    :param working_dir: Directory containing the report file named self._report_name.
    :return: Text content of the report file.
    """
    return (working_dir / self._report_name).read_text()

def _sanity_checks(self, string: str) -> None:
    """
    Sanity-check hook; no checks are implemented for this tool.

    The tool output is accepted as-is, so the 'sanity_checks' config setting has no effect here.

    :param string: Raw tool output as returned by _analyze() (unused).
    :return: None.
    """
    pass

def _format(self, string: str) -> SASTFlags:
flags = SASTFlags()

Expand Down
Loading