Merge pull request #21 from sphl/dev
Dev
sphl authored Oct 27, 2023
2 parents 4ce1467 + b25651a commit f6af000
Showing 8 changed files with 171 additions and 71 deletions.
3 changes: 2 additions & 1 deletion sast-fuzz/static_analysis/sast/config.yml
@@ -41,6 +41,7 @@ tools:
- '--uninit'
num_threads: 8
codeql:
sanity_checks: 'cmake' # Options: always, cmake, none
lib_path: '/opt/codeql-2.12.0/lib'
path: '/opt/codeql-2.12.0/cli/codeql'
checks:
@@ -258,7 +259,7 @@ tools:
- '%LIBRARY_PATH%/cpp/ql/src/Security/CWE/CWE-764/UnreleasedLock.ql'
- '%LIBRARY_PATH%/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql'
- '%LIBRARY_PATH%/cpp/ql/src/Security/CWE/CWE-835/InfiniteLoopWithUnsatisfiableExitCondition.ql'
# not actually security queries, but metrics for sanity checking:
# Not security checks, but important for CodeQL debugging and sanity checks
- '%LIBRARY_PATH%/cpp/ql/src/Summary/LinesOfCode.ql'
- '%LIBRARY_PATH%/cpp/ql/src/Summary/LinesOfUserCode.ql'
num_threads: 8
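For reference, a minimal sketch (not part of this diff) of how the new sanity_checks value is mapped onto the SanityChecks enum added in sfa/__init__.py below; an unrecognized option would raise a KeyError:

from enum import Enum, auto

class SanityChecks(Enum):
    ALWAYS = auto()
    CMAKE = auto()
    NONE = auto()

# from_yaml() upper-cases the YAML string and looks it up by member name.
for value in ("always", "cmake", "none"):
    print(value, "->", SanityChecks[value.upper()])

# SanityChecks["CUSTOM"]  # unknown option -> KeyError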
27 changes: 23 additions & 4 deletions sast-fuzz/static_analysis/sast/src/sfa/__init__.py
@@ -14,14 +14,28 @@

from collections import namedtuple
from dataclasses import dataclass
from enum import Enum, auto
from pathlib import Path

import yaml


# fmt: off
class SanityChecks(Enum):
"""
    Options for when to run sanity checks.
"""

ALWAYS = auto(); CMAKE = auto(); NONE = auto()
# fmt: on


ScoreWeights = namedtuple("ScoreWeights", ["flags", "tools"], defaults=[0.5, 0.5])

# SAST tool configuration
SASTToolConfig = namedtuple("SASTToolConfig", ["path", "checks", "num_threads"], defaults=["", "", -1])
SASTToolConfig = namedtuple(
"SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=[SanityChecks.NONE, "", "", -1]
)


@dataclass
@@ -56,22 +70,27 @@ def from_yaml(cls, file: Path) -> "AppConfig":
return cls(
ScoreWeights(config["scoring"]["weights"]["flags"], config["scoring"]["weights"]["tools"]),
flawfinder=SASTToolConfig(
config["tools"]["flawfinder"]["path"], config["tools"]["flawfinder"]["checks"], -1
SanityChecks.NONE, config["tools"]["flawfinder"]["path"], config["tools"]["flawfinder"]["checks"], -1
),
semgrep=SASTToolConfig(
SanityChecks.NONE,
config["tools"]["semgrep"]["path"],
config["tools"]["semgrep"]["checks"],
config["tools"]["semgrep"]["num_threads"],
),
infer=SASTToolConfig(
SanityChecks.NONE,
config["tools"]["infer"]["path"],
config["tools"]["infer"]["checks"],
config["tools"]["infer"]["num_threads"],
),
codeql=SASTToolConfig(
config["tools"]["codeql"]["path"], codeql_checks, config["tools"]["codeql"]["num_threads"]
SanityChecks[config["tools"]["codeql"]["sanity_checks"].upper()],
config["tools"]["codeql"]["path"],
codeql_checks,
config["tools"]["codeql"]["num_threads"],
),
clang_scan=SASTToolConfig(
config["tools"]["clang_scan"]["path"], config["tools"]["clang_scan"]["checks"], -1
SanityChecks.NONE, config["tools"]["clang_scan"]["path"], config["tools"]["clang_scan"]["checks"], -1
),
)
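A small usage sketch (illustrative paths and checks, not taken from this repository) of the widened SASTToolConfig namedtuple; sanity_checks is the new leading field and defaults to SanityChecks.NONE, so every tool except CodeQL keeps sanity checks disabled:

from sfa import SanityChecks, SASTToolConfig  # import path as used in tool_runner.py

# All four fields have defaults, so an empty construction is still valid.
default_cfg = SASTToolConfig()
assert default_cfg.sanity_checks is SanityChecks.NONE

# Roughly what AppConfig.from_yaml() builds for CodeQL (path and checks are placeholders).
codeql_cfg = SASTToolConfig(
    SanityChecks.CMAKE,
    "/opt/codeql-2.12.0/cli/codeql",
    ["%LIBRARY_PATH%/cpp/ql/src/Summary/LinesOfCode.ql"],
    8,
)
print(codeql_cfg.sanity_checks, codeql_cfg.num_threads)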
111 changes: 64 additions & 47 deletions sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py
@@ -21,9 +21,9 @@
from itertools import chain
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import ClassVar, Dict
from typing import Callable, ClassVar, Dict, Optional

from sfa import SASTToolConfig
from sfa import SanityChecks, SASTToolConfig
from sfa.analysis import SASTFlag, SASTFlags
from sfa.utils.fs import copy_dir, find_files
from sfa.utils.proc import run_shell_command
@@ -57,46 +57,12 @@ def is_cmake_project(subject_dir: Path) -> bool:
return (subject_dir / "CMakeLists.txt").exists()


def codeql_sanity_check(sarif_string: str) -> None:
"""
Check if the CodeQL database actually contains user code.
"""

if len(sarif_string.strip()) == 0:
raise ValueError("Empty input / no JSON string.")

sarif_data = json.loads(sarif_string)

if sarif_data["version"] != SARIF_VERSION:
raise ValueError(f"SARIF version {sarif_data['version']} is not supported.")

for run in sarif_data["runs"]:
metrics = run["properties"].get("metricResults")
if metrics is None:
logging.debug(
"CodeQL sanity check failed because required metadata was not found in the SARIF file. Make sure to include the 'Summary' queries if you want this check."
)
return

for metric in metrics:
if metric["ruleId"] == "cpp/summary/lines-of-code":
loc = int(metric["value"])
elif metric["ruleId"] == "cpp/summary/lines-of-user-code":
uloc = int(metric["value"])

if uloc == 0:
logging.warn(
"The CodeQL database contains no user code. That usually means CodeQL did not process the build script as intended and will lead to EMPTY RESULTS."
)
else:
logging.info(f"CodeQL picked up {loc} LoC, {uloc} of which are considered user-written code.")


def convert_sarif(string: str) -> SASTFlags:
def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] = None) -> SASTFlags:
"""
Convert SARIF data into our SAST flag format.
:param string:
:param sanity_check:
:return:
"""
if len(string.strip()) == 0:
@@ -107,6 +73,9 @@ def convert_sarif(string: str) -> SASTFlags:
if sarif_data["version"] != SARIF_VERSION:
raise ValueError(f"SARIF version {sarif_data['version']} is not supported.")

if sanity_check is not None:
sanity_check(sarif_data)

flags = SASTFlags()

for run in sarif_data["runs"]:
@@ -272,14 +241,54 @@ class CodeQLRunner(SASTToolRunner):
CodeQL runner.
"""

def _sanity_check(self, sarif_data: Dict) -> None:
"""
Run sanity checks on CodeQL output.
:param sarif_data:
:return:
"""
n_runs = len(sarif_data["runs"])

if n_runs == 0:
raise ValueError("No CodeQL execution runs found.")

# Let's take the last executed SAST run for the sanity check
run = sarif_data["runs"][n_runs - 1]
metrics = run["properties"].get("metricResults")

if metrics is None:
raise ValueError("No CodeQL metrics data found in SARIF file.")

loc = 0
user_loc = 0

for m in metrics:
if m["ruleId"] == "cpp/summary/lines-of-code":
loc = int(m["value"])
if m["ruleId"] == "cpp/summary/lines-of-user-code":
user_loc = int(m["value"])

logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}")

if user_loc == 0:
raise ValueError("No user C/C++ source code found in the CodeQL database.")

def _setup(self, temp_dir: Path) -> Path:
result_dir = temp_dir / "codeql_res"

run_shell_command(
f"{self._config.path} database create --language=cpp --command=./{BUILD_SCRIPT_NAME} --threads={self._config.num_threads} {result_dir}",
cwd=copy_dir(self._subject_dir, temp_dir),
env=SAST_SETUP_ENV,
)
if self._is_cmake_project:
run_shell_command(
f"{self._config.path} database create --language=cpp --command=./{BUILD_SCRIPT_NAME} --threads={self._config.num_threads} {result_dir}",
cwd=copy_dir(self._subject_dir, temp_dir),
env=SAST_SETUP_ENV,
)
else:
run_shell_command(
f'./{BUILD_SCRIPT_NAME} "{self._config.path} database create --language=cpp --command=make --threads={self._config.num_threads} {result_dir}"',
cwd=copy_dir(self._subject_dir, temp_dir),
env=SAST_SETUP_ENV,
)

return result_dir

@@ -292,12 +301,20 @@ def _analyze(self, working_dir: Path) -> str:

time.sleep(5)

sarif_string = result_file.read_text()
codeql_sanity_check(sarif_string)
return sarif_string
return result_file.read_text()

def _format(self, string: str) -> SASTFlags:
return convert_sarif(string)
run_sc = False

if self._config.sanity_checks == SanityChecks.ALWAYS or (
self._config.sanity_checks == SanityChecks.CMAKE and self._is_cmake_project
):
run_sc = True

if not run_sc:
return convert_sarif(string)
else:
return convert_sarif(string, self._sanity_check)


class ClangScanRunner(SASTToolRunner):
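The module-level codeql_sanity_check() above is replaced by a callback that convert_sarif() applies to the parsed SARIF dict; _format() only passes it when sanity_checks is ALWAYS, or CMAKE and the subject is a CMake project. A standalone sketch of the same check (the sample payload is invented):

from typing import Dict

def check_user_code(sarif_data: Dict) -> None:
    # Mirrors CodeQLRunner._sanity_check(): the last run must report user-written LoC.
    runs = sarif_data["runs"]
    if not runs:
        raise ValueError("No CodeQL execution runs found.")
    metrics = runs[-1]["properties"].get("metricResults")
    if metrics is None:
        raise ValueError("No CodeQL metrics data found in SARIF file.")
    user_loc = sum(int(m["value"]) for m in metrics if m["ruleId"] == "cpp/summary/lines-of-user-code")
    if user_loc == 0:
        raise ValueError("No user C/C++ source code found in the CodeQL database.")

# Illustrative payload only; real SARIF output carries many more fields.
sample = {"runs": [{"properties": {"metricResults": [
    {"ruleId": "cpp/summary/lines-of-code", "value": "1200"},
    {"ruleId": "cpp/summary/lines-of-user-code", "value": "0"},
]}}]}

try:
    check_user_code(sample)  # convert_sarif(sarif_string, check_user_code) would trigger the same path
except ValueError as err:
    print(f"Sanity check failed: {err}")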
15 changes: 12 additions & 3 deletions scripts/crash-dedup/src/cdd/container/san.py
@@ -92,18 +92,27 @@ def __init__(self, input_id: str, san: str, vtype: str, stack_trace: StackTrace)
self.vtype = vtype
self.stack_trace = stack_trace

def sorting_key(self, n_frames: Optional[int] = None, consider_lines: bool = False) -> Tuple:
def sorting_key(
self, n_frames: Optional[int] = None, consider_filepaths: bool = False, consider_lines: bool = False
) -> Tuple:
"""
Get sorting key for grouping sanitizer outputs.
:param n_frames:
:param consider_filepaths:
:param consider_lines:
:return:
"""
stack_trace = self.stack_trace if n_frames is None else self.stack_trace[:n_frames]

if not consider_lines:
stack_trace = [(t.id, t.file, t.function) for t in stack_trace] # type: ignore
if consider_filepaths:
if not consider_lines:
stack_trace = [(t.id, t.file, t.function) for t in stack_trace] # type: ignore
else:
if not consider_lines:
stack_trace = [(t.id, t.function) for t in stack_trace] # type: ignore
else:
stack_trace = [(t.id, t.function, t.line) for t in stack_trace] # type: ignore

return self.san, self.vtype, stack_trace

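The four key shapes the extended sorting_key() can now produce, sketched with an invented frame type (the real StackTrace entries appear to expose id, file, function, and line):

from collections import namedtuple

Frame = namedtuple("Frame", ["id", "file", "function", "line"])  # stand-in for real frames

trace = [Frame(0, "src/parser.c", "parse_header", 42), Frame(1, "src/main.c", "main", 7)]

keys = {
    (True, True): list(trace),                                   # frames kept as-is (paths + lines)
    (True, False): [(f.id, f.file, f.function) for f in trace],  # paths, no lines
    (False, True): [(f.id, f.function, f.line) for f in trace],  # lines, no paths
    (False, False): [(f.id, f.function) for f in trace],         # functions only
}
for (paths, lines), key in keys.items():
    print(f"consider_filepaths={paths}, consider_lines={lines}: {key}")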
11 changes: 9 additions & 2 deletions scripts/crash-dedup/src/cdd/container/summary.py
@@ -32,9 +32,14 @@ class DedupSummary:
"""

def __init__(
self, n_frames: Optional[int], consider_lines: bool, summary: Optional[List[DedupEntry]] = None
self,
n_frames: Optional[int],
consider_filepaths: bool,
consider_lines: bool,
summary: Optional[List[DedupEntry]] = None,
) -> None:
self.n_frames = n_frames
self.consider_filepaths = consider_filepaths
self.consider_lines = consider_lines
self.summary = summary or []

@@ -44,7 +49,8 @@ def add(self, id: int, key: Tuple, elems: List[SanitizerOutput]) -> None:
def to_csv(self, file: Path) -> None:
with file.open("w+") as csv_file:
csv_file.write(
"bug_id,n_dedup_frames,consider_lines,input_name,sanitizer,vuln_type,stack_trace,n_frames" + os.linesep
"bug_id,n_dedup_frames,consider_filepaths,consider_lines,input_name,sanitizer,vuln_type,stack_trace,n_frames"
+ os.linesep
)

for entry in self.summary:
@@ -57,6 +63,7 @@ def to_csv(self, file: Path) -> None:
(
str(entry.bug_id),
frame_str,
str(self.consider_filepaths),
str(self.consider_lines),
str(san_output.input_id).replace(CSV_SEP, "-"),
san_output.san,
9 changes: 7 additions & 2 deletions scripts/crash-dedup/src/cdd/grouping.py
@@ -20,20 +20,25 @@


def group_by(
sanitizer_infos: List[SanitizerOutput], n_frames: Optional[int] = None, consider_lines: bool = False
sanitizer_infos: List[SanitizerOutput],
n_frames: Optional[int] = None,
consider_filepaths: bool = False,
consider_lines: bool = False,
) -> DedupSummary:
"""
Group/deduplicate sanitizer outputs.
:param sanitizer_infos:
:param n_frames:
:param consider_filepaths:
:param consider_lines:
:return:
"""
keyfunc = lambda s: s.sorting_key(n_frames, consider_lines)
keyfunc = lambda s: s.sorting_key(n_frames, consider_filepaths, consider_lines)

return DedupSummary(
n_frames,
consider_filepaths,
consider_lines,
[
DedupEntry(i, k, list(g))
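A condensed sketch of the deduplication idea behind group_by() (the record tuples are invented; the real key comes from SanitizerOutput.sorting_key with the new consider_filepaths flag):

from itertools import groupby

records = [
    ("ASAN", "heap-buffer-overflow", ("parse_header",)),
    ("ASAN", "heap-buffer-overflow", ("parse_header",)),
    ("UBSAN", "signed-integer-overflow", ("main",)),
]
keyfunc = lambda r: r  # stands in for s.sorting_key(n_frames, consider_filepaths, consider_lines)

# Sort, then group: each groupby bucket becomes one deduplicated bug entry.
entries = [
    (bug_id, key, list(group))
    for bug_id, (key, group) in enumerate(groupby(sorted(records, key=keyfunc), key=keyfunc))
]
for bug_id, key, elems in entries:
    print(f"bug {bug_id}: {len(elems)} crash(es), key={key}")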
12 changes: 10 additions & 2 deletions scripts/crash-dedup/src/cdd/main.py
@@ -87,6 +87,14 @@ def main(
help="Number(s) of stack frames to be included in deduplication. Note: If not specified, all frames are considered.",
),
] = None,
consider_filepaths: Annotated[
bool,
typer.Option(
"--consider-filepaths",
is_flag=True,
help="Consider the filepaths within the stack frames for deduplication.",
),
] = False,
consider_lines: Annotated[
bool,
typer.Option(
@@ -150,7 +158,7 @@
logging.error(ex)

for n_frames in set(n_frames_list or [None]): # type: ignore
summary_file = output_dir / f"summary{'' if n_frames is None else '_nframes' + str(n_frames)}.csv"
summary_file = output_dir / f"summary{'' if n_frames is None else '_nf' + str(n_frames)}.csv"

summary = group_by(sanitizer_infos, n_frames, consider_lines)
summary = group_by(sanitizer_infos, n_frames, consider_filepaths, consider_lines)
summary.to_csv(summary_file)
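A small sketch of the renamed per-n_frames summary files (output_dir is a placeholder; the '_nf<N>' suffix follows the change above):

from pathlib import Path

output_dir = Path("out")  # placeholder
for n_frames in (None, 3, 5):
    summary_file = output_dir / f"summary{'' if n_frames is None else '_nf' + str(n_frames)}.csv"
    print(summary_file)  # out/summary.csv, out/summary_nf3.csv, out/summary_nf5.csv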