Skip to content

Commit

Permalink
Merge pull request #80 from Unparalleled-Calvin/main
Browse files Browse the repository at this point in the history
feat: support closure and sequence number in dataflow, add hash key cloning detection
  • Loading branch information
hxuhack authored Dec 8, 2024
2 parents 5f49347 + 490bd8a commit 6a430b3
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 17 deletions.
57 changes: 50 additions & 7 deletions rap/src/analysis/core/dataflow/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,42 @@ impl GraphEdge {
let mut dot = String::new();
match self {
//label=xxx
GraphEdge::NodeEdge { src: _, dst: _, op } => {
write!(attr, "label=\"{}\" ", escaped_string(format!("{:?}", op))).unwrap();
GraphEdge::NodeEdge {
src: _,
dst: _,
op,
seq,
} => {
write!(
attr,
"label=\"{}\" ",
escaped_string(format!("{}_{:?}", seq, op))
)
.unwrap();
}
GraphEdge::ConstEdge { src: _, dst: _, op } => {
write!(attr, "label=\"{}\" ", escaped_string(format!("{:?}", op))).unwrap();
GraphEdge::ConstEdge {
src: _,
dst: _,
op,
seq,
} => {
write!(
attr,
"label=\"{}\" ",
escaped_string(format!("{}_{:?}", seq, op))
)
.unwrap();
}
}
match self {
GraphEdge::NodeEdge { src, dst, op: _ } => {
GraphEdge::NodeEdge {
src, dst, op: _, ..
} => {
write!(dot, "{:?} -> {:?} [{}]", src, dst, attr).unwrap();
}
GraphEdge::ConstEdge { src, dst, op: _ } => {
GraphEdge::ConstEdge {
src, dst, op: _, ..
} => {
write!(dot, "{:?} -> {:?} [{}]", src, dst, attr).unwrap();
}
}
Expand Down Expand Up @@ -96,6 +120,25 @@ impl GraphNode {
.unwrap();
}
}
AggKind::Closure(def_id) => {
let agg_name = tcx.def_path_str(def_id);
if is_marker {
write!(
attr,
"label=\"Clos {}\" style=dashed ",
escaped_string(agg_name)
)
.unwrap();
} else {
write!(
attr,
"label=\"<f0> {:?} | <f1> Clos {}\" ",
local,
escaped_string(agg_name)
)
.unwrap();
}
}
_ => {
if is_marker {
write!(attr, "label=\"{:?}\" style=dashed ", agg_kind).unwrap();
Expand Down Expand Up @@ -134,7 +177,7 @@ impl Graph {
for (local, node) in self.nodes.iter_enumerated() {
let node_dot = if local <= Local::from_usize(self.argc) {
node.to_dot_graph(tcx, local, Some(String::from("red")), false)
} else if local <= Local::from_usize(self.n_locals) {
} else if local < Local::from_usize(self.n_locals) {
node.to_dot_graph(tcx, local, None, false)
} else {
node.to_dot_graph(tcx, local, None, true)
Expand Down
33 changes: 25 additions & 8 deletions rap/src/analysis/core/dataflow/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub enum NodeOp {
Len,
Cast,
BinaryOp,
CheckedBinaryOp, //deprecated in the latest(1.81) nightly rustc
CheckedBinaryOp,
NullaryOp,
UnaryOp,
Discriminant,
Expand Down Expand Up @@ -56,14 +56,25 @@ pub enum EdgeOp {

#[derive(Clone)]
pub enum GraphEdge {
NodeEdge { src: Local, dst: Local, op: EdgeOp },
ConstEdge { src: String, dst: Local, op: EdgeOp },
NodeEdge {
src: Local,
dst: Local,
op: EdgeOp,
seq: u32,
},
ConstEdge {
src: String,
dst: Local,
op: EdgeOp,
seq: u32,
},
}

#[derive(Clone)]
pub struct GraphNode {
pub op: NodeOp,
pub span: Span,
pub span: Span, //the corresponding code span
pub seq: u32, //the sequence number, edges with the same seq number are added in the same batch within a statement or terminator
pub out_edges: Vec<EdgeIdx>,
pub in_edges: Vec<EdgeIdx>,
}
Expand All @@ -73,6 +84,7 @@ impl GraphNode {
Self {
op: NodeOp::Nop,
span: DUMMY_SP,
seq: 0,
out_edges: vec![],
in_edges: vec![],
}
Expand Down Expand Up @@ -104,14 +116,16 @@ impl Graph {
}

pub fn add_node_edge(&mut self, src: Local, dst: Local, op: EdgeOp) -> EdgeIdx {
let edge_idx = self.edges.push(GraphEdge::NodeEdge { src, dst, op });
let seq = self.nodes[dst].seq;
let edge_idx = self.edges.push(GraphEdge::NodeEdge { src, dst, op, seq });
self.nodes[dst].in_edges.push(edge_idx);
self.nodes[src].out_edges.push(edge_idx);
edge_idx
}

pub fn add_const_edge(&mut self, src: String, dst: Local, op: EdgeOp) -> EdgeIdx {
let edge_idx = self.edges.push(GraphEdge::ConstEdge { src, dst, op });
let seq = self.nodes[dst].seq;
let edge_idx = self.edges.push(GraphEdge::ConstEdge { src, dst, op, seq });
self.nodes[dst].in_edges.push(edge_idx);
edge_idx
}
Expand Down Expand Up @@ -164,6 +178,7 @@ impl Graph {
}
let mut ret = place.local;
for place_elem in place.projection {
// if there are projections, then add marker nodes
ret = parse_one_step(self, ret, place_elem);
}
ret
Expand Down Expand Up @@ -258,6 +273,7 @@ impl Graph {
}
Rvalue::RawPtr(_, _) => todo!(),
};
self.nodes[dst].seq += 1;
}
}

Expand All @@ -279,6 +295,7 @@ impl Graph {
}
self.nodes[dst].op = NodeOp::Call(*def_id);
self.nodes[dst].span = terminator.source_info.span;
self.nodes[dst].seq += 1;
return;
}
}
Expand Down Expand Up @@ -363,10 +380,10 @@ impl Graph {
find.get()
}

// Whether there exists dataflow between each parameter and the return value
pub fn param_return_deps(&self) -> IndexVec<Local, bool> {
//the length is argc + 1, because _0 depends on _0 itself.
let _0 = Local::from_usize(0);
let deps = (0..self.argc + 1)
let deps = (0..self.argc + 1) //the length is argc + 1, because _0 depends on _0 itself.
.map(|i| {
let _i = Local::from_usize(i);
self.is_connected(_i, _0)
Expand Down
7 changes: 5 additions & 2 deletions rap/src/analysis/opt.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
pub mod checking;
pub mod memory_cloning;

use rustc_middle::ty::TyCtxt;

use super::core::dataflow::DataFlow;
use checking::bounds_checking::check;
use checking::bounds_checking;
use memory_cloning::hash_key_cloning;

pub struct Opt<'tcx> {
pub tcx: TyCtxt<'tcx>,
Expand All @@ -18,7 +20,8 @@ impl<'tcx> Opt<'tcx> {
let mut dataflow = DataFlow::new(self.tcx, false);
dataflow.build_graphs();
for (_, graph) in dataflow.graphs.iter() {
check(graph, &self.tcx);
bounds_checking::check(graph, &self.tcx);
hash_key_cloning::check(graph, &self.tcx);
}
}
}
1 change: 1 addition & 0 deletions rap/src/analysis/opt/memory_cloning.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod hash_key_cloning;
138 changes: 138 additions & 0 deletions rap/src/analysis/opt/memory_cloning/hash_key_cloning.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
use annotate_snippets::Level;
use annotate_snippets::Renderer;
use annotate_snippets::Snippet;
use once_cell::sync::OnceCell;

use crate::analysis::core::dataflow::graph::DFSStatus;
use crate::analysis::core::dataflow::graph::Direction;
use crate::analysis::core::dataflow::graph::GraphEdge::NodeEdge;
use rustc_hir::{intravisit, Expr, ExprKind};
use rustc_middle::mir::Local;
use rustc_middle::ty::{TyCtxt, TyKind, TypeckResults};
use rustc_span::Span;
use std::collections::HashSet;
static DEFPATHS: OnceCell<DefPaths> = OnceCell::new();

use crate::analysis::core::dataflow::graph::Graph;
use crate::analysis::core::dataflow::graph::GraphNode;
use crate::analysis::core::dataflow::graph::NodeOp;
use crate::analysis::utils::def_path::DefPath;
use crate::utils::log::{
relative_pos_range, span_to_filename, span_to_line_number, span_to_source_code,
};

struct DefPaths {
hashset_insert: DefPath,
clone: DefPath,
}

impl DefPaths {
pub fn new(tcx: &TyCtxt<'_>) -> Self {
Self {
hashset_insert: DefPath::new("std::collections::HashSet::insert", tcx),
clone: DefPath::new("std::clone::Clone::clone", tcx),
}
}
}

struct HashSetInsertFinder<'tcx> {
typeck_results: &'tcx TypeckResults<'tcx>,
record: HashSet<Span>,
}

impl<'tcx> intravisit::Visitor<'tcx> for HashSetInsertFinder<'tcx> {
fn visit_expr(&mut self, ex: &'tcx Expr<'tcx>) {
if let ExprKind::MethodCall(_, receiver, ..) = ex.kind {
let def_id = self
.typeck_results
.type_dependent_def_id(ex.hir_id)
.unwrap();
let target_def_id = (&DEFPATHS.get().unwrap()).hashset_insert.last_def_id();
if def_id == target_def_id {
let ty = self.typeck_results.node_type(receiver.hir_id);
if let TyKind::Adt(.., generic_args) = ty.kind() {
// we check whether the first generic arg is a ref type
if !matches!(
generic_args.get(0).unwrap().expect_ty().kind(),
TyKind::Ref(..)
) {
self.record.insert(ex.span);
}
}
}
}
intravisit::walk_expr(self, ex);
}
}

// check that the param of insert is moved from a cloned value
fn find_first_param_upside_clone(graph: &Graph, node: &GraphNode) -> Option<Local> {
let mut clone_node_idx = None;
let def_paths = &DEFPATHS.get().unwrap();
let target_def_id = def_paths.clone.last_def_id();
if let NodeEdge { src, .. } = &graph.edges[node.in_edges[1]] {
// the first param is self, so we use 1
let mut node_operator = |idx: Local| -> DFSStatus {
let node = &graph.nodes[idx];
if let NodeOp::Call(def_id) = node.op {
if def_id == target_def_id {
clone_node_idx = Some(idx);
return DFSStatus::Stop;
}
}
DFSStatus::Continue
};
graph.dfs(
*src,
Direction::Upside,
&mut node_operator,
&mut Graph::equivalent_edge_validator,
false,
);
}
clone_node_idx
}

fn report_hash_key_cloning(graph: &Graph, clone_span: Span, insert_span: Span) {
let code_source = span_to_source_code(graph.span);
let filename = span_to_filename(clone_span);
let mut snippet = Snippet::source(&code_source)
.line_start(span_to_line_number(graph.span))
.origin(&filename)
.fold(true)
.annotation(
Level::Error
.span(relative_pos_range(graph.span, clone_span))
.label("Cloning happens here."),
)
.annotation(
Level::Error
.span(relative_pos_range(graph.span, insert_span))
.label("Used here."),
);
let message = Level::Warning
.title("Unnecessary memory cloning detected")
.snippet(snippet);
let renderer = Renderer::styled();
println!("{}", renderer.render(message));
}

pub fn check(graph: &Graph, tcx: &TyCtxt) {
let _ = &DEFPATHS.get_or_init(|| DefPaths::new(tcx));
let def_id = graph.def_id;
let body = tcx.hir().body_owned_by(def_id.as_local().unwrap());
let typeck_results = tcx.typeck(def_id.as_local().unwrap());
let mut hashset_finder = HashSetInsertFinder {
typeck_results,
record: HashSet::new(),
};
intravisit::walk_body(&mut hashset_finder, body);
for (idx, node) in graph.nodes.iter_enumerated() {
if hashset_finder.record.contains(&node.span) {
if let Some(clone_node_idx) = find_first_param_upside_clone(graph, node) {
let clone_span = graph.nodes[clone_node_idx].span;
report_hash_key_cloning(graph, clone_span, node.span);
}
}
}
}
6 changes: 6 additions & 0 deletions tests/hash_key_cloning/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[package]
name = "hash_key_cloning"
version = "0.1.0"
edition = "2021"

[dependencies]
9 changes: 9 additions & 0 deletions tests/hash_key_cloning/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use std::collections::HashSet;

fn foo(a: &Vec<String>) {
let mut b = HashSet::new();
for i in a {
let c = i.clone();
b.insert(c);
}
}

0 comments on commit 6a430b3

Please sign in to comment.