From 09189d3e14c01d5ae8aecf05271a55056448a146 Mon Sep 17 00:00:00 2001 From: Calvin Date: Tue, 26 Nov 2024 13:50:03 +0800 Subject: [PATCH 1/4] add seq number in edges --- rap/src/analysis/core/dataflow/debug.rs | 38 ++++++++++++++++++++----- rap/src/analysis/core/dataflow/graph.rs | 29 +++++++++++++++---- 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/rap/src/analysis/core/dataflow/debug.rs b/rap/src/analysis/core/dataflow/debug.rs index 5449fd8..3e49c5c 100644 --- a/rap/src/analysis/core/dataflow/debug.rs +++ b/rap/src/analysis/core/dataflow/debug.rs @@ -19,18 +19,42 @@ impl GraphEdge { let mut dot = String::new(); match self { //label=xxx - GraphEdge::NodeEdge { src: _, dst: _, op } => { - write!(attr, "label=\"{}\" ", escaped_string(format!("{:?}", op))).unwrap(); + GraphEdge::NodeEdge { + src: _, + dst: _, + op, + seq, + } => { + write!( + attr, + "label=\"{}\" ", + escaped_string(format!("{}_{:?}", seq, op)) + ) + .unwrap(); } - GraphEdge::ConstEdge { src: _, dst: _, op } => { - write!(attr, "label=\"{}\" ", escaped_string(format!("{:?}", op))).unwrap(); + GraphEdge::ConstEdge { + src: _, + dst: _, + op, + seq, + } => { + write!( + attr, + "label=\"{}\" ", + escaped_string(format!("{}_{:?}", seq, op)) + ) + .unwrap(); } } match self { - GraphEdge::NodeEdge { src, dst, op: _ } => { + GraphEdge::NodeEdge { + src, dst, op: _, .. + } => { write!(dot, "{:?} -> {:?} [{}]", src, dst, attr).unwrap(); } - GraphEdge::ConstEdge { src, dst, op: _ } => { + GraphEdge::ConstEdge { + src, dst, op: _, .. + } => { write!(dot, "{:?} -> {:?} [{}]", src, dst, attr).unwrap(); } } @@ -134,7 +158,7 @@ impl Graph { for (local, node) in self.nodes.iter_enumerated() { let node_dot = if local <= Local::from_usize(self.argc) { node.to_dot_graph(tcx, local, Some(String::from("red")), false) - } else if local <= Local::from_usize(self.n_locals) { + } else if local < Local::from_usize(self.n_locals) { node.to_dot_graph(tcx, local, None, false) } else { node.to_dot_graph(tcx, local, None, true) diff --git a/rap/src/analysis/core/dataflow/graph.rs b/rap/src/analysis/core/dataflow/graph.rs index 760cedf..33459a5 100644 --- a/rap/src/analysis/core/dataflow/graph.rs +++ b/rap/src/analysis/core/dataflow/graph.rs @@ -24,7 +24,7 @@ pub enum NodeOp { Len, Cast, BinaryOp, - CheckedBinaryOp, //deprecated in the latest(1.81) nightly rustc + CheckedBinaryOp, NullaryOp, UnaryOp, Discriminant, @@ -56,14 +56,25 @@ pub enum EdgeOp { #[derive(Clone)] pub enum GraphEdge { - NodeEdge { src: Local, dst: Local, op: EdgeOp }, - ConstEdge { src: String, dst: Local, op: EdgeOp }, + NodeEdge { + src: Local, + dst: Local, + op: EdgeOp, + seq: u32, + }, + ConstEdge { + src: String, + dst: Local, + op: EdgeOp, + seq: u32, + }, } #[derive(Clone)] pub struct GraphNode { pub op: NodeOp, - pub span: Span, + pub span: Span, //the corresponding code span + pub seq: u32, //the sequence number, edges with the same seq number are added in the same batch within a statement or terminator pub out_edges: Vec, pub in_edges: Vec, } @@ -73,6 +84,7 @@ impl GraphNode { Self { op: NodeOp::Nop, span: DUMMY_SP, + seq: 0, out_edges: vec![], in_edges: vec![], } @@ -104,14 +116,16 @@ impl Graph { } pub fn add_node_edge(&mut self, src: Local, dst: Local, op: EdgeOp) -> EdgeIdx { - let edge_idx = self.edges.push(GraphEdge::NodeEdge { src, dst, op }); + let seq = self.nodes[dst].seq; + let edge_idx = self.edges.push(GraphEdge::NodeEdge { src, dst, op, seq }); self.nodes[dst].in_edges.push(edge_idx); self.nodes[src].out_edges.push(edge_idx); edge_idx } pub fn add_const_edge(&mut self, src: String, dst: Local, op: EdgeOp) -> EdgeIdx { - let edge_idx = self.edges.push(GraphEdge::ConstEdge { src, dst, op }); + let seq = self.nodes[dst].seq; + let edge_idx = self.edges.push(GraphEdge::ConstEdge { src, dst, op, seq }); self.nodes[dst].in_edges.push(edge_idx); edge_idx } @@ -164,6 +178,7 @@ impl Graph { } let mut ret = place.local; for place_elem in place.projection { + // if there are projections, then add marker nodes ret = parse_one_step(self, ret, place_elem); } ret @@ -258,6 +273,7 @@ impl Graph { } Rvalue::RawPtr(_, _) => todo!(), }; + self.nodes[dst].seq += 1; } } @@ -279,6 +295,7 @@ impl Graph { } self.nodes[dst].op = NodeOp::Call(*def_id); self.nodes[dst].span = terminator.source_info.span; + self.nodes[dst].seq += 1; return; } } From 75a6d4ed5048c2200633e847d5e7aea6eccb05ce Mon Sep 17 00:00:00 2001 From: Calvin Date: Sat, 30 Nov 2024 16:45:09 +0800 Subject: [PATCH 2/4] fix param_return_deps and debug for closure --- rap/src/analysis/core/dataflow/debug.rs | 19 +++++++++++++++++++ rap/src/analysis/core/dataflow/graph.rs | 4 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/rap/src/analysis/core/dataflow/debug.rs b/rap/src/analysis/core/dataflow/debug.rs index 3e49c5c..df70901 100644 --- a/rap/src/analysis/core/dataflow/debug.rs +++ b/rap/src/analysis/core/dataflow/debug.rs @@ -120,6 +120,25 @@ impl GraphNode { .unwrap(); } } + AggKind::Closure(def_id) => { + let agg_name = tcx.def_path_str(def_id); + if is_marker { + write!( + attr, + "label=\"Clos {}\" style=dashed ", + escaped_string(agg_name) + ) + .unwrap(); + } else { + write!( + attr, + "label=\" {:?} | Clos {}\" ", + local, + escaped_string(agg_name) + ) + .unwrap(); + } + } _ => { if is_marker { write!(attr, "label=\"{:?}\" style=dashed ", agg_kind).unwrap(); diff --git a/rap/src/analysis/core/dataflow/graph.rs b/rap/src/analysis/core/dataflow/graph.rs index 33459a5..8c03789 100644 --- a/rap/src/analysis/core/dataflow/graph.rs +++ b/rap/src/analysis/core/dataflow/graph.rs @@ -380,10 +380,10 @@ impl Graph { find.get() } + // Whether there exists dataflow between each parameter and the return value pub fn param_return_deps(&self) -> IndexVec { - //the length is argc + 1, because _0 depends on _0 itself. let _0 = Local::from_usize(0); - let deps = (0..self.argc + 1) + let deps = (0..self.argc + 1) //the length is argc + 1, because _0 depends on _0 itself. .map(|i| { let _i = Local::from_usize(i); self.is_connected(_i, _0) From a0a218da4a32fcd1dcae8a2fabc0c20bfad99e9d Mon Sep 17 00:00:00 2001 From: Calvin Date: Sun, 8 Dec 2024 20:49:41 +0800 Subject: [PATCH 3/4] hash key cloning detection --- rap/src/analysis/opt.rs | 7 +- rap/src/analysis/opt/memory_cloning.rs | 1 + .../opt/memory_cloning/hash_key_cloning.rs | 138 ++++++++++++++++++ 3 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 rap/src/analysis/opt/memory_cloning.rs create mode 100644 rap/src/analysis/opt/memory_cloning/hash_key_cloning.rs diff --git a/rap/src/analysis/opt.rs b/rap/src/analysis/opt.rs index fdb4301..79011bb 100644 --- a/rap/src/analysis/opt.rs +++ b/rap/src/analysis/opt.rs @@ -1,9 +1,11 @@ pub mod checking; +pub mod memory_cloning; use rustc_middle::ty::TyCtxt; use super::core::dataflow::DataFlow; -use checking::bounds_checking::check; +use checking::bounds_checking; +use memory_cloning::hash_key_cloning; pub struct Opt<'tcx> { pub tcx: TyCtxt<'tcx>, @@ -18,7 +20,8 @@ impl<'tcx> Opt<'tcx> { let mut dataflow = DataFlow::new(self.tcx, false); dataflow.build_graphs(); for (_, graph) in dataflow.graphs.iter() { - check(graph, &self.tcx); + bounds_checking::check(graph, &self.tcx); + hash_key_cloning::check(graph, &self.tcx); } } } diff --git a/rap/src/analysis/opt/memory_cloning.rs b/rap/src/analysis/opt/memory_cloning.rs new file mode 100644 index 0000000..cf66a92 --- /dev/null +++ b/rap/src/analysis/opt/memory_cloning.rs @@ -0,0 +1 @@ +pub mod hash_key_cloning; diff --git a/rap/src/analysis/opt/memory_cloning/hash_key_cloning.rs b/rap/src/analysis/opt/memory_cloning/hash_key_cloning.rs new file mode 100644 index 0000000..16de496 --- /dev/null +++ b/rap/src/analysis/opt/memory_cloning/hash_key_cloning.rs @@ -0,0 +1,138 @@ +use annotate_snippets::Level; +use annotate_snippets::Renderer; +use annotate_snippets::Snippet; +use once_cell::sync::OnceCell; + +use crate::analysis::core::dataflow::graph::DFSStatus; +use crate::analysis::core::dataflow::graph::Direction; +use crate::analysis::core::dataflow::graph::GraphEdge::NodeEdge; +use rustc_hir::{intravisit, Expr, ExprKind}; +use rustc_middle::mir::Local; +use rustc_middle::ty::{TyCtxt, TyKind, TypeckResults}; +use rustc_span::Span; +use std::collections::HashSet; +static DEFPATHS: OnceCell = OnceCell::new(); + +use crate::analysis::core::dataflow::graph::Graph; +use crate::analysis::core::dataflow::graph::GraphNode; +use crate::analysis::core::dataflow::graph::NodeOp; +use crate::analysis::utils::def_path::DefPath; +use crate::utils::log::{ + relative_pos_range, span_to_filename, span_to_line_number, span_to_source_code, +}; + +struct DefPaths { + hashset_insert: DefPath, + clone: DefPath, +} + +impl DefPaths { + pub fn new(tcx: &TyCtxt<'_>) -> Self { + Self { + hashset_insert: DefPath::new("std::collections::HashSet::insert", tcx), + clone: DefPath::new("std::clone::Clone::clone", tcx), + } + } +} + +struct HashSetInsertFinder<'tcx> { + typeck_results: &'tcx TypeckResults<'tcx>, + record: HashSet, +} + +impl<'tcx> intravisit::Visitor<'tcx> for HashSetInsertFinder<'tcx> { + fn visit_expr(&mut self, ex: &'tcx Expr<'tcx>) { + if let ExprKind::MethodCall(_, receiver, ..) = ex.kind { + let def_id = self + .typeck_results + .type_dependent_def_id(ex.hir_id) + .unwrap(); + let target_def_id = (&DEFPATHS.get().unwrap()).hashset_insert.last_def_id(); + if def_id == target_def_id { + let ty = self.typeck_results.node_type(receiver.hir_id); + if let TyKind::Adt(.., generic_args) = ty.kind() { + // we check whether the first generic arg is a ref type + if !matches!( + generic_args.get(0).unwrap().expect_ty().kind(), + TyKind::Ref(..) + ) { + self.record.insert(ex.span); + } + } + } + } + intravisit::walk_expr(self, ex); + } +} + +// check that the param of insert is moved from a cloned value +fn find_first_param_upside_clone(graph: &Graph, node: &GraphNode) -> Option { + let mut clone_node_idx = None; + let def_paths = &DEFPATHS.get().unwrap(); + let target_def_id = def_paths.clone.last_def_id(); + if let NodeEdge { src, .. } = &graph.edges[node.in_edges[1]] { + // the first param is self, so we use 1 + let mut node_operator = |idx: Local| -> DFSStatus { + let node = &graph.nodes[idx]; + if let NodeOp::Call(def_id) = node.op { + if def_id == target_def_id { + clone_node_idx = Some(idx); + return DFSStatus::Stop; + } + } + DFSStatus::Continue + }; + graph.dfs( + *src, + Direction::Upside, + &mut node_operator, + &mut Graph::equivalent_edge_validator, + false, + ); + } + clone_node_idx +} + +fn report_hash_key_cloning(graph: &Graph, clone_span: Span, insert_span: Span) { + let code_source = span_to_source_code(graph.span); + let filename = span_to_filename(clone_span); + let mut snippet = Snippet::source(&code_source) + .line_start(span_to_line_number(graph.span)) + .origin(&filename) + .fold(true) + .annotation( + Level::Error + .span(relative_pos_range(graph.span, clone_span)) + .label("Cloning happens here."), + ) + .annotation( + Level::Error + .span(relative_pos_range(graph.span, insert_span)) + .label("Used here."), + ); + let message = Level::Warning + .title("Unnecessary memory cloning detected") + .snippet(snippet); + let renderer = Renderer::styled(); + println!("{}", renderer.render(message)); +} + +pub fn check(graph: &Graph, tcx: &TyCtxt) { + let _ = &DEFPATHS.get_or_init(|| DefPaths::new(tcx)); + let def_id = graph.def_id; + let body = tcx.hir().body_owned_by(def_id.as_local().unwrap()); + let typeck_results = tcx.typeck(def_id.as_local().unwrap()); + let mut hashset_finder = HashSetInsertFinder { + typeck_results, + record: HashSet::new(), + }; + intravisit::walk_body(&mut hashset_finder, body); + for (idx, node) in graph.nodes.iter_enumerated() { + if hashset_finder.record.contains(&node.span) { + if let Some(clone_node_idx) = find_first_param_upside_clone(graph, node) { + let clone_span = graph.nodes[clone_node_idx].span; + report_hash_key_cloning(graph, clone_span, node.span); + } + } + } +} From 490bd8ae8b851c26d1c39e7b11769e7e26efcfcf Mon Sep 17 00:00:00 2001 From: Calvin Date: Sun, 8 Dec 2024 20:54:36 +0800 Subject: [PATCH 4/4] add test case for hash key cloning --- tests/hash_key_cloning/Cargo.toml | 6 ++++++ tests/hash_key_cloning/src/lib.rs | 9 +++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/hash_key_cloning/Cargo.toml create mode 100644 tests/hash_key_cloning/src/lib.rs diff --git a/tests/hash_key_cloning/Cargo.toml b/tests/hash_key_cloning/Cargo.toml new file mode 100644 index 0000000..ec315cf --- /dev/null +++ b/tests/hash_key_cloning/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "hash_key_cloning" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/tests/hash_key_cloning/src/lib.rs b/tests/hash_key_cloning/src/lib.rs new file mode 100644 index 0000000..8353234 --- /dev/null +++ b/tests/hash_key_cloning/src/lib.rs @@ -0,0 +1,9 @@ +use std::collections::HashSet; + +fn foo(a: &Vec) { + let mut b = HashSet::new(); + for i in a { + let c = i.clone(); + b.insert(c); + } +}