diff --git a/src/feature.rs b/src/feature.rs index 65e3885..f29a95c 100644 --- a/src/feature.rs +++ b/src/feature.rs @@ -41,4 +41,8 @@ impl Feature { pub fn strand(&self) -> char { self.strand } + + pub fn set_end(&mut self, end: i32) { + self.end = end; + } } \ No newline at end of file diff --git a/src/interval.rs b/src/interval.rs index 2b362d3..7103873 100644 --- a/src/interval.rs +++ b/src/interval.rs @@ -107,6 +107,16 @@ impl Interval { self.start < end && self.end-1 > start } + // as_ref() returns a reference to the Option's value + pub fn name(&self) -> Option<&str> { + self.data.as_ref().map(|f| f.name()) + } + + pub fn set_end(&mut self, end: i32) { + self.end = end; + self.data.as_mut().map(|f| f.set_end(end)); + } + } impl PartialOrd for Interval { @@ -139,5 +149,3 @@ impl std::fmt::Display for Interval { } } } - - diff --git a/src/intervaltree.rs b/src/intervaltree.rs index 8d26259..0560792 100644 --- a/src/intervaltree.rs +++ b/src/intervaltree.rs @@ -22,6 +22,39 @@ impl IntervalTree { for interval in intervals { unique_intervals.insert(interval); } + + // merge overlapping intervals with same name + use std::collections::HashMap; + + // Create a HashMap where the keys are the names of the intervals and the values are vectors of intervals with that name. + let mut grouped_intervals: HashMap> = HashMap::new(); + for interval in &unique_intervals { + grouped_intervals.entry(interval.name().unwrap().to_string()).or_insert(Vec::new()).push(interval.clone()); + } + + // For each name in the HashMap, sort the intervals by their start position and then merge the overlapping intervals. + for intervals in grouped_intervals.values_mut() { + intervals.sort_by(|a, b| a.start.cmp(&b.start)); + let mut i = 0; + while i < intervals.len() - 1 { + if intervals[i].end >= intervals[i+1].start && intervals[i+1].end >= intervals[i].end { + intervals[i].end = intervals[i+1].end; + intervals.remove(i+1); + } else { + i += 1; + } + } + } + + // Create a new HashSet of intervals from the merged intervals. + let mut unique_intervals: HashSet = HashSet::new(); + for intervals in grouped_intervals.values() { + for interval in intervals { + unique_intervals.insert(interval.clone()); + } + } + + // convert to vector //let unique_intervals: Vec = unique_intervals.into_iter().collect(); //eprintln!("filtered intervals"); @@ -91,6 +124,7 @@ impl IntervalTree { } } + pub fn add(&mut self, interval: Interval) { if interval.is_null() { panic!("IntervalTree: Null Interval objects are not allowed in IntervalTree"); @@ -300,8 +334,7 @@ impl IntervalTree { } pub fn contains(&self, start: i32, end: i32) -> Vec<&Interval> { - let mut result = self.overlap(start, end); - // TODO: merge overlapping intervals with same name into one interval + let mut result: Vec<&Interval> = self.overlap(start, end); result.retain(|x| x.start <= start && x.end >= end); result } diff --git a/src/main.rs b/src/main.rs index c5d9bbb..526e502 100755 --- a/src/main.rs +++ b/src/main.rs @@ -244,7 +244,6 @@ struct Args { _m: String, // Stranded - // TODO: implement stranded mode #[structopt( short = "s", long = "stranded", @@ -363,17 +362,17 @@ fn prepare_count_hashmap(gtf: &Vec>) -> HashMap) -> Vec> { let mut map: HashMap> = HashMap::new(); - let file = File::open(file_path).expect("Kan het bestand niet openen"); + let file = File::open(file_path).expect("Could not open this file"); let mut reader = BufReader::new(file); let mut counter = 0; let mut line = String::new(); @@ -691,6 +690,7 @@ fn process_intersection_nonempty_read(_features: &IntervalTree, _start_pos: i32, fn process_intersection_strict_read(features: &IntervalTree, start_pos: i32, end_pos: i32, strand: bool, overlapping_features: &mut Vec, args: &Args) { //todo!("process_partial_read for intersection-strict"); let new_contained = features.contains(start_pos, end_pos); + // change new_contained to a Vec<&Interval> let strand = if strand { '-' } else { '+' }; // add all contained features to the list add_stranded_features(new_contained, strand, overlapping_features, args);