Skip to content

Commit

Permalink
Merge branch 'intervaltree'
Browse files Browse the repository at this point in the history
  • Loading branch information
CedricHermansBIT committed Feb 15, 2024
2 parents 6d01192 + c7bc919 commit a5f6c82
Show file tree
Hide file tree
Showing 10 changed files with 1,478 additions and 26,793 deletions.
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,13 @@ target
*.gtf
perf.data
htseq
*.sam
*.sam
IGV*
*.dot
*.rust
*.python
test*
perf*
*.bai
*.txt
*.svg
54 changes: 54 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ edition = "2021"

[dependencies]
structopt = "0.3"
bam = "0.1.4"
bam = "0.1.4"
rand = "0.8.5"
13 changes: 13 additions & 0 deletions diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/python

import sys

# Paths of the two tab-separated files to compare, taken from the command line.
file1 = sys.argv[1]
file2 = sys.argv[2]

with open(file1, 'r') as f1, open(file2, 'r') as f2:
    # Walk both files in lockstep; zip() stops at the end of the shorter file.
    for line1, line2 in zip(f1, f2):
        if line1 == line2:
            continue
        fields1 = line1.split('\t')
        # From file 1 keep the fields at index 0, 2, 3, 5 and the last one
        # (stripped); from file 2 keep only the last field (stripped).
        left = '\t'.join([fields1[0], fields1[2], fields1[3], fields1[5], fields1[-1].strip()])
        right = line2.split('\t')[-1].strip()
        print(left, '\t', right)
48 changes: 48 additions & 0 deletions src/feature.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#![allow(dead_code)]

/// A named feature described by a chromosome id, a start/end pair and a strand.
///
/// A `type_: String` field used to exist here and is currently disabled.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Default)]
pub struct Feature {
    /// Identifier of the feature.
    name: String,
    /// Numeric chromosome identifier.
    chr: i32,
    /// Start coordinate.
    start: i32,
    /// End coordinate.
    end: i32,
    /// Strand marker, stored as a single character.
    strand: char,
}

impl Feature {
    /// Builds a feature from its parts; the values are stored without validation.
    pub fn new(name: String, chr: i32, start: i32, end: i32, strand: char) -> Self {
        Self {
            name,
            chr,
            start,
            end,
            strand,
        }
    }

    /// Borrows the feature's name.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Returns the chromosome identifier.
    pub fn chr(&self) -> i32 {
        self.chr
    }

    /// Returns the start coordinate.
    pub fn start(&self) -> i32 {
        self.start
    }

    /// Returns the end coordinate.
    pub fn end(&self) -> i32 {
        self.end
    }

    /// Returns the strand marker.
    pub fn strand(&self) -> char {
        self.strand
    }

    /// Overwrites the end coordinate; `start` is left untouched.
    pub fn set_end(&mut self, end: i32) {
        self.end = end;
    }
}
151 changes: 151 additions & 0 deletions src/interval.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#![allow(dead_code)]

use crate::Feature;

/// A `start`/`end` span that may carry a [`Feature`] payload.
///
/// Most predicates here (`overlaps`, `is_null`, `length`, `lt`, `gt`) treat
/// `end` as exclusive. `contains_point` and `distance_to_point` treat `end` as
/// inclusive.
#[derive(Debug, Eq, PartialEq, Hash, Clone)]
pub struct Interval {
    /// Lower bound of the span.
    pub start: i32,
    /// Upper bound of the span.
    pub end: i32,
    /// Optional feature attached to this span.
    pub data: Option<Feature>,
}

impl Interval {
    /// Creates an interval from its bounds and optional payload, without validation.
    pub fn new(start: i32, end: i32, data: Option<Feature>) -> Self {
        Self { start, end, data }
    }

    /// Returns `true` when the two spans intersect, treating `end` as exclusive
    /// (spans that merely touch do not overlap).
    pub fn overlaps(&self, other: &Interval) -> bool {
        other.start < self.end && other.end > self.start
    }

    /// Returns the length of the intersection of the two spans, or `0` when
    /// they do not overlap.
    pub fn overlap_size(&self, other: &Interval) -> i32 {
        if self.overlaps(other) {
            self.end.min(other.end) - self.start.max(other.start)
        } else {
            0
        }
    }

    /// Returns `true` when `start <= point <= end`.
    ///
    /// NOTE(review): `end` is inclusive here, unlike in `overlaps`; confirm
    /// this is intended.
    pub fn contains_point(&self, point: i32) -> bool {
        self.start <= point && point <= self.end
    }

    /// Returns `true` when both bounds are equal; the payload is ignored.
    pub fn range_matches(&self, other: &Interval) -> bool {
        self.start == other.start && self.end == other.end
    }

    /// Returns `true` when `other` lies entirely within `self` (bounds may coincide).
    pub fn contains_interval(&self, other: &Interval) -> bool {
        self.start <= other.start && self.end >= other.end
    }

    /// Returns the gap between the two spans, or `0` when they overlap or touch.
    ///
    /// The comparison is `>=` so that a span starting exactly where `other`
    /// ends takes the `self.start - other.end` branch (yielding `0`). With a
    /// strict `>` that case fell through to `other.start - self.end`, which is
    /// negative.
    pub fn distance_to(&self, other: &Interval) -> i32 {
        if self.overlaps(other) {
            0
        } else if self.start >= other.end {
            self.start - other.end
        } else {
            other.start - self.end
        }
    }

    /// Returns the distance from `point` to the nearest bound, or `0` when
    /// `contains_point(point)` holds.
    pub fn distance_to_point(&self, point: i32) -> i32 {
        if self.contains_point(point) {
            0
        } else if self.start > point {
            self.start - point
        } else {
            point - self.end
        }
    }

    /// Returns `true` when the span is empty or inverted (`start >= end`).
    pub fn is_null(&self) -> bool {
        self.start >= self.end
    }

    /// Returns `end - start`, or `0` for a null interval.
    pub fn length(&self) -> i32 {
        if self.is_null() {
            0
        } else {
            self.end - self.start
        }
    }

    /// Panics when either `self` or `other` is a null interval.
    fn raise_if_null(&self, other: &Interval) {
        if self.is_null() || other.is_null() {
            panic!("Cannot perform operation on null interval");
        }
    }

    /// Strictly less than: `self` ends at or before the start of `other`.
    ///
    /// # Panics
    ///
    /// Panics if either interval is null.
    pub fn lt(&self, other: &Interval) -> bool {
        self.raise_if_null(other);
        self.end <= other.start
    }

    /// Returns `true` when `self` ends strictly before `other` ends.
    ///
    /// NOTE(review): this is not reflexive (`x.le(&x)` is `false`); confirm
    /// whether `<=` was intended.
    ///
    /// # Panics
    ///
    /// Panics if either interval is null.
    pub fn le(&self, other: &Interval) -> bool {
        self.raise_if_null(other);
        self.end < other.end
    }

    /// Strictly greater than: `self` starts at or after the end of `other`.
    ///
    /// This mirrors [`Interval::lt`], so `a.lt(&b)` and `b.gt(&a)` always
    /// agree, including for spans that touch.
    ///
    /// # Panics
    ///
    /// Panics if either interval is null.
    pub fn gt(&self, other: &Interval) -> bool {
        self.raise_if_null(other);
        self.start >= other.end
    }

    /// Returns `true` when `self` starts at or after the start of `other`.
    ///
    /// # Panics
    ///
    /// Panics if either interval is null.
    pub fn ge(&self, other: &Interval) -> bool {
        self.raise_if_null(other);
        self.start >= other.start
    }

    /// Overlap test against raw bounds: `self.start < end && self.end - 1 > start`.
    ///
    /// NOTE(review): the `- 1` makes this stricter than [`Interval::overlaps`]
    /// at the upper bound; presumably a deliberate coordinate adjustment,
    /// confirm against callers.
    pub fn overlaps_with(&self, start: i32, end: i32) -> bool {
        self.start < end && self.end - 1 > start
    }

    /// Borrows the name of the attached feature, if there is one.
    pub fn name(&self) -> Option<&str> {
        self.data.as_ref().map(|feature| feature.name())
    }

    /// Sets a new `end` on the interval and on its attached feature, if any.
    pub fn set_end(&mut self, end: i32) {
        self.end = end;
        if let Some(feature) = self.data.as_mut() {
            feature.set_end(end);
        }
    }
}

/// Partial ordering for `Interval`; it always succeeds because it simply
/// wraps the total order provided by the `Ord` implementation.
impl PartialOrd for Interval {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}

/// Total order over intervals: by `start`, then `end`, then feature name.
///
/// An interval without a payload sorts before one with a payload when the
/// bounds are equal, and two payload-free intervals with equal bounds compare
/// as `Equal`. Null intervals are not rejected here.
///
/// NOTE(review): only the feature's name takes part in the comparison, while
/// the derived `PartialEq` compares the whole payload. Two intervals can
/// therefore compare `Equal` without being `==`.
impl Ord for Interval {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Compare the optional names directly rather than unwrapping, so an
        // interval with `data == None` can be ordered without panicking.
        let own_name = self.data.as_ref().map(|feature| feature.name());
        let other_name = other.data.as_ref().map(|feature| feature.name());

        self.start
            .cmp(&other.start)
            .then_with(|| self.end.cmp(&other.end))
            .then_with(|| own_name.cmp(&other_name))
    }
}

/// Renders as `Interval(start, end, name)` when a feature is attached and as
/// `Interval(start, end)` otherwise.
impl std::fmt::Display for Interval {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = self.data.as_ref().map(|feat| feat.name());
        if let Some(label) = label {
            write!(f, "Interval({}, {}, {})", self.start, self.end, label)
        } else {
            write!(f, "Interval({}, {})", self.start, self.end)
        }
    }
}
Loading

0 comments on commit a5f6c82

Please sign in to comment.