diff --git a/Cargo.toml b/Cargo.toml index cb955d5..61b5b54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,5 +36,9 @@ harness = false name = "ancestors" harness = false +[[bench]] +name = "parser" +harness = false + [profile.release] lto = true diff --git a/examples/compare_ontologies.rs b/examples/compare_ontologies.rs index 3b39ca6..f32dd3d 100644 --- a/examples/compare_ontologies.rs +++ b/examples/compare_ontologies.rs @@ -16,7 +16,7 @@ fn ontology(path_arg: &str) -> Ontology { } } -fn ontology2(path_arg: &str) -> Ontology { +fn ontology_transitive(path_arg: &str) -> Ontology { let path = Path::new(path_arg); match path.is_file() { @@ -213,17 +213,21 @@ fn print_replacement_diff(replacements: Option<(Option, Option "); + println!("Usage:\ncompare_ontologies []"); println!("e.g.:\ncompare_ontologies tests/ontology.hpo tests/ontology_v2.hpo:\n"); + println!("Alternatively compare transitive vs non-transitive:\ncompare_ontologies tests/ontology.hpo\n"); process::exit(1) } let arg_old = args.nth(1).unwrap(); - let arg_new = args.next().unwrap(); - let lhs = ontology(&arg_old); - let rhs = ontology2(&arg_new); + + let rhs = if let Some(arg_new) = args.next() { + ontology(&arg_new) + } else { + ontology_transitive(&arg_old) + }; let diffs = lhs.compare(&rhs); diff --git a/src/lib.rs b/src/lib.rs index ee1a777..b28dd60 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,9 @@ pub enum HpoError { /// Failed to convert an integer to a float #[error("cannot convert int to float")] TryFromIntError(#[from] std::num::TryFromIntError), + /// Failed to parse a line of input data from the JAX obo + #[error("invalid input data: {0}")] + InvalidInput(String), } impl From for HpoError { diff --git a/src/ontology.rs b/src/ontology.rs index 20875f6..5a3f66d 100644 --- a/src/ontology.rs +++ b/src/ontology.rs @@ -1,5 +1,5 @@ use core::fmt::Debug; -use std::collections::hash_map::Values; +use std::collections::hash_map::{Entry, Values}; use std::collections::{HashMap, HashSet}; use std::fs::File; use std::io::Read; @@ -366,7 +366,7 @@ impl Ontology { /// /// - Actual OBO data: [`hp.obo`](https://hpo.jax.org/app/data/ontology) /// - Links between HPO and OMIM diseases: [`phenotype.hpoa`](https://hpo.jax.org/app/data/annotations) - /// - Links between HPO and Genes: [`phenotype_to_genes.txt`](http://purl.obolibrary.org/obo/hp/hpoa/phenotype_to_genes.txt) + /// - Links between HPO and Genes: [`genes_to_phenotype.txt`](http://purl.obolibrary.org/obo/hp/hpoa/genes_to_phenotype.txt) /// /// and then specify the folder where the data is stored. /// @@ -424,7 +424,7 @@ impl Ontology { /// /// - Actual OBO data: [`hp.obo`](https://hpo.jax.org/app/data/ontology) /// - Links between HPO and OMIM diseases: [`phenotype.hpoa`](https://hpo.jax.org/app/data/annotations) - /// - Links between HPO and Genes: [`genes_to_phenotypes.txt`](http://purl.obolibrary.org/obo/hp/hpoa/genes_to_phenotype.txt) + /// - Links between HPO and Genes: [`phenotype_to_genes.txt`](http://purl.obolibrary.org/obo/hp/hpoa/phenotype_to_genes.txt) /// /// and then specify the folder where the data is stored. /// @@ -898,11 +898,12 @@ impl Ontology { /// /// ``` /// use hpo::Ontology; + /// use hpo::annotations::GeneId; /// /// let ontology_1 = Ontology::from_binary("tests/example.hpo").unwrap(); /// let mut ontology_2 = Ontology::default(); /// - /// ontology_2.add_gene("FOOBAR", "666666").unwrap(); + /// ontology_2.add_gene("FOOBAR", GeneId::from(666666)); /// /// let compare = ontology_1.compare(&ontology_2); /// assert_eq!(compare.added_hpo_terms().len(), 0); @@ -974,13 +975,13 @@ impl Ontology { if matched_terms.is_empty() { continue; } - let gene_id = ont.add_gene( + ont.add_gene( self.gene(gene.id()).ok_or(HpoError::DoesNotExist)?.name(), - &gene.id().as_u32().to_string(), - )?; + *gene.id(), + ); for term in &matched_terms { - ont.link_gene_term(term, gene_id)?; - ont.gene_mut(&gene_id) + ont.link_gene_term(term, *gene.id())?; + ont.gene_mut(gene.id()) .ok_or(HpoError::DoesNotExist)? .add_term(term); } @@ -1270,7 +1271,7 @@ impl Ontology { } } - /// Add a gene to the Ontology. and return the [`GeneId`] + /// Add a gene to the Ontology /// /// If the gene does not yet exist, a new [`Gene`] entity is created /// and stored in the Ontology. @@ -1281,19 +1282,19 @@ impl Ontology { /// Adding a gene does not connect it to any HPO terms. /// Use [`Ontology::link_gene_term`] for creating connections. /// - /// # Errors - /// - /// If the `gene_id` is invalid, an [`HpoError::ParseIntError`] is returned + /// This method was changed to receive the `gene_id` as [`GeneId`] + /// instead of `str` in `0.10` and does not return a `Result` anymore. /// /// # Examples /// /// ``` /// use hpo::Ontology; + /// use hpo::annotations::GeneId; /// /// let mut ontology = Ontology::default(); /// assert!(ontology.gene(&1u32.into()).is_none()); /// - /// ontology.add_gene("Foo", "1"); + /// ontology.add_gene("Foo", GeneId::from(1)); /// /// // Genes can be iterated... /// let mut gene_iterator = ontology.genes(); @@ -1304,14 +1305,9 @@ impl Ontology { /// // .. or accessed directly /// assert!(ontology.gene(&1u32.into()).is_some()); /// ``` - pub fn add_gene(&mut self, gene_name: &str, gene_id: &str) -> HpoResult { - let id = GeneId::try_from(gene_id)?; - match self.genes.entry(id) { - std::collections::hash_map::Entry::Occupied(_) => Ok(id), - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(Gene::new(id, gene_name)); - Ok(id) - } + pub fn add_gene(&mut self, gene_name: &str, gene_id: GeneId) { + if let Entry::Vacant(entry) = self.genes.entry(gene_id) { + entry.insert(Gene::new(gene_id, gene_name)); } } @@ -1383,7 +1379,7 @@ impl Ontology { /// /// let mut ontology = Ontology::default(); /// ontology.insert_term("Term-Foo".into(), 1u32); - /// ontology.add_gene("Foo", "5"); + /// ontology.add_gene("Foo", GeneId::from(5)); /// ontology.link_gene_term(1u32, GeneId::from(5u32)).unwrap(); /// /// let term = ontology.hpo(1u32).unwrap(); diff --git a/src/parser.rs b/src/parser.rs index 716072c..a2d3c4c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -8,9 +8,15 @@ pub(crate) mod binary; /// Module to parse `hp.obo` file pub(crate) mod hp_obo; -/// Module to parse HPO - `Gene` associations from `genes_to_phenotype.txt` file -pub(crate) mod genes_to_phenotype { +/// Module to parse HPO - `Gene` associations +/// +/// It contains functions to parse `genes_to_phenotype.txt` and +/// `phenotype_to_genes.txt` input files +pub(crate) mod gene_to_hpo { + + use crate::annotations::GeneId; use crate::parser::Path; + use crate::HpoError; use crate::HpoResult; use std::fs::File; use std::io::BufRead; @@ -19,67 +25,323 @@ pub(crate) mod genes_to_phenotype { use crate::HpoTermId; use crate::Ontology; - /// Quick and dirty parser for development and debugging - pub fn parse>(file: P, ontology: &mut Ontology) -> HpoResult<()> { - let file = File::open(file).unwrap(); - let reader = BufReader::new(file); - for line in reader.lines() { - let line = line.unwrap(); - // TODO: Check for the header outside of the `lines` iterator - if line.starts_with('#') || line.starts_with("ncbi_gene_id") { - continue; - } - let cols: Vec<&str> = line.trim().split('\t').collect(); - let gene_id = ontology.add_gene(cols[1], cols[0])?; - let term_id = HpoTermId::try_from(cols[2])?; - ontology.link_gene_term(term_id, gene_id)?; + struct ParsedGene<'a> { + ncbi_id: GeneId, + symbol: &'a str, + hpo: HpoTermId, + } - ontology - .gene_mut(&gene_id) - .expect("Cannot find gene {gene_id}") - .add_term(term_id); + impl<'a> ParsedGene<'a> { + fn try_new(ncbi_id: &'a str, symbol: &'a str, hpo: &'a str) -> HpoResult { + let hpo = HpoTermId::try_from(hpo)?; + let ncbi_id = GeneId::try_from(ncbi_id)?; + Ok(Self { + ncbi_id, + symbol, + hpo, + }) + } + } + + /// Removes the first (header) line. + /// + /// TODO: Update this once is stable + fn remove_header(reader: &mut R) -> HpoResult<()> { + let mut trash = String::with_capacity(80); + reader.read_line(&mut trash).map_err(|_| { + HpoError::InvalidInput("Invalid data in genes_to_phenotype.txt".to_string()) + })?; + if !trash.starts_with('#') + && !trash.starts_with("ncbi_gene_id") + && !trash.starts_with("hpo_id") + { + return Err(HpoError::InvalidInput( + "genes_to_phenotype.txt file must contain a header".to_string(), + )); } Ok(()) } -} -/// Module to parse HPO - `Gene` associations from `phenotype_to_genes.txt` file -pub(crate) mod phenotype_to_genes { - use crate::parser::Path; - use crate::HpoResult; - use std::fs::File; - use std::io::BufRead; - use std::io::BufReader; + /// Parses a single line of `genes_to_phenotype.txt` + /// + /// and returns a `ParsedGene` struct with gene and HPO info + /// + /// ```text + /// 10 NAT2 HP:0000007 Autosomal recessive inheritance - OMIM:243400 + /// ``` + fn genes_to_phenotype_line(line: &str) -> HpoResult> { + let mut cols = line.split('\t'); - use crate::HpoTermId; - use crate::Ontology; + // Column 1 is the NCBI-ID of the gene + let Some(ncbi_id) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + // Column 2 is the gene symbol + let Some(symbol) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + // Column 3 is the Hpo Term ID + let Some(hpo) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + ParsedGene::try_new(ncbi_id, symbol, hpo) + } + + /// Parses a single line of `phenotype_to_genes.txt` + /// + /// ```text + /// HP:0000002 Abnormality of body height 81848 SPRY4 orphadata ORPHA:432 + /// ``` + fn phenotype_to_gene_line(line: &str) -> HpoResult> { + let mut cols = line.split('\t'); + + // Column 1 is the Hpo Term ID + let Some(hpo) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + // Column 2 is the HPO-name, which we don't need + if cols.next().is_none() { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + // Column 3 is the NCBI-ID of the gene + let Some(ncbi_id) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + // Column 4 is the gene symbol + let Some(symbol) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + ParsedGene::try_new(ncbi_id, symbol, hpo) + } + + /// Parse `genes_to_phenotype.txt` file + /// + /// ```text + /// ncbi_gene_id gene_symbol hpo_id hpo_name frequency disease_id + /// 10 NAT2 HP:0000007 Autosomal recessive inheritance - OMIM:243400 + /// 10 NAT2 HP:0001939 Abnormality of metabolism/homeostasis - OMIM:243400 + /// 16 AARS1 HP:0002460 Distal muscle weakness 15/15 OMIM:613287 + /// ``` + pub fn parse_genes_to_phenotype>( + file: P, + ontology: &mut Ontology, + ) -> HpoResult<()> { + parse(file, ontology, genes_to_phenotype_line) + } + + /// Parse `phenotype_to_genes.txt` file + /// + /// ```text + /// #Format: HPO-idHPO labelentrez-gene-identrez-gene-symbolAdditional Info from G-D sourceG-D sourcedisease-ID for link + /// HP:0000002 Abnormality of body height 81848 SPRY4 orphadata ORPHA:432 + /// HP:0000002 Abnormality of body height 204219 CERS3 orphadata ORPHA:79394 + /// HP:0000002 Abnormality of body height 51360 MBTPS2 - mim2gene OMIM:308205 + /// ``` + pub fn parse_phenotype_to_genes>( + file: P, + ontology: &mut Ontology, + ) -> HpoResult<()> { + parse(file, ontology, phenotype_to_gene_line) + } + + /// Parses a file to connect genes to HPO terms + fn parse, F: Fn(&str) -> HpoResult>>( + file: P, + ontology: &mut Ontology, + parse_line: F, + ) -> HpoResult<()> { + let filename = file.as_ref().display().to_string(); + let file = File::open(file).map_err(|_| HpoError::CannotOpenFile(filename))?; + let mut reader = BufReader::new(file); + + remove_header(&mut reader)?; - /// Quick and dirty parser for development and debugging - pub fn parse>(file: P, ontology: &mut Ontology) -> HpoResult<()> { - let file = File::open(file).unwrap(); - let reader = BufReader::new(file); for line in reader.lines() { - let line = line.unwrap(); - // TODO: Check for the header outside of the `lines` iterator - if line.starts_with('#') || line.starts_with("hpo_id") { - continue; - } - let cols: Vec<&str> = line.trim().split('\t').collect(); - let gene_id = ontology.add_gene(cols[3], cols[2])?; - let term_id = HpoTermId::try_from(cols[0])?; - ontology.link_gene_term(term_id, gene_id)?; + let line = line.map_err(|_| { + HpoError::InvalidInput("Invalid data in genes_to_phenotype.txt".to_string()) + })?; + let gene = parse_line(&line)?; + + ontology.add_gene(gene.symbol, gene.ncbi_id); + ontology.link_gene_term(gene.hpo, gene.ncbi_id)?; ontology - .gene_mut(&gene_id) - .expect("Cannot find gene {gene_id}") - .add_term(term_id); + .gene_mut(&gene.ncbi_id) + .expect("Gene is present because it was just add_omim_disease") + .add_term(gene.hpo); } Ok(()) } + + #[cfg(test)] + mod test { + use super::*; + #[test] + fn test_remove_header_ncbi_gene() { + let x = "ncbi_gene_id\txyz\n10\tNAT2\n".as_bytes(); + let mut reader = BufReader::new(x); + assert!(remove_header(&mut reader).is_ok()); + + let mut lines = reader.lines(); + assert_eq!(lines.next().unwrap().unwrap(), "10\tNAT2"); + assert!(lines.next().is_none()); + } + + #[test] + fn test_remove_header_hpo_id() { + let x = "hpo_id\txyz\n10\tNAT2\n".as_bytes(); + let mut reader = BufReader::new(x); + assert!(remove_header(&mut reader).is_ok()); + + let mut lines = reader.lines(); + assert_eq!(lines.next().unwrap().unwrap(), "10\tNAT2"); + assert!(lines.next().is_none()); + } + + #[test] + fn test_remove_header_hashtag() { + let x = "#foobar\txyz\n10\tNAT2\n".as_bytes(); + let mut reader = BufReader::new(x); + assert!(remove_header(&mut reader).is_ok()); + + let mut lines = reader.lines(); + assert_eq!(lines.next().unwrap().unwrap(), "10\tNAT2"); + assert!(lines.next().is_none()); + } + + #[test] + fn test_remove_header_fails() { + let x = "foobar\txyz\n10\tNAT2\n".as_bytes(); + let mut reader = BufReader::new(x); + assert!(remove_header(&mut reader).is_err()); + } + } + + #[cfg(test)] + mod test_genes_to_phenotype { + use crate::annotations::AnnotationId; + + use super::*; + + #[test] + fn test_parse_correct_line_with_newline() { + let line = "10\tNAT2\tHP:0000007\tfoobar\n"; + let res = genes_to_phenotype_line(line).expect("This line should parse correctly"); + assert_eq!(res.ncbi_id.as_u32(), 10); + assert_eq!(res.symbol, "NAT2"); + assert_eq!(res.hpo.as_u32(), 7u32); + } + + #[test] + fn test_parse_correct_line_without_newline() { + let line = "10\tNAT2\tHP:0000007\tfoobar"; + let res = genes_to_phenotype_line(line).expect("This line should parse correctly"); + assert_eq!(res.ncbi_id.as_u32(), 10); + assert_eq!(res.symbol, "NAT2"); + assert_eq!(res.hpo.as_u32(), 7u32); + } + + #[test] + fn test_parse_missing_id() { + let line = "NAT2\tHP:0000007\tfoobar\n"; + let res = genes_to_phenotype_line(line); + assert!(res.is_err()); + } + + #[test] + fn test_parse_missing_symbol() { + let line = "10\tHP:0000007\tfoobar\n"; + let res = genes_to_phenotype_line(line); + assert!(res.is_err()); + } + + #[test] + fn test_parse_missing_hpo() { + let line = "10\tNAT2\tfoobar\n"; + let res = genes_to_phenotype_line(line); + assert!(res.is_err()); + } + + #[test] + fn test_parse_invalid_hpo() { + let line = "10\tNAT2\tHP:000000A\tfoobar\n"; + let res = genes_to_phenotype_line(line); + assert!(res.is_err()); + } + } + + #[cfg(test)] + mod test_phenotpye_to_genes { + use super::*; + use crate::annotations::AnnotationId; + + #[test] + fn test_parse_correct_line_with_newline() { + let line = "HP:0000007\tAbnormality of body height\t10\tNAT2\tfoobar\n"; + let res = phenotype_to_gene_line(line).expect("This line should parse correctly"); + assert_eq!(res.ncbi_id.as_u32(), 10); + assert_eq!(res.symbol, "NAT2"); + assert_eq!(res.hpo.as_u32(), 7u32); + } + + #[test] + fn test_parse_correct_line_without_newline() { + let line = "HP:0000007\tAbnormality of body height\t10\tNAT2\tfoobar"; + let res = phenotype_to_gene_line(line).expect("This line should parse correctly"); + assert_eq!(res.ncbi_id.as_u32(), 10); + assert_eq!(res.symbol, "NAT2"); + assert_eq!(res.hpo.as_u32(), 7u32); + } + + #[test] + fn test_parse_missing_id() { + let line = "HP:0000007\tAbnormality of body height\tNAT2\tfoobar\n"; + let res = phenotype_to_gene_line(line); + assert!(res.is_err()); + } + + #[test] + fn test_parse_empty_symbol() { + let line = "HP:0000007\tAbnormality of body height\t10\t\tfoobar\n"; + let res = phenotype_to_gene_line(line).expect("This line should parse correctly"); + assert_eq!(res.ncbi_id.as_u32(), 10); + assert_eq!(res.symbol, ""); + assert_eq!(res.hpo.as_u32(), 7u32); + } + + #[test] + fn test_parse_missing_hpo() { + let line = "Abnormality of body height\t10\tNAT2\tfoobar\n"; + let res = phenotype_to_gene_line(line); + assert!(res.is_err()); + } + + #[test] + fn test_parse_invalid_hpo() { + let line = "HP:0000007A\tAbnormality of body height\t10\tNAT2\tfoobar\n"; + let res = genes_to_phenotype_line(line); + assert!(res.is_err()); + } + } } /// Module to parse HPO - `OmimDisease` associations from `phenotype.hpoa` file -pub(crate) mod phenotype_hpoa { +/// +/// # Example line +/// +/// ```text +/// OMIM:619340 Developmental and epileptic encephalopathy 96 HP:0011097 PMID:31675180 PCS 1/2 P HPO:probinson[2021-06-21] +/// OMIM:609153 Pseudohyperkalemia NOT HP:0001878 PMID:2766660 PCS P HPO:lccarmody[2018-10-03] +/// ``` +/// +pub(crate) mod disease_to_hpo { use crate::HpoError; use crate::HpoResult; use crate::HpoTermId; @@ -88,8 +350,6 @@ pub(crate) mod phenotype_hpoa { use std::io::BufReader; use std::path::Path; - use tracing::error; - use crate::Ontology; struct Omim<'a> { @@ -98,51 +358,66 @@ pub(crate) mod phenotype_hpoa { hpo_id: HpoTermId, } - fn parse_line(line: &str) -> Option> { + fn parse_line(line: &str) -> HpoResult>> { // TODO (nice to have): Add check to skip `database_id` header row // It is not strictly needed, because we're discarding non-OMIM rows if line.starts_with('#') { - return None; + return Ok(None); } if !line.starts_with("OMIM") { - return None; + return Ok(None); } - let cols: Vec<&str> = line.trim().split('\t').collect(); - if cols[2] == "NOT" { - return None; - } + let mut cols = line.trim().splitn(5, '\t'); - let Some((_, omim_id)) = cols[0].split_once(':') else { - error!("cannot parse OMIM ID from {}", cols[0]); - return None; + let Some(id_col) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + let Some((_, omim_id)) = id_col.split_once(':') else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + let Some(omim_name) = cols.next() else { + return Err(HpoError::InvalidInput(line.to_string())); + }; + + if let Some("NOT") = cols.next() { + return Ok(None); }; - let Ok(hpo_id) = HpoTermId::try_from(cols[3]) else { - error!("invalid HPO ID: {}", cols[3]); - return None; + let hpo_id = if let Some(id) = cols.next() { + HpoTermId::try_from(id)? + } else { + return Err(HpoError::InvalidInput(line.to_string())); }; - Some(Omim { + Ok(Some(Omim { id: omim_id, - name: cols[1], + name: omim_name, hpo_id, - }) + })) } /// Quick and dirty parser for development and debugging + /// + /// # Errors + /// + /// - [`HpoError::CannotOpenFile`]: Source file not present or can't be opened + /// - [`HpoError::ParseIntError`]: A line contains an invalid `omim_disease_id` + /// - [`HpoError::DoesNotExist`]: A line contains a non-existing [`HpoTermId`] pub fn parse>(file: P, ontology: &mut Ontology) -> HpoResult<()> { - let file = File::open(file).unwrap(); + let filename = file.as_ref().display().to_string(); + let file = File::open(file).map_err(|_| HpoError::CannotOpenFile(filename))?; let reader = BufReader::new(file); for line in reader.lines() { let line = line.unwrap(); - if let Some(omim) = parse_line(&line) { + if let Some(omim) = parse_line(&line)? { let omim_disease_id = ontology.add_omim_disease(omim.name, omim.id)?; ontology.link_omim_disease_term(omim.hpo_id, omim_disease_id)?; ontology .omim_disease_mut(&omim_disease_id) - .ok_or(HpoError::DoesNotExist)? + .expect("Omim disease was just added and cannot be missing") .add_term(omim.hpo_id); } } @@ -153,10 +428,65 @@ pub(crate) mod phenotype_hpoa { mod test_omim_parsing { use super::*; + #[test] + fn test_skip_comment() { + let s = "#OMIM:600171\tGonadal agenesis\t\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; + assert!(parse_line(s) + .expect("This line has the correct format") + .is_none()); + } + #[test] fn test_skip_not() { let s = "OMIM:600171\tGonadal agenesis\tNOT\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; - assert!(parse_line(s).is_none()); + assert!(parse_line(s) + .expect("This line has the correct format") + .is_none()); + } + + #[test] + fn test_skip_orpha() { + let s = "ORPHA:600171\tGonadal agenesis\t\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; + assert!(parse_line(s) + .expect("This line has the correct format") + .is_none()); + } + + #[test] + fn test_skip_orpha_not() { + let s = "ORPHA:600171\tGonadal agenesis\tNOT\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; + assert!(parse_line(s) + .expect("This line has the correct format") + .is_none()); + } + + #[test] + fn test_correct_omim() { + let s = "OMIM:600171\tGonadal agenesis\t\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; + let omim = parse_line(s) + .expect("This line has the correct format") + .expect("Line describes an Omim disease"); + assert_eq!(omim.name, "Gonadal agenesis"); + assert_eq!(omim.id, "600171"); + assert_eq!(omim.hpo_id, "HP:0000055"); + } + + #[test] + fn test_invalid_omim_id() { + let s = "OMIM_600171\tGonadal agenesis\t\tHP:0000055\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; + assert!(parse_line(s).is_err()); + } + + #[test] + fn test_invalid_hpo_id() { + let s = "OMIM:600171\tGonadal agenesis\t\tH55\tOMIM:600171\tTAS\tP\tHPO:skoehler[2014-11-27]"; + assert!(parse_line(s).is_err()); + } + + #[test] + fn test_invalid_input() { + let s = "OMIM:600171 Gonadal agenesis HP:0000055 OMIM:600171 TAS P HPO:skoehler[2014-11-27]"; + assert!(parse_line(s).is_err()); } } } @@ -168,8 +498,8 @@ pub(crate) fn load_from_jax_files_with_transivitve_genes>( ontology: &mut Ontology, ) -> HpoResult<()> { hp_obo::read_obo_file(obo_file, ontology)?; - phenotype_to_genes::parse(gene_file, ontology)?; - phenotype_hpoa::parse(disease_file, ontology)?; + gene_to_hpo::parse_phenotype_to_genes(gene_file, ontology)?; + disease_to_hpo::parse(disease_file, ontology)?; Ok(()) } @@ -180,7 +510,7 @@ pub(crate) fn load_from_jax_files>( ontology: &mut Ontology, ) -> HpoResult<()> { hp_obo::read_obo_file(obo_file, ontology)?; - genes_to_phenotype::parse(gene_file, ontology)?; - phenotype_hpoa::parse(disease_file, ontology)?; + gene_to_hpo::parse_genes_to_phenotype(gene_file, ontology)?; + disease_to_hpo::parse(disease_file, ontology)?; Ok(()) } diff --git a/src/similarity.rs b/src/similarity.rs index a85c387..6910b03 100644 --- a/src/similarity.rs +++ b/src/similarity.rs @@ -122,24 +122,34 @@ pub trait SimilarityCombiner { } /// Returns the maximum values of each row + /// + /// # Panics + /// + /// Panics when rows don't contain any values, i.e. when the matrix is empty fn row_maxes(&self, m: &Matrix) -> Vec { m.rows() .map(|row| { // I have no idea why, but I could not get a.max(b) to work // with the borrow checker - row.reduce(|a, b| if a > b { a } else { b }).unwrap() + row.reduce(|a, b| if a > b { a } else { b }) + .expect("A matrix must contain values") }) .copied() .collect() } /// Returns the maximum values of each column + /// + /// # Panics + /// + /// Panics when rows don't contain any values, i.e. when the matrix is empty fn col_maxes(&self, m: &Matrix) -> Vec { m.cols() .map(|col| { // I have no idea why, but I could not get a.max(b) to work // with the borrow checker - col.reduce(|a, b| if a > b { a } else { b }).unwrap() + col.reduce(|a, b| if a > b { a } else { b }) + .expect("A matrix must contain values") }) .copied() .collect() diff --git a/src/term/hpotermid.rs b/src/term/hpotermid.rs index 3ad5f10..8f6f9d9 100644 --- a/src/term/hpotermid.rs +++ b/src/term/hpotermid.rs @@ -54,6 +54,17 @@ impl AnnotationId for HpoTermId { impl TryFrom<&str> for HpoTermId { type Error = HpoError; + + /// Parse a str into an `HpoTermId` + /// + /// This method assumes (but does not check!) + /// that the first 3 characters are `HP:`. + /// + /// # Error + /// + /// An empty string or string with less than 4 characters + /// will return an error. + /// The characters after the 4th position must be parsable to `u32`. fn try_from(s: &str) -> HpoResult { if s.len() < 4 { return Err(HpoError::ParseIntError);