Skip to content

Commit

Permalink
Data updates for obo-db-ingest (#1252)
Browse files Browse the repository at this point in the history
This PR makes several minor updates to support making a new output of
https://github.com/biopragmatics/obo-db-ingest

1. Extend CCDS pattern to allow for unversioned records (i.e., not
ending with `\.\d+`)
2. Extend COSMIC pattern to allow for dashes
3. Add ability to encode 3.4.24.B15 in EC (see
https://www.brenda-enzymes.org/enzyme.php?ecno=3.4.24.B15)
4. Allow a second letter in the MEROPS entry pattern so XM02.001 can be encoded (the M
was the issue). This identifier appears in HGNC gene cross-references.
  • Loading branch information
cthoyt authored Nov 4, 2024
1 parent 75be565 commit 9573b48
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/bioregistry/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@
"pid.pathway",
# this uses namespace-in-namespace
"neurolex",
# Miriam needs to be extended
"ccds",
}
IDENTIFIERS_ORG_URL_PREFIX = "https://identifiers.org/"

Expand Down
17 changes: 13 additions & 4 deletions src/bioregistry/data/bioregistry.json
Original file line number Diff line number Diff line change
Expand Up @@ -13045,6 +13045,10 @@
"name": "Terence D. Murphy",
"orcid": "0000-0001-9311-9745"
},
"example": "CCDS12976",
"example_extras": [
"CCDS12976.1"
],
"fairsharing": {
"abbreviation": "CCDS",
"description": "The Consensus CDS (CCDS) project is a collaborative effort to identify a core set of human and mouse protein coding regions that are consistently annotated and of high quality. The long term goal is to support convergence towards a standard set of gene annotations.",
Expand Down Expand Up @@ -13115,6 +13119,7 @@
"prefix": "ccds",
"uri_format": "http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=$1"
},
"pattern": "^CCDS\\d+(\\.\\d+)?$",
"prefixcommons": {
"description": "The Consensus CDS (CCDS) project is a collaborative effort to identify a core set of human and mouse protein coding regions that are consistently annotated and of high quality. The long term goal is to support convergence towards a standard set of gene annotations.",
"example": "4824",
Expand Down Expand Up @@ -21794,6 +21799,7 @@
"prefix": "Cosmic",
"uri_format": "https://cancer.sanger.ac.uk/cosmic/sample/overview?id=$1"
},
"comment": "Is this just HGNC Gene symbols?",
"contact": {
"email": "[email protected]",
"github": "jgtate",
Expand Down Expand Up @@ -21869,6 +21875,7 @@
"prefix": "cosmic",
"uri_format": "http://cancer.sanger.ac.uk/cosmic/gene/overview?ln=$1"
},
"pattern": "^[A-Z0-9][A-Z0-9-]*$",
"publications": [
{
"doi": "10.1093/nar/gky1015",
Expand Down Expand Up @@ -30210,7 +30217,8 @@
"2.3",
"2.3.1",
"2.3.1.n12",
"3.1.26.n2"
"3.1.26.n2",
"3.4.24.B15"
],
"fairsharing": {
"abbreviation": "EC Number",
Expand Down Expand Up @@ -30322,7 +30330,7 @@
"name": "Integrated relational Enzyme database",
"prefix": "106"
},
"pattern": "^\\d{1,2}(((\\.\\d{1,3}){1,3})|(\\.\\d+){2}\\.n\\d{1,3})?$",
"pattern": "^\\d{1,2}(((\\.\\d{1,3}){1,3})|(\\.\\d+){2}\\.[nB]\\d{1,3})?$",
"prefixcommons": {
"description": "IntEnz is a freely available resource focused on enzyme nomenclature. IntEnz contains the recommendations of the Nomenclature Committee of the International Union of Biochemistry and Molecular Biology (NC-IUBMB) on the nomenclature and classification of enzyme-catalysed reactions.",
"example": "17854",
Expand Down Expand Up @@ -65185,7 +65193,8 @@
},
"example": "I31.952",
"example_extras": [
"S01.001"
"S01.001",
"XM02.001"
],
"fairsharing": {
"abbreviation": "MEROPS",
Expand Down Expand Up @@ -65273,7 +65282,7 @@
"uri_format": "http://merops.sanger.ac.uk/cgi-bin/pepsum?id=$1"
},
"name": "MEROPS Entry",
"pattern": "^[SCTAGMNUI]\\d{2}\\.([AB]\\d{2}|\\d{3})$",
"pattern": "^[SCTAGMNUIX]{1,2}\\d{2}\\.([AB]\\d{2}|\\d{3})$",
"prefixcommons": {
"description": "The MEROPS database is an information resource for peptidases (also termed proteases, proteinases and proteolytic enzymes) and the proteins that inhibit them.",
"example": "S01.001",
Expand Down

0 comments on commit 9573b48

Please sign in to comment.