From 9573b48253e721ff392a0bad67a67492d8a14bfe Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 4 Nov 2024 10:59:42 +0100 Subject: [PATCH] Data updates for obo-db-ingest (#1252) This PR makes several minor updates to support making a new output of https://github.com/biopragmatics/obo-db-ingest 1. Extend CCDS pattern to allow for unversioned records (i.e., not ending with `\.\d+`) 2. Extend COSMIC pattern to allow for dashes 3. Add ability to encode 3.4.24.B15 in EC (see https://www.brenda-enzymes.org/enzyme.php?ecno=3.4.24.B15) 4. Add second letter in MEROPS entry so XM02.001 can be encoded (the M was the issue). This appears in HGNC gene cross-references. --- src/bioregistry/constants.py | 2 ++ src/bioregistry/data/bioregistry.json | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/bioregistry/constants.py b/src/bioregistry/constants.py index 6041ab787..a4a10da73 100644 --- a/src/bioregistry/constants.py +++ b/src/bioregistry/constants.py @@ -130,6 +130,8 @@ "pid.pathway", # this uses namespace-in-namespace "neurolex", + # Miriam needs to be extended + "ccds", } IDENTIFIERS_ORG_URL_PREFIX = "https://identifiers.org/" diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json index b38d2b15c..c7aface95 100644 --- a/src/bioregistry/data/bioregistry.json +++ b/src/bioregistry/data/bioregistry.json @@ -13045,6 +13045,10 @@ "name": "Terence D. Murphy", "orcid": "0000-0001-9311-9745" }, + "example": "CCDS12976", + "example_extras": [ + "CCDS12976.1" + ], "fairsharing": { "abbreviation": "CCDS", "description": "The Consensus CDS (CCDS) project is a collaborative effort to identify a core set of human and mouse protein coding regions that are consistently annotated and of high quality. 
The long term goal is to support convergence towards a standard set of gene annotations.", @@ -13115,6 +13119,7 @@ "prefix": "ccds", "uri_format": "http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=$1" }, + "pattern": "^CCDS\\d+(\\.\\d+)?$", "prefixcommons": { "description": "The Consensus CDS (CCDS) project is a collaborative effort to identify a core set of human and mouse protein coding regions that are consistently annotated and of high quality. The long term goal is to support convergence towards a standard set of gene annotations.", "example": "4824", @@ -21794,6 +21799,7 @@ "prefix": "Cosmic", "uri_format": "https://cancer.sanger.ac.uk/cosmic/sample/overview?id=$1" }, + "comment": "Is this just HGNC Gene symbols?", "contact": { "email": "jt6@sanger.ac.uk", "github": "jgtate", @@ -21869,6 +21875,7 @@ "prefix": "cosmic", "uri_format": "http://cancer.sanger.ac.uk/cosmic/gene/overview?ln=$1" }, + "pattern": "^[A-Z0-9][A-Z0-9-]*$", "publications": [ { "doi": "10.1093/nar/gky1015", @@ -30210,7 +30217,8 @@ "2.3", "2.3.1", "2.3.1.n12", - "3.1.26.n2" + "3.1.26.n2", + "3.4.24.B15" ], "fairsharing": { "abbreviation": "EC Number", @@ -30322,7 +30330,7 @@ "name": "Integrated relational Enzyme database", "prefix": "106" }, - "pattern": "^\\d{1,2}(((\\.\\d{1,3}){1,3})|(\\.\\d+){2}\\.n\\d{1,3})?$", + "pattern": "^\\d{1,2}(((\\.\\d{1,3}){1,3})|(\\.\\d+){2}\\.[nB]\\d{1,3})?$", "prefixcommons": { "description": "IntEnz is a freely available resource focused on enzyme nomenclature. 
IntEnz contains the recommendations of the Nomenclature Committee of the International Union of Biochemistry and Molecular Biology (NC-IUBMB) on the nomenclature and classification of enzyme-catalysed reactions.", "example": "17854", @@ -65185,7 +65193,8 @@ }, "example": "I31.952", "example_extras": [ - "S01.001" + "S01.001", + "XM02.001" ], "fairsharing": { "abbreviation": "MEROPS", @@ -65273,7 +65282,7 @@ "uri_format": "http://merops.sanger.ac.uk/cgi-bin/pepsum?id=$1" }, "name": "MEROPS Entry", - "pattern": "^[SCTAGMNUI]\\d{2}\\.([AB]\\d{2}|\\d{3})$", + "pattern": "^[SCTAGMNUIX]{1,2}\\d{2}\\.([AB]\\d{2}|\\d{3})$", "prefixcommons": { "description": "The MEROPS database is an information resource for peptidases (also termed proteases, proteinases and proteolytic enzymes) and the proteins that inhibit them.", "example": "S01.001",