From b5ac2701f75268f09f8fe13477ab04a18c757bbd Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 15 Oct 2020 20:13:25 -0700 Subject: [PATCH 1/3] bump version --- scispacy/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scispacy/version.py b/scispacy/version.py index 07e77d87..d1821d6a 100644 --- a/scispacy/version.py +++ b/scispacy/version.py @@ -1,6 +1,6 @@ _MAJOR = "0" -_MINOR = "2" -_REVISION = "5-unreleased" +_MINOR = "3" +_REVISION = "0" VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR) VERSION = "{0}.{1}.{2}".format(_MAJOR, _MINOR, _REVISION) From 78fc5b890aee48e6d49417d13fe56acb18f0c8f9 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Thu, 15 Oct 2020 20:14:26 -0700 Subject: [PATCH 2/3] update readme --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2e005991..b394ed16 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ pip install scispacy to install a model (see our full selection of available models below), run a command like the following: ```bash -pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz +pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz ``` Note: We strongly recommend that you use an isolated Python environment (such as virtualenv or conda) to install scispacy. @@ -76,13 +76,13 @@ pip install CMD-V(to paste the copied URL) | Model | Description | Install URL |:---------------|:------------------|:----------| -| en_core_sci_sm | A full spaCy pipeline for biomedical data with a ~100k vocabulary. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz)| -| en_core_sci_md | A full spaCy pipeline for biomedical data with a ~360k vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_md-0.2.5.tar.gz)| -| en_core_sci_lg | A full spaCy pipeline for biomedical data with a ~785k vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_lg-0.2.5.tar.gz)| -| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_craft_md-0.2.5.tar.gz)| -| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_jnlpba_md-0.2.5.tar.gz)| -| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bc5cdr_md-0.2.5.tar.gz)| -| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bionlp13cg_md-0.2.5.tar.gz)| +| en_core_sci_sm | A full spaCy pipeline for biomedical data with a ~100k vocabulary. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz)| +| en_core_sci_md | A full spaCy pipeline for biomedical data with a ~360k vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_md-0.3.0.tar.gz)| +| en_core_sci_lg | A full spaCy pipeline for biomedical data with a ~785k vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_lg-0.3.0.tar.gz)| +| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_craft_md-0.3.0.tar.gz)| +| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_jnlpba_md-0.3.0.tar.gz)| +| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_bc5cdr_md-0.3.0.tar.gz)| +| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_bionlp13cg_md-0.3.0.tar.gz)| ## Additional Pipeline Components From ec262047f6592e757d84050e139712890f1c0291 Mon Sep 17 00:00:00 2001 From: Mark Neumann Date: Fri, 16 Oct 2020 08:40:54 -0700 Subject: [PATCH 3/3] update docs page with results --- docs/index.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/index.md b/docs/index.md index 094d3393..a6419adb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,13 +17,13 @@ pip install | Model | Description | Install URL |:---------------|:------------------|:----------| -| en_core_sci_sm | A full spaCy pipeline for biomedical data. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz)| -| en_core_sci_md | A full spaCy pipeline for biomedical data with a larger vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_md-0.2.5.tar.gz)| -| en_core_sci_lg | A full spaCy pipeline for biomedical data with a larger vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_lg-0.2.5.tar.gz)| -| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_craft_md-0.2.5.tar.gz)| -| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_jnlpba_md-0.2.5.tar.gz)| -| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bc5cdr_md-0.2.5.tar.gz)| -| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bionlp13cg_md-0.2.5.tar.gz)| +| en_core_sci_sm | A full spaCy pipeline for biomedical data. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_sm-0.3.0.tar.gz)| +| en_core_sci_md | A full spaCy pipeline for biomedical data with a larger vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_md-0.3.0.tar.gz)| +| en_core_sci_lg | A full spaCy pipeline for biomedical data with a larger vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_core_sci_lg-0.3.0.tar.gz)| +| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_craft_md-0.3.0.tar.gz)| +| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_jnlpba_md-0.3.0.tar.gz)| +| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_bc5cdr_md-0.3.0.tar.gz)| +| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_bionlp13cg_md-0.3.0.tar.gz)| @@ -34,17 +34,17 @@ Our models achieve performance within 3% of published state of the art dependenc | model | UAS | LAS | POS | Mentions (F1) | Web UAS | |:---------------|:----|:------|:------|:---|:---| -| en_core_sci_sm | 89.26| 87.38 | 98.38 | 67.14 | 87.18 | -| en_core_sci_md | 89.92| 88.01 | 98.54 | 69.46 | 88.20 | -| en_core_sci_lg | 89.81| 88.02 | 98.57 | 69.29 | 88.11 | +| en_core_sci_sm | 89.36| 87.43 | 98.35 | 67.25 | 88.16 | +| en_core_sci_md | 89.82| 87.93 | 98.59 | 69.12 | 88.58 | +| en_core_sci_lg | 89.83| 87.85 | 98.55 | 69.07 | 88.59 | | model | F1 | Entity Types| |:---------------|:-----|:--------| -| en_ner_craft_md | 75.02|GGP, SO, TAXON, CHEBI, GO, CL| -| en_ner_jnlpba_md | 73.56| DNA, CELL_TYPE, CELL_LINE, RNA, PROTEIN | -| en_ner_bc5cdr_md | 84.94| DISEASE, CHEMICAL| -| en_ner_bionlp13cg_md | 78.09| AMINO_ACID, ANATOMICAL_SYSTEM, CANCER, CELL, CELLULAR_COMPONENT, DEVELOPING_ANATOMICAL_STRUCTURE, GENE_OR_GENE_PRODUCT, IMMATERIAL_ANATOMICAL_ENTITY, MULTI-TISSUE_STRUCTURE, ORGAN, ORGANISM, ORGANISM_SUBDIVISION, ORGANISM_SUBSTANCE, PATHOLOGICAL_FORMATION, SIMPLE_CHEMICAL, TISSUE | +| en_ner_craft_md | 77.03|GGP, SO, TAXON, CHEBI, GO, CL| +| en_ner_jnlpba_md | 73.45| DNA, CELL_TYPE, CELL_LINE, RNA, PROTEIN | +| en_ner_bc5cdr_md | 84.12| DISEASE, CHEMICAL| +| en_ner_bionlp13cg_md | 79.33| AMINO_ACID, ANATOMICAL_SYSTEM, CANCER, CELL, CELLULAR_COMPONENT, DEVELOPING_ANATOMICAL_STRUCTURE, GENE_OR_GENE_PRODUCT, IMMATERIAL_ANATOMICAL_ENTITY, MULTI-TISSUE_STRUCTURE, ORGAN, ORGANISM, ORGANISM_SUBDIVISION, ORGANISM_SUBSTANCE, PATHOLOGICAL_FORMATION, SIMPLE_CHEMICAL, TISSUE | ### Example Usage