Skip to content

Commit

Permalink
Merge pull request #55 from steppi/minor_revisions
Browse files Browse the repository at this point in the history
Minor revisions
  • Loading branch information
Albert Steppi authored Dec 3, 2020
2 parents ff3e716 + 536fba4 commit ae9bca9
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 41 deletions.
2 changes: 1 addition & 1 deletion adeft/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.9.0'
__version__ = '0.10.0'

from adeft.download import get_available_models

Expand Down
13 changes: 10 additions & 3 deletions adeft/disambiguate.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def __init__(self, classifier, grounding_dict, names):
grounding_map in grounding_dict.items()]
self.grounding_dict = grounding_dict
self.names = names
self.labels = set(value for grounding_map in grounding_dict.values()
for value in grounding_map.values())
self.labels = (set(value for grounding_map in grounding_dict.values()
for value in grounding_map.values()) |
set(classifier.estimator.classes_))
self.pos_labels = classifier.pos_labels

def disambiguate(self, texts):
Expand Down Expand Up @@ -141,6 +142,11 @@ def update_pos_labels(self, pos_labels):
Micro-averaged precision, recall, and f1 scores are also updated.
Warning: If this method is called on a disambiguator trained with a
version prior to 0.10.0, global precision, recall, and f1 will be set
to NaN. Older disambiguators must be retrained to update positive
labels and recompute model statistics.
Parameters
----------
pos_labels : list
Expand Down Expand Up @@ -185,7 +191,8 @@ def update_pos_labels(self, pos_labels):
stats['recall']['mean'] = float('nan')
stats['recall']['std'] = float('nan')
self.classifier.stats = stats
self.pos_labels = pos_labels
self.classifier.pos_labels = list(pos_labels)
self.pos_labels = list(pos_labels)

def modify_groundings(self, new_groundings=None, new_names=None):
"""Update groundings and standardized names
Expand Down
127 changes: 90 additions & 37 deletions notebooks/introduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"$ python -m adeft.download\n",
"```\n",
"\n",
"Models will be stored in the users home directory in a hidden folder named ``.adeft_<version>``, where `<version>` is the version number for the installed adeft."
"By default, models will be stored in the user's home directory in a hidden folder named ``.adeft``. Users may set the environment variable `ADEFT_HOME` in their shell profile to choose an alternative location."
]
},
{
Expand All @@ -46,7 +46,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'PC': 'PC', 'EMT': 'EMT', 'SP': 'SP', 'PE': 'PE', 'ROS': 'ROS', 'NP': 'NP:NP_S', 'NPs': 'NP:NP_S', 'MS': 'MS', 'MT': 'MT', 'BP': 'BP', 'GH': 'GH', 'AD': 'AD', 'GT': 'GT', 'DA': 'DA', 'GR': 'GR', 'IR': 'IR', 'HK2': 'HK2', 'ARF': 'ARF', 'CS': 'CS', 'EC': 'EC', 'STD': 'STD', 'PD1': 'PD1', 'TGH': 'TGH', 'PKD': 'PKD', 'RA': 'RA', 'PCP': 'PCP', 'PI': 'PI', 'PS': 'PS', 'PA': 'PA', 'MB': 'MB', 'HA': 'HA', 'AR': 'AR', 'HR': 'HR', 'NE': 'NE', 'UBC': 'UBC', 'GSC': 'GSC', 'AA': 'AA', 'NIS': 'NIS', 'GC': 'GC', 'CM': 'CM', 'RB': 'RB:R_B', 'Rb': 'RB:R_B', 'LH': 'LH', 'ER': 'ER', 'TF': 'TF', 'PGP': 'PGP', 'MCT': 'MCT', 'TG': 'TG'}\n"
"{'HA': 'HA', 'RT': 'RT:RT_S', 'RTs': 'RT:RT_S', 'APC': 'APC:APC_S', 'APCs': 'APC:APC_S', 'GH': 'GH', 'BRK': 'BRK', 'SG': 'SG:SG_S', 'SGs': 'SG:SG_S', 'CHK': 'CHK', 'SPD': 'SPD', 'PC': 'PC', 'AST': 'AST', 'EAG': 'EAG', 'SNS': 'SNS', 'HR': 'HR', 'NOP': 'NOP', 'GT': 'GT', 'TLR': 'TLR', 'CF': 'CF', 'PET': 'PET', 'HEP': 'HEP', 'ERM': 'ERM', 'FES': 'FES', 'PI': 'PI', 'TG': 'TG', 'ER': 'ER', 'HK1': 'HK1', 'MB': 'MB', 'NE': 'NE', 'SLK': 'SLK', 'GARP': 'GARP', 'EMT': 'EMT', 'AHR': 'AHR', 'TIF': 'TIF', 'HC': 'HC:HC_S', 'HCs': 'HC:HC_S', 'SD': 'SD:SD_S', 'SDs': 'SD:SD_S', 'FP': 'FP:FP_S', 'FPs': 'FP:FP_S', 'FMS': 'FMS', 'RD': 'RD', 'PAMP': 'PAMP', 'ARG': 'ARG', 'LAB': 'LAB', 'PM': 'PM:PM_S', 'PMs': 'PM:PM_S', 'TAK': 'TAK', 'ODC': 'ODC', 'GAS': 'GAS', 'TGH': 'TGH', 'BP': 'BP', 'PR': 'PR', 'HK2': 'HK2', 'PAF': 'PAF', 'COT': 'COT', 'SN': 'SN', 'LH': 'LH', 'BAL': 'BAL', 'PRK': 'PRK', 'AD': 'AD', 'NP': 'NP:NP_S', 'NPs': 'NP:NP_S', 'RSE': 'RSE', 'RPE': 'RPE', 'MS': 'MS', 'ARF': 'ARF', 'RB': 'RB:R_B', 'Rb': 'RB:R_B', 'MAC': 'MAC', 'EC': 'EC', 'NIS': 'NIS', 'LAK': 'LAK', 'AC': 'AC', 'TF': 'TF', 'MM': 'MM', 'ALK': 'ALK', 'IR': 'IR', 'TM': 'TM', 'PAH': 'PAH:PAH_S', 'PAHs': 'PAH:PAH_S', 'PKL': 'PKL', 'FPS': 'FPS', 'CK': 'CK:CK_S', 'CKs': 'CK:CK_S', 'AP1': 'AP1', 'UBC': 'UBC', 'RAS': 'RAS', 'RET': 'RET', 'GR': 'GR', 'AR': 'AR', 'SERS': 'SERS', 'CLS': 'CLS', 'MCT': 'MCT', 'DC': 'DC:DC_S', 'DCs': 'DC:DC_S', 'SAK': 'SAK', 'ZAP': 'ZAP', 'TS': 'TS', 'MV': 'MV', 'GCA': 'GCA', 'MIP': 'MIP', 'PKD': 'PKD', 'STD': 'STD', 'CS': 'CS', 'EPA': 'EPA', 'PE': 'PE', 'CLK': 'CLK', 'NS': 'NS', 'ASM': 'ASM', 'TEC': 'TEC:TEC_S', 'TECs': 'TEC:TEC_S', 'CIR': 'CIR', 'RA': 'RA', 'FGR': 'FGR', 'ROS': 'ROS', 'Lu': 'L_U', 'DA': 'DA', 'RAC': 'RAC', 'OCR': 'OCR', 'CPAP': 'CPAP', 'EK': 'EK', 'MHC': 'MHC', 'RTCA': 'RTCA', 'GL': 'GL', 'PA': 'PA', 'CM': 'CM', 'OA': 'OA', 'GSC': 'GSC:GSC_S', 'GSCs': 'GSC:GSC_S', 'CPE': 'CPE', 'AE': 'AE:AE_S', 'AEs': 'AE:AE_S', 'SPF': 'SPF', 'FRAP': 'FRAP', 'MSC': 'MSC:MSC_S', 'MSCs': 
'MSC:MSC_S', 'TR': 'TR', 'MOS': 'MOS', 'DLK': 'DLK', 'CNS': 'CNS', 'AA': 'AA', 'AVP': 'AVP', 'MT': 'MT', 'DOG1': 'DOG1', 'PS': 'PS', 'FER': 'FER', 'PBT': 'PBT', 'BCR': 'BCR', 'PP': 'PP:PP_S', 'PPs': 'PP:PP_S', 'IBD': 'IBD', 'RK': 'RK', 'SP': 'SP', 'AF': 'AF', 'TEK': 'TEK', 'UFO': 'UFO', 'PCP': 'PCP', 'PCS': 'PCS', 'DSS': 'DSS', 'HIR': 'HIR', 'CAP': 'CAP:CAP_S', 'CAPs': 'CAP:CAP_S', 'HF': 'HF', 'PGP': 'PGP:PGP_S', 'PGPs': 'PGP:PGP_S', 'PD1': 'PD1', 'IRR': 'IRR', 'ACE': 'ACE', 'GC': 'GC', 'FIP': 'FIP'}\n"
]
}
],
Expand Down Expand Up @@ -119,24 +119,45 @@
"Disambiguation model for IR\n",
"\n",
"Produces the disambiguations:\n",
"\tRadiation, Ionizing*\tMESH:D011839\n",
"\tInsulin Resistance\tMESH:D007333\n",
"\tINSR*\tHGNC:6091\n",
"\tReperfusion Injury\tMESH:D015427\n",
"\tInfrared Rays\tMESH:D007259\n",
"\tIle-Arg*\tCHEBI:CHEBI:74061\n",
"\tInfrared Rays*\tMESH:D007259\n",
"\tInsulin Resistance*\tMESH:D007333\n",
"\tInterneurons*\tMESH:D007395\n",
"\tMDAMB468*\tEFO:0001216\n",
"\tREN*\tHGNC:9958\n",
"\tRadiation, Ionizing*\tMESH:D011839\n",
"\tReperfusion Injury*\tMESH:D015427\n",
"\tRetina*\tMESH:D012160\n",
"\tRhinitis*\tMESH:D012220\n",
"\tWounds and Injuries*\tMESH:D014947\n",
"\tretinal ischemia*\tDOID:DOID:12510\n",
"\troot structure\tEFO:0000989\n",
"\n",
"Training data had class balance:\n",
"\tRadiation, Ionizing*\t2704\n",
"\tInsulin Resistance\t1495\n",
"\tINSR*\t1460\n",
"\tReperfusion Injury\t924\n",
"\tUngrounded\t538\n",
"\tInfrared Rays\t189\n",
"Class level metrics:\n",
"--------------------\n",
"Grounding \tCount\tF1 \n",
"Radiation, Ionizing*\t3296\t0.98324\n",
" Insulin Resistance*\t1894\t0.95075\n",
" INSR*\t1512\t0.92161\n",
" Reperfusion Injury*\t1193\t0.94338\n",
" Ungrounded\t 784\t0.85292\n",
" Infrared Rays*\t 304\t0.87597\n",
"Wounds and Injuries*\t 34\t 0.0\n",
" Ile-Arg*\t 5\t 0.2\n",
" Rhinitis*\t 4\t 0.6\n",
" REN*\t 3\t 0.2\n",
" Retina*\t 2\t 0.0\n",
" root structure\t 1\t 0.0\n",
" Interneurons*\t 1\t 0.0\n",
" MDAMB468*\t 1\t 0.0\n",
" retinal ischemia*\t 1\t 0.0\n",
"\n",
"Classification Metrics:\n",
"\tF1 score:\t0.97321\n",
"\tPrecision:\t0.97057\n",
"\tRecall:\t\t0.97598\n",
"Global Metrics:\n",
"-----------------\n",
"\tF1 score:\t0.9509\n",
"\tPrecision:\t0.95102\n",
"\tRecall:\t\t0.95079\n",
"\n",
"* Positive labels\n",
"See Docstring for explanation\n",
Expand Down Expand Up @@ -176,12 +197,21 @@
"text/plain": [
"('MESH:D011839',\n",
" 'Radiation, Ionizing',\n",
" {'MESH:D007259': 0.0,\n",
" 'HGNC:6091': 0.0,\n",
" 'MESH:D007333': 0.0,\n",
" {'MESH:D007333': 0.0,\n",
" 'MESH:D015427': 0.0,\n",
" 'MESH:D012160': 0.0,\n",
" 'EFO:0001216': 0.0,\n",
" 'MESH:D012220': 0.0,\n",
" 'HGNC:6091': 0.0,\n",
" 'ungrounded': 0.0,\n",
" 'MESH:D011839': 1.0})"
" 'MESH:D014947': 0.0,\n",
" 'MESH:D007259': 0.0,\n",
" 'DOID:DOID:12510': 0.0,\n",
" 'MESH:D007395': 0.0,\n",
" 'HGNC:9958': 0.0,\n",
" 'CHEBI:CHEBI:74061': 0.0,\n",
" 'MESH:D011839': 1.0,\n",
" 'EFO:0000989': 0.0})"
]
},
"execution_count": 5,
Expand Down Expand Up @@ -209,8 +239,13 @@
"* [Gene Ontology](https://geneontology.org/) (GO)\n",
"* [Medical Subject Headings](https://id.nlm.nih.gov/mesh/) (MESH)\n",
"* [Chemical Entities of Biological Interest](https://www.ebi.ac.uk/chebi/) (CHEBI)\n",
"* [NCIThesaurus](https://ncithesaurus.nci.nih.gov/ncitbrowser/) (NCIT)\n",
"* [Uniprot](https://www.uniprot.org/) (UP)\n",
"* [Interpro](https://www.ebi.ac.uk/interpro/) (IP)\n",
"\n",
"and more.\n",
"\n",
"The 'ungrounded' class refers to the group of entities for which Adeft recognizes a defining pattern but for which the model has no grounding."
"The 'ungrounded' class refers to the group of entities for which Adeft recognizes a defining pattern but for which the model has no specific grounding."
]
},
{
Expand All @@ -232,12 +267,21 @@
"text/plain": [
"('MESH:D011839',\n",
" 'Radiation, Ionizing',\n",
" {'HGNC:6091': 0.0163634303237421,\n",
" 'MESH:D007259': 0.022613323179558133,\n",
" 'MESH:D007333': 0.025406625477066187,\n",
" 'MESH:D011839': 0.7351417882529113,\n",
" 'MESH:D015427': 0.0554160084966106,\n",
" 'ungrounded': 0.1450588242701117})"
" {'CHEBI:CHEBI:74061': 0.0029106450442963733,\n",
" 'DOID:DOID:12510': 0.002714648506140114,\n",
" 'EFO:0000989': 0.002790792571999619,\n",
" 'EFO:0001216': 0.002753434628070166,\n",
" 'HGNC:6091': 0.014643292414418663,\n",
" 'HGNC:9958': 0.002875775266350069,\n",
" 'MESH:D007259': 0.012579601284319343,\n",
" 'MESH:D007333': 0.006525654716170285,\n",
" 'MESH:D007395': 0.002674687838200803,\n",
" 'MESH:D011839': 0.8351663813237419,\n",
" 'MESH:D012160': 0.0028453280753981272,\n",
" 'MESH:D012220': 0.0028269596663538198,\n",
" 'MESH:D014947': 0.0020090024116930397,\n",
" 'MESH:D015427': 0.04777629432741509,\n",
" 'ungrounded': 0.058907501925432655})"
]
},
"execution_count": 6,
Expand Down Expand Up @@ -314,12 +358,21 @@
"text/plain": [
"('HGNC:6091',\n",
" 'INSR',\n",
" {'HGNC:6091': 0.9997096174422002,\n",
" 'MESH:D007259': 2.344258939099128e-06,\n",
" 'MESH:D007333': 0.00026293326086445306,\n",
" 'MESH:D011839': 1.1515771319720898e-06,\n",
" 'MESH:D015427': 1.9880271533737704e-05,\n",
" 'ungrounded': 4.073189330487242e-06})"
" {'CHEBI:CHEBI:74061': 8.653121931414589e-06,\n",
" 'DOID:DOID:12510': 1.0470370933297434e-05,\n",
" 'EFO:0000989': 6.543495465754423e-06,\n",
" 'EFO:0001216': 6.727432817661731e-06,\n",
" 'HGNC:6091': 0.9970957622075577,\n",
" 'HGNC:9958': 6.628812950508269e-06,\n",
" 'MESH:D007259': 6.224876365680733e-06,\n",
" 'MESH:D007333': 0.0002892344560520544,\n",
" 'MESH:D007395': 6.361983101386227e-06,\n",
" 'MESH:D011839': 2.7108083646021005e-06,\n",
" 'MESH:D012160': 6.854179224808941e-06,\n",
" 'MESH:D012220': 6.8208962358360116e-06,\n",
" 'MESH:D014947': 3.936096295151697e-06,\n",
" 'MESH:D015427': 0.00042591480328742605,\n",
" 'ungrounded': 0.002117156459416963})"
]
},
"execution_count": 9,
Expand Down Expand Up @@ -349,7 +402,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.37 ms ± 82.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"1.84 ms ± 81.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
Expand All @@ -366,7 +419,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.82 ms ± 33.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"2.69 ms ± 74 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
Expand Down Expand Up @@ -400,7 +453,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.8.5"
}
},
"nbformat": 4,
Expand Down

0 comments on commit ae9bca9

Please sign in to comment.