Skip to content

Commit

Permalink
feat: added trainable biaffine dependency parser and metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Nov 29, 2024
1 parent 7fd284a commit 5dcd4db
Show file tree
Hide file tree
Showing 15 changed files with 890 additions and 30 deletions.
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- Support a specific output directory path for trained models (`output_model_dir`), and whether to save the model or not (`save_model`)
- Specify whether to log the validation results or not (`logger=False`)
- Added support for the CoNLL format with `edsnlp.data.read_conll` and with a specific `eds.conll_dict2doc` converter
- Added a Trainable Biaffine Dependency Parser (`eds.biaffine_dep_parser`) component and metrics

### Fixed

Expand Down
8 changes: 8 additions & 0 deletions docs/pipes/trainable/biaffine-dependency-parser.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Trainable Biaffine Dependency Parser {: #edsnlp.pipes.trainable.biaffine_dep_parser.factory.create_component }

::: edsnlp.pipes.trainable.biaffine_dep_parser.factory.create_component
options:
heading_level: 2
show_bases: false
show_source: false
only_class_level: true
17 changes: 9 additions & 8 deletions docs/pipes/trainable/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ All trainable components implement the [`TorchComponent`][edsnlp.core.torch_comp

<!-- --8<-- [start:components] -->

| Name | Description |
|-----------------------|-----------------------------------------------------------------------|
| `eds.transformer` | Embed text with a transformer model |
| `eds.text_cnn` | Contextualize embeddings with a CNN |
| `eds.span_pooler` | A span embedding component that aggregates word embeddings |
| `eds.ner_crf` | A trainable component to extract entities |
| `eds.span_classifier` | A trainable component for multi-class multi-label span classification |
| `eds.span_linker` | A trainable entity linker (i.e. to a list of concepts) |
| Name | Description |
|---------------------------|-----------------------------------------------------------------------|
| `eds.transformer` | Embed text with a transformer model |
| `eds.text_cnn` | Contextualize embeddings with a CNN |
| `eds.span_pooler` | A span embedding component that aggregates word embeddings |
| `eds.ner_crf` | A trainable component to extract entities |
| `eds.span_classifier` | A trainable component for multi-class multi-label span classification |
| `eds.span_linker` | A trainable entity linker (i.e. to a list of concepts) |
| `eds.biaffine_dep_parser` | A trainable biaffine dependency parser |

<!-- --8<-- [end:components] -->
23 changes: 23 additions & 0 deletions docs/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,26 @@ @article{petitjean_2024
url = {https://doi.org/10.1093/jamia/ocae069},
eprint = {https://academic.oup.com/jamia/article-pdf/31/6/1280/57769016/ocae069.pdf},
}

@misc{dozat2017deepbiaffineattentionneural,
title={Deep Biaffine Attention for Neural Dependency Parsing},
author={Timothy Dozat and Christopher D. Manning},
year={2017},
eprint={1611.01734},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/1611.01734},
}

@inproceedings{grobol:hal-03223424,
title = {{Analyse en dépendances du français avec des plongements contextualisés}},
author = {Grobol, Loïc and Crabbé, Benoît},
url = {https://hal.archives-ouvertes.fr/hal-03223424},
year = {2021},
booktitle = {{Actes de la 28ème Conférence sur le Traitement Automatique des Langues Naturelles}},
eventtitle = {{TALN-RÉCITAL 2021}},
venue = {Lille, France},
pdf = {https://hal.archives-ouvertes.fr/hal-03223424/file/HOPS_final.pdf},
hal_id = {hal-03223424},
hal_version = {v1},
}
55 changes: 55 additions & 0 deletions edsnlp/metrics/dep_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing import Any, Optional

from edsnlp import registry
from edsnlp.metrics import Examples, make_examples, prf


def dependency_parsing_metric(
    examples: Examples,
    filter_expr: Optional[str] = None,
):
    """
    Compute the UAS and LAS scores for dependency parsing.

    Every token contributes one `(example index, token index, head index)`
    triple to the unlabeled (UAS) sets and the same triple extended with its
    dependency label to the labeled (LAS) sets. The reference and predicted
    sets are then compared with `prf`, and only its `"f"` entry is kept.

    Parameters
    ----------
    examples : Examples
        The examples to score, either a tuple of (golds, preds) or a list of
        spacy.training.Example objects
    filter_expr : Optional[str]
        The filter expression to use to filter the documents. It is spliced
        into `lambda doc: <filter_expr>` and applied to each example's
        reference document; examples for which it is falsy are dropped.

    Returns
    -------
    Dict[str, float]
        A dict with the keys `"uas"` and `"las"`.
    """
    examples = make_examples(examples)
    if filter_expr is not None:
        # NOTE(review): `filter_expr` is executed through `eval`, so it must
        # only ever come from a trusted configuration, never from user input.
        keep_doc = eval(f"lambda doc: {filter_expr}")
        examples = [eg for eg in examples if keep_doc(eg.reference)]

    ref_unlabeled, ref_labeled = set(), set()
    pred_unlabeled, pred_labeled = set(), set()

    # The example index is part of every item so that identical arcs coming
    # from different documents are not merged in the sets.
    for doc_idx, eg in enumerate(examples):
        for tok in eg.reference:
            arc = (doc_idx, tok.i, tok.head.i)
            ref_unlabeled.add(arc)
            ref_labeled.add((*arc, tok.dep_))

        for tok in eg.predicted:
            arc = (doc_idx, tok.i, tok.head.i)
            pred_unlabeled.add(arc)
            pred_labeled.add((*arc, tok.dep_))

    # NOTE(review): the reference set is passed first and the predicted set
    # second; confirm this matches `prf`'s parameter order (only the "f"
    # entry of its result is read here).
    return {
        "uas": prf(ref_unlabeled, pred_unlabeled)["f"],
        "las": prf(ref_labeled, pred_labeled)["f"],
    }


@registry.metrics.register("eds.dep_parsing")
class DependencyParsingMetric:
    """Callable dependency parsing metric registered as `eds.dep_parsing`."""

    def __init__(self, filter_expr: Optional[str] = None):
        # Stored as-is and forwarded to `dependency_parsing_metric` on each call.
        self.filter_expr = filter_expr

    # Expose the scoring function's documentation on the constructor.
    __init__.__doc__ = dependency_parsing_metric.__doc__

    def __call__(self, *examples: Any):
        # The positional arguments are forwarded together as a single tuple.
        scores = dependency_parsing_metric(examples, filter_expr=self.filter_expr)
        return scores
1 change: 1 addition & 0 deletions edsnlp/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from .qualifiers.reported_speech.factory import create_component as reported_speech
from .qualifiers.reported_speech.factory import create_component as rspeech
from .trainable.ner_crf.factory import create_component as ner_crf
from .trainable.biaffine_dep_parser.factory import create_component as biaffine_dep_parser
from .trainable.span_classifier.factory import create_component as span_classifier
from .trainable.span_linker.factory import create_component as span_linker
from .trainable.embeddings.span_pooler.factory import create_component as span_pooler
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/trainable/biaffine_dep_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .factory import create_component
Loading

0 comments on commit 5dcd4db

Please sign in to comment.