diff --git a/changelog.md b/changelog.md index 1601e5141..71dac6ec1 100644 --- a/changelog.md +++ b/changelog.md @@ -6,6 +6,8 @@ - `eds.tables` accepts a minimum_table_size (default 2) argument to reduce pollution - `RuleBasedQualifier` now expose a `process` method that only returns qualified entities and token without actually tagging them, defering this task to the `__call__` method. +- Added new patterns for metastasis detection. Developed on CT-Scan reports. +- Added citation of articles ### Fixed diff --git a/docs/pipes/ner/disorders/index.md b/docs/pipes/ner/disorders/index.md index d8e65515d..e261fcd98 100644 --- a/docs/pipes/ner/disorders/index.md +++ b/docs/pipes/ner/disorders/index.md @@ -3,6 +3,9 @@ ## Presentation The following components extract 16 different conditions from the [Charlson Comorbidity Index](https://www.rdplf.org/calculateurs/pages/charlson/charlson.html). Each component is based on the ContextualMatcher component. + +The components were developed by AP-HP's Data Science team with a team of medical experts, following the insights of the algorithm proposed by [@petitjean_2024]. + Some general considerations about those components: - Extracted entities are stored in `doc.ents` and `doc.spans`. For instance, the `eds.tobacco` component stores matches in `doc.spans["tobacco"]`. 
diff --git a/docs/references.bib b/docs/references.bib index db8e3c6a9..00d5094e5 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -145,3 +145,19 @@ @misc{terminologie-adicap AUTHOR = {Agence du numérique en santé}, DETAILS = {https://smt.esante.gouv.fr/wp-json/ans/terminologies/document?terminologyId=terminologie-adicap&fileName=cgts_sem_adicap_fiche-detaillee.pdf}, } + +@article{petitjean_2024, + author = {Petit-Jean, Thomas and Gérardin, Christel and Berthelot, Emmanuelle and Chatellier, Gilles and Frank, Marie and Tannier, Xavier and Kempf, Emmanuelle and Bey, Romain}, + title = "{Collaborative and privacy-enhancing workflows on a clinical data warehouse: an example developing natural language processing pipelines to detect medical conditions}", + journal = {Journal of the American Medical Informatics Association}, + volume = {31}, + number = {6}, + pages = {1280-1290}, + year = {2024}, + month = {04}, + abstract = "{To develop and validate a natural language processing (NLP) pipeline that detects 18 conditions in French clinical notes, including 16 comorbidities of the Charlson index, while exploring a collaborative and privacy-enhancing workflow. The detection pipeline relied both on rule-based and machine learning algorithms, respectively, for named entity recognition and entity qualification, respectively. We used a large language model pre-trained on millions of clinical notes along with annotated clinical notes in the context of 3 cohort studies related to oncology, cardiology, and rheumatology. The overall workflow was conceived to foster collaboration between studies while respecting the privacy constraints of the data warehouse. We estimated the added values of the advanced technologies and of the collaborative setting. The pipeline reached macro-averaged F1-score positive predictive value, sensitivity, and specificity of 95.7 (95\%CI 94.5-96.3), 95.4 (95\%CI 94.0-96.3), 96.0 (95\%CI 94.0-96.7), and 99.2 (95\%CI 99.0-99.4), respectively. 
F1-scores were superior to those observed using alternative technologies or non-collaborative settings. The models were shared through a secured registry. We demonstrated that a community of investigators working on a common clinical data warehouse could efficiently and securely collaborate to develop, validate and use sensitive artificial intelligence models. In particular, we provided an efficient and robust NLP pipeline that detects conditions mentioned in clinical notes.}", + issn = {1527-974X}, + doi = {10.1093/jamia/ocae069}, + url = {https://doi.org/10.1093/jamia/ocae069}, + eprint = {https://academic.oup.com/jamia/article-pdf/31/6/1280/57769016/ocae069.pdf}, +} diff --git a/edsnlp/pipes/ner/behaviors/alcohol/alcohol.py b/edsnlp/pipes/ner/behaviors/alcohol/alcohol.py index 8c86e8260..ca543fa6e 100644 --- a/edsnlp/pipes/ner/behaviors/alcohol/alcohol.py +++ b/edsnlp/pipes/ner/behaviors/alcohol/alcohol.py @@ -81,9 +81,9 @@ class AlcoholMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.alcohol` component was developed by AP-HP's Data Science team with a team - of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + The `eds.alcohol` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/behaviors/tobacco/tobacco.py b/edsnlp/pipes/ner/behaviors/tobacco/tobacco.py index 34d41bc7e..36c0701cc 100644 --- a/edsnlp/pipes/ner/behaviors/tobacco/tobacco.py +++ b/edsnlp/pipes/ner/behaviors/tobacco/tobacco.py @@ -89,9 +89,9 @@ class TobaccoMatcher(AlcoholMatcher): Authors and citation -------------------- - The `eds.tobacco` component was developed by AP-HP's Data Science team with a team - of medical experts. 
A paper describing in details the development of those - components is being drafted and will soon be available. + The `eds.tobacco` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/aids/aids.py b/edsnlp/pipes/ner/disorders/aids/aids.py index 65ce35d67..f12e7a911 100644 --- a/edsnlp/pipes/ner/disorders/aids/aids.py +++ b/edsnlp/pipes/ner/disorders/aids/aids.py @@ -91,9 +91,9 @@ class AIDSMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.aids` component was developed by AP-HP's Data Science team with a team of - medical experts. A paper describing in details the development of those components - is being drafted and will soon be available. + The `eds.aids` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py b/edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py index 479f1f37c..6d562b8e0 100644 --- a/edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py +++ b/edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py @@ -78,10 +78,10 @@ class CerebrovascularAccidentMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.cerebrovascular_accident` component was developed by AP-HP's Data Science - team with a team of medical experts. A paper describing in details the development - of those components is being drafted and will soon be available. - """ + The `eds.cerebrovascular_accident` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. 
+ """ # noqa: E501 def __init__( self, diff --git a/edsnlp/pipes/ner/disorders/ckd/ckd.py b/edsnlp/pipes/ner/disorders/ckd/ckd.py index 050068bcc..e4a4ca20d 100644 --- a/edsnlp/pipes/ner/disorders/ckd/ckd.py +++ b/edsnlp/pipes/ner/disorders/ckd/ckd.py @@ -91,9 +91,9 @@ class CKDMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.CKD` component was developed by AP-HP's Data Science team with a team of - medical experts. A paper describing in details the development of those components - is being drafted and will soon be available. + The `eds.ckd` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/congestive_heart_failure/congestive_heart_failure.py b/edsnlp/pipes/ner/disorders/congestive_heart_failure/congestive_heart_failure.py index 98002ef16..2ad275336 100644 --- a/edsnlp/pipes/ner/disorders/congestive_heart_failure/congestive_heart_failure.py +++ b/edsnlp/pipes/ner/disorders/congestive_heart_failure/congestive_heart_failure.py @@ -1,4 +1,5 @@ """`eds.congestive_heart_failure` pipeline""" + from typing import Any, Dict, List, Optional, Union from edsnlp.core import PipelineProtocol @@ -71,10 +72,10 @@ class CongestiveHeartFailureMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.congestive_heart_failure` component was developed by AP-HP's Data Science - team with a team of medical experts. A paper describing in details the development - of those components is being drafted and will soon be available. - """ + The `eds.congestive_heart_failure` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. 
+ """ # noqa: E501 def __init__( self, diff --git a/edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py b/edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py index 52a95606f..1819c4a13 100644 --- a/edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py +++ b/edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py @@ -75,10 +75,10 @@ class ConnectiveTissueDiseaseMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.connective_tissue_disease` component was developed by AP-HP's Data Science - team with a team of medical experts. A paper describing in details the development - of those components is being drafted and will soon be available. - """ + The `eds.connective_tissue_disease` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. + """ # noqa: E501 def __init__( self, diff --git a/edsnlp/pipes/ner/disorders/copd/copd.py b/edsnlp/pipes/ner/disorders/copd/copd.py index 8b8de3f7c..9ebc6fd00 100644 --- a/edsnlp/pipes/ner/disorders/copd/copd.py +++ b/edsnlp/pipes/ner/disorders/copd/copd.py @@ -78,9 +78,9 @@ class COPDMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.copd` component was developed by AP-HP's Data Science team with a team of - medical experts. A paper describing in details the development of those components - is being drafted and will soon be available. + The `eds.copd` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. 
""" def __init__( diff --git a/edsnlp/pipes/ner/disorders/dementia/dementia.py b/edsnlp/pipes/ner/disorders/dementia/dementia.py index f5d4c6c2d..a33c4824e 100644 --- a/edsnlp/pipes/ner/disorders/dementia/dementia.py +++ b/edsnlp/pipes/ner/disorders/dementia/dementia.py @@ -72,9 +72,9 @@ class DementiaMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.dementia` component was developed by AP-HP's Data Science team with a team - of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + The `eds.dementia` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/diabetes/diabetes.py b/edsnlp/pipes/ner/disorders/diabetes/diabetes.py index 049591eb7..1eef8a885 100644 --- a/edsnlp/pipes/ner/disorders/diabetes/diabetes.py +++ b/edsnlp/pipes/ner/disorders/diabetes/diabetes.py @@ -8,9 +8,7 @@ from edsnlp.matchers.regex import RegexMatcher from edsnlp.matchers.utils import get_text from edsnlp.pipes.base import SpanSetterArg -from edsnlp.pipes.core.contextual_matcher.contextual_matcher import ( - get_window, -) +from edsnlp.pipes.core.contextual_matcher.contextual_matcher import get_window from ..base import DisorderMatcher from .patterns import COMPLICATIONS, default_patterns @@ -86,9 +84,9 @@ class DiabetesMatcher(DisorderMatcher): # Authors and citation - The `eds.diabetes` component was developed by AP-HP's Data Science team with a team - of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + The `eds.diabetes` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. 
""" def __init__( diff --git a/edsnlp/pipes/ner/disorders/hemiplegia/hemiplegia.py b/edsnlp/pipes/ner/disorders/hemiplegia/hemiplegia.py index c53ec7f74..7baa64f07 100644 --- a/edsnlp/pipes/ner/disorders/hemiplegia/hemiplegia.py +++ b/edsnlp/pipes/ner/disorders/hemiplegia/hemiplegia.py @@ -73,8 +73,8 @@ class HemiplegiaMatcher(DisorderMatcher): # Authors and citation The `eds.hemiplegia` component was developed by AP-HP's Data Science team with a - team of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/leukemia/leukemia.py b/edsnlp/pipes/ner/disorders/leukemia/leukemia.py index 9f697388b..7da1533cc 100644 --- a/edsnlp/pipes/ner/disorders/leukemia/leukemia.py +++ b/edsnlp/pipes/ner/disorders/leukemia/leukemia.py @@ -72,9 +72,9 @@ class LeukemiaMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.leukemia` component was developed by AP-HP's Data Science team with a team - of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + The `eds.leukemia` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/liver_disease/liver_disease.py b/edsnlp/pipes/ner/disorders/liver_disease/liver_disease.py index cc7e7c10e..1c5f1b76a 100644 --- a/edsnlp/pipes/ner/disorders/liver_disease/liver_disease.py +++ b/edsnlp/pipes/ner/disorders/liver_disease/liver_disease.py @@ -77,8 +77,8 @@ class LiverDiseaseMatcher(DisorderMatcher): Authors and citation -------------------- The `eds.liver_disease` component was developed by AP-HP's Data Science team with a - team of medical experts. 
A paper describing in details the development of those - components is being drafted and will soon be available. + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/lymphoma/lymphoma.py b/edsnlp/pipes/ner/disorders/lymphoma/lymphoma.py index 6ff299cd3..b4e130dda 100644 --- a/edsnlp/pipes/ner/disorders/lymphoma/lymphoma.py +++ b/edsnlp/pipes/ner/disorders/lymphoma/lymphoma.py @@ -76,9 +76,9 @@ class LymphomaMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.lymphoma` component was developed by AP-HP's Data Science team with a team - of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + The `eds.lymphoma` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. """ def __init__( diff --git a/edsnlp/pipes/ner/disorders/myocardial_infarction/myocardial_infarction.py b/edsnlp/pipes/ner/disorders/myocardial_infarction/myocardial_infarction.py index fd0fe691b..51f0f41db 100644 --- a/edsnlp/pipes/ner/disorders/myocardial_infarction/myocardial_infarction.py +++ b/edsnlp/pipes/ner/disorders/myocardial_infarction/myocardial_infarction.py @@ -80,10 +80,10 @@ class MyocardialInfarctionMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.myocardial_infarction` component was developed by AP-HP's Data Science - team with a team of medical experts. A paper describing in details the development - of those components is being drafted and will soon be available. - """ + The `eds.myocardial_infarction` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. 
+ """ # noqa: E501 def __init__( self, diff --git a/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/peptic_ulcer_disease.py b/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/peptic_ulcer_disease.py index 095e708ae..3bd55440f 100644 --- a/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/peptic_ulcer_disease.py +++ b/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/peptic_ulcer_disease.py @@ -75,10 +75,10 @@ class PepticUlcerDiseaseMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.peptic_ulcer_disease` component was developed by AP-HP's Data Science team - with a team of medical experts. A paper describing in details the development of - those components is being drafted and will soon be available. - """ + The `eds.peptic_ulcer_disease` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. + """ # noqa: E501 def __init__( self, diff --git a/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py b/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py index 6b59c7aa6..99c0e9cbd 100644 --- a/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py +++ b/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py @@ -76,10 +76,10 @@ class PeripheralVascularDiseaseMatcher(DisorderMatcher): Authors and citation -------------------- - The `eds.peripheral_vascular_disease` component was developed by AP-HP's Data - Science team with a team of medical experts. A paper describing in details the - development of those components is being drafted and will soon be available. - """ + The `eds.peripheral_vascular_disease` component was developed by AP-HP's Data Science team with a + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024]. 
+ """ # noqa: E501 def __init__( self, diff --git a/edsnlp/pipes/ner/disorders/solid_tumor/patterns.py b/edsnlp/pipes/ner/disorders/solid_tumor/patterns.py index 0d5fac5e2..64b0db0b8 100644 --- a/edsnlp/pipes/ner/disorders/solid_tumor/patterns.py +++ b/edsnlp/pipes/ner/disorders/solid_tumor/patterns.py @@ -83,6 +83,30 @@ ), ) +# Patterns developed for CT-Scan reports +metastasis_ct_scan = dict( + source="metastasis_ct_scan", + regex=[ + r"(?i)(m[ée]tasta(se|tique)s?)", + r"(diss[ée]min[ée]e?s?)", + r"(carcinose)", + r"(((allure|l[ée]sion|localisation|progression)s?\s)(suspecte?s?)?.{0,50}(secondaire)s?)", + r"(l(a|â)ch(é|e|er)\sde\sballons?)", + r"(l[ée]sions?\s(non\s)?cibles?)", + r"(rupture.{1,20}corticale)", + r"(envahissement.{0,15}parties\smolles)", + r"((l[i,y]se).{1,20}os)|ost[eé]ol[i,y]|rupture.{1,20}corticale|envahissement.{1,20}parties\smolles|ost[eé]ocondensa.{1,20}(suspect|secondaire|[ée]volutive)", + r"(l[ée]sion|anomalie|image).{1,20}os.{1,30}(suspect|secondaire|[ée]volutive)", + r"os.{1,30}(l[ée]sion|anomalie|image).{1,20}(suspect|secondaire|[ée]volutive)", + r"(l[ée]sion|anomalie|image).{1,20}l[i,y]tique", + r"(l[ée]sion|anomalie|image).{1,20}condensant.{1,20}(suspect|secondaire|[ée]volutive)", + r"fracture.{1,30}(suspect|secondaire|[ée]volutive)", + r"((l[ée]sion|anomalie|image|nodule).{1,80}(secondaire))", + r"((l[ée]sion|anomalie|image|nodule)s.{1,40}suspec?ts?)", + ], + regex_attr="NORM", +) + default_patterns = [ main_pattern, metastasis_pattern, diff --git a/edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py b/edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py index 88bad618d..27ec96813 100644 --- a/edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py +++ b/edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py @@ -9,7 +9,7 @@ from edsnlp.utils.numbers import parse_digit from ..base import DisorderMatcher -from .patterns import default_patterns +from .patterns import default_patterns, metastasis_ct_scan class 
SolidTumorMatcher(DisorderMatcher): @@ -79,12 +79,15 @@ class SolidTumorMatcher(DisorderMatcher): How to set matches on the doc use_tnm : bool Whether to use TNM scores matching as well + use_patterns_metastasis_ct_scan : bool + Whether to use the metastasis patterns developed for the CT-Scans Authors and citation -------------------- The `eds.solid_tumor` component was developed by AP-HP's Data Science team with a - team of medical experts. A paper describing in details the development of those - components is being drafted and will soon be available. + team of medical experts, following the insights of the algorithm proposed + by [@petitjean_2024] and [@kempf:hal-03519085]. + """ def __init__( @@ -94,9 +97,13 @@ def __init__( *, patterns: Union[Dict[str, Any], List[Dict[str, Any]]] = default_patterns, use_tnm: bool = False, + use_patterns_metastasis_ct_scan: bool = False, label: str = "solid_tumor", span_setter: SpanSetterArg = {"ents": True, "solid_tumor": True}, ): + if use_patterns_metastasis_ct_scan: + patterns.append(metastasis_ct_scan) + super().__init__( nlp=nlp, name=name, @@ -130,7 +137,7 @@ def process_tnm(self, doc): def process(self, doc: Doc) -> List[Span]: for span in super().process(doc): - if (span._.source == "metastasis") or ( + if (span._.source in ["metastasis", "metastasis_ct_scan"]) or ( "metastasis" in span._.assigned.keys() ): span._.status = 2 diff --git a/tests/pipelines/ner/disorders/solid_tumor.py b/tests/pipelines/ner/disorders/solid_tumor.py index 21a85463a..2a20f3c56 100644 --- a/tests/pipelines/ner/disorders/solid_tumor.py +++ b/tests/pipelines/ner/disorders/solid_tumor.py @@ -1,13 +1,5 @@ results_solid_tumor = dict( - has_match=[ - True, - True, - False, - True, - True, - True, - True, - ], + has_match=[True, True, False, True, True, True, True, True, True], detailled_status=[ "LOCALIZED", "LOCALIZED", @@ -16,6 +8,8 @@ "METASTASIS", "LOCALIZED", "METASTASIS", + "METASTASIS", + "METASTASIS", ], assign=None, texts=[ @@ -26,5 +20,9 
@@ "Cancer du poumon au stade 4", "Cancer du poumon au stade 2", "Présence de nombreuses lésions secondaires", + "Patient avec fracture abcddd secondaire. Cancer de", + "Patient avec lesions non ciblées", ], ) + +solid_tumor_config = dict(use_patterns_metastasis_ct_scan=True) diff --git a/tests/pipelines/ner/disorders/test_all.py b/tests/pipelines/ner/disorders/test_all.py index afc686826..7eca71125 100644 --- a/tests/pipelines/ner/disorders/test_all.py +++ b/tests/pipelines/ner/disorders/test_all.py @@ -16,9 +16,11 @@ from myocardial_infarction import results_myocardial_infarction from peptic_ulcer_disease import results_peptic_ulcer_disease from peripheral_vascular_disease import results_peripheral_vascular_disease -from solid_tumor import results_solid_tumor +from solid_tumor import results_solid_tumor, solid_tumor_config from tobacco import results_tobacco +config = dict(solid_tumor=solid_tumor_config) + results = dict( aids=results_aids, ckd=results_ckd, @@ -59,6 +61,7 @@ def __init__( detailled_status, negation=None, assign=None, + config=dict(), ): self.disorder = disorder self.nlp = nlp @@ -74,7 +77,7 @@ def __init__( self.assign = assign if assign is not None else len(texts) * [None] self.negation = negation if negation is not None else len(texts) * [None] - self.nlp.add_pipe(f"eds.{disorder}") + self.nlp.add_pipe(f"eds.{disorder}", config=config) def check(self): for text, has_match, detailled_status, assign, negation in zip( @@ -91,7 +94,7 @@ def check(self): doc = self.nlp(text) ents = doc.spans[self.disorder] - assert len(ents) == int(has_match) + assert len(ents) >= int(has_match) for ent in ents: assert ent.label_ == self.disorder @@ -116,10 +119,12 @@ def check(self): ) def test_disorder(normalized_nlp, disorder): result = results[disorder] + config_disorder = config.get(disorder, dict()) expect = DisorderTester( disorder, normalized_nlp, + config=config_disorder, **result, )