From fdacdf2c8aac1c174db04d9f132bf79cfc397148 Mon Sep 17 00:00:00 2001 From: Joshua Wendland <80349780+joshuawe@users.noreply.github.com> Date: Fri, 9 Feb 2024 17:54:41 +0100 Subject: [PATCH] fix errors --- requirements.txt | 2 +- src/delphi/eval/token_labelling.py | 28 +++++++--------------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/requirements.txt b/requirements.txt index 07a340c6..7bda2f6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -# torch==2.1.2 +torch==2.1.2 datasets==2.16.1 transformers==4.36.2 tqdm==4.66.1 diff --git a/src/delphi/eval/token_labelling.py b/src/delphi/eval/token_labelling.py index 5ec134e1..163d9bb1 100644 --- a/src/delphi/eval/token_labelling.py +++ b/src/delphi/eval/token_labelling.py @@ -4,7 +4,7 @@ Additionally, it can visualize the sentences and their poart-of-speech (POS) tags. """ -from typing import Callable, Optional, Union +from typing import Callable, Optional import spacy # pylint: disable=import-error from spacy.tokens import Doc # pylint: disable=import-error @@ -98,7 +98,7 @@ def label_single_Token(token: Token) -> dict[str, bool]: return labels -def label_sentence(tokens: Union[Doc, list[Token]]) -> list[dict[str, bool]]: +def label_sentence(tokens: Doc | list[Token]) -> list[dict[str, bool]]: """ Labels spaCy Tokens in a sentence. Takes the context of the token into account for dependency labels (e.g. subject, object, ...), IF dependency labels are turned on. @@ -121,7 +121,7 @@ def label_sentence(tokens: Union[Doc, list[Token]]) -> list[dict[str, bool]]: def label_batch_sentences( - sentences: Union[list[str], list[list[str]]], + sentences: list[str] | list[list[str]], tokenized: bool = True, verbose: bool = False, ) -> list[list]: @@ -146,7 +146,6 @@ def label_batch_sentences( corresponding token length where each entry provides the labels/categories for the token. Sentence -> Token -> Labels """ - assert isinstance(sentences, list) # Load english language model nlp = spacy.load("en_core_web_sm") # labelled tokens, list holding sentences holding tokens holding corresponding token labels @@ -168,18 +167,10 @@ def label_batch_sentences( labelled_tokens = list() # list holding labels for all tokens of sentence labelled_tokens = label_sentence(doc) - # go through each token in the sentence - for token, labelled_token in zip(doc, labelled_tokens): - # labelled_token = label_single_Token(token) - # labels = list() # The list holding labels of a single token - # for _, category_check in TOKEN_LABELS.items(): - # label = category_check(token) - # labels.append(label) - # add current token's to the list - # labelled_tokens.append(labelled_token) - - # print the token and its labels to console - if verbose is True: + # print the token and its labels to console + if verbose is True: + # go through each token in the sentence + for token, labelled_token in zip(doc, labelled_tokens): print(f"Token: {token}") print(" | ".join(list(TOKEN_LABELS.keys()))) printable = [ @@ -198,11 +189,6 @@ def label_batch_sentences( if __name__ == "__main__": - # result = label_tokens( - # ["Hi, my name is Joshua.".split(" "), "The highway is full of car s, Peter.".split(" ")], - # tokenized=True, - # verbose=True, - # ) result = label_batch_token( ["Hi, my name is Joshua.", "The highway is full of car s, Peter."], tokenized=False,