Skip to content

Commit

Permalink
fix errors
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuawe committed Feb 9, 2024
1 parent a71e2a8 commit fdacdf2
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 22 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# torch==2.1.2
torch==2.1.2
datasets==2.16.1
transformers==4.36.2
tqdm==4.66.1
Expand Down
28 changes: 7 additions & 21 deletions src/delphi/eval/token_labelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Additionally, it can visualize the sentences and their part-of-speech (POS) tags.
"""

from typing import Callable, Optional, Union
from typing import Callable, Optional

import spacy # pylint: disable=import-error
from spacy.tokens import Doc # pylint: disable=import-error
Expand Down Expand Up @@ -98,7 +98,7 @@ def label_single_Token(token: Token) -> dict[str, bool]:
return labels


def label_sentence(tokens: Union[Doc, list[Token]]) -> list[dict[str, bool]]:
def label_sentence(tokens: Doc | list[Token]) -> list[dict[str, bool]]:
"""
Labels spaCy Tokens in a sentence. Takes the context of the token into account
for dependency labels (e.g. subject, object, ...), IF dependency labels are turned on.
Expand All @@ -121,7 +121,7 @@ def label_sentence(tokens: Union[Doc, list[Token]]) -> list[dict[str, bool]]:


def label_batch_sentences(
sentences: Union[list[str], list[list[str]]],
sentences: list[str] | list[list[str]],
tokenized: bool = True,
verbose: bool = False,
) -> list[list]:
Expand All @@ -146,7 +146,6 @@ def label_batch_sentences(
corresponding token length where each entry provides the labels/categories
for the token. Sentence -> Token -> Labels
"""
assert isinstance(sentences, list)
# Load english language model
nlp = spacy.load("en_core_web_sm")
# labelled tokens, list holding sentences holding tokens holding corresponding token labels
Expand All @@ -168,18 +167,10 @@ def label_batch_sentences(
labelled_tokens = list() # list holding labels for all tokens of sentence
labelled_tokens = label_sentence(doc)

# go through each token in the sentence
for token, labelled_token in zip(doc, labelled_tokens):
# labelled_token = label_single_Token(token)
# labels = list() # The list holding labels of a single token
# for _, category_check in TOKEN_LABELS.items():
# label = category_check(token)
# labels.append(label)
# add current token's to the list
# labelled_tokens.append(labelled_token)

# print the token and its labels to console
if verbose is True:
# print the token and its labels to console
if verbose is True:
# go through each token in the sentence
for token, labelled_token in zip(doc, labelled_tokens):
print(f"Token: {token}")
print(" | ".join(list(TOKEN_LABELS.keys())))
printable = [
Expand All @@ -198,11 +189,6 @@ def label_batch_sentences(


if __name__ == "__main__":
# result = label_tokens(
# ["Hi, my name is Joshua.".split(" "), "The highway is full of car s, Peter.".split(" ")],
# tokenized=True,
# verbose=True,
# )
result = label_batch_token(
["Hi, my name is Joshua.", "The highway is full of car s, Peter."],
tokenized=False,
Expand Down

0 comments on commit fdacdf2

Please sign in to comment.