Skip to content

Commit

Permalink
fix: better warning for overlapping spans in eds.ner_crf
Browse files: browse the repository at this point in the history
  • Loading branch information
percevalw committed Feb 23, 2024
1 parent 0aa5fe7 commit 6bfa4cc
Showing 1 changed file with 1 addition and 11 deletions.
12 changes: 1 addition & 11 deletions edsnlp/pipes/trainable/ner_crf/ner_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def preprocess_supervised(self, doc):

if discarded:
warnings.warn(
- f"Some spans in {doc._.note_id} were discarded ("
+ f"Some spans in were discarded {doc._.note_id} ("
f"{', '.join(repr(d.text) for d in discarded)}) because they "
f"were overlapping with other spans with the same label."
)
Expand Down Expand Up @@ -481,16 +481,6 @@ def forward(self, batch: NERBatchInput) -> NERBatchOutput:
# tags = scores.argmax(-1).masked_fill(~mask.unsqueeze(-1), 0)
if loss is not None and loss.item() > 100000:
warnings.warn("The loss is very high, this is likely a tag encoding issue.")
- losses = self.crf(
- scores,
- mask,
- batch["targets"].unsqueeze(-1) == torch.arange(5).to(scores.device),
- ).view(-1)
- print("LOSSES", losses.tolist())
- print(
- batch["targets"].transpose(1, 2).reshape(-1, num_words)[losses.argmax()]
- )
- print(batch["targets"])
return {
"loss": loss,
"tags": tags,
Expand Down

0 comments on commit 6bfa4cc

Please sign in to comment.