Merge pull request #38 from clulab/headAndLabels
Aligned label and head predictions in predictWithScores. This is needed
kwalcock authored Aug 25, 2023
2 parents 4cf79d9 + d47574f commit 02b6e4a
Showing 4 changed files with 74 additions and 46 deletions.
4 changes: 2 additions & 2 deletions encoder/src/main/python/clu_trainer.py
@@ -90,7 +90,7 @@ def compute_metrics(self, eval_pred: EvalPrediction) -> Dict[str, float]:
ShortTaskDef("NER", "conll-ner/", "train.txt", "dev.txt", "test.txt"),
ShortTaskDef("POS", "pos/", "train.txt", "dev.txt", "test.txt"),
ShortTaskDef("Chunking", "chunking/", "train.txt", "test.txt", "test.txt"), # this dataset has no dev
ShortTaskDef("Deps Head", "deps-wsj/", "train.heads", "dev.heads", "test.heads"),
ShortTaskDef("Deps Label", "deps-wsj/", "train.labels", "dev.labels", "test.labels", dual_mode=True)
ShortTaskDef("Deps Head", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.heads", "dev.heads", "test.heads"),
ShortTaskDef("Deps Label", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels", "dev.labels", "test.labels", dual_mode=True)
])
CluTrainer(tokenizer).train(tasks)
@@ -56,7 +56,7 @@ class LinearLayer(

/** Predict all labels and their scores per token */
def predictWithScores(inputSentence: DenseMatrix[Float],
heads: Option[Array[Int]],
heads: Option[Array[Array[Int]]],
masks: Option[Array[Boolean]]): Array[Array[(String, Float)]] = {
val batchSentences = Array(inputSentence)
val batchHeads = heads.map(Array(_))
@@ -66,7 +66,7 @@ class LinearLayer(

/** Predict all labels and their scores per token in each sentence in the batch */
def predictWithScores(inputBatch: Array[DenseMatrix[Float]],
batchHeads: Option[Array[Array[Int]]],
batchHeads: Option[Array[Array[Array[Int]]]],
batchMasks: Option[Array[Array[Boolean]]]): Array[Array[Array[(String, Float)]]] = {
if (dual) predictDualWithScores(inputBatch, batchHeads, batchMasks)
else predictPrimalWithScores(inputBatch)
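The head argument is no longer a single predicted head per token but an array of candidate head offsets per token. A minimal sketch (object name and values invented) of how the new parameter is shaped and how the single-sentence overload's `heads.map(Array(_))` wraps it into the batched form:

```scala
// Sketch only: the shape of the new head argument, not code from this PR.
object HeadShapeSketch extends App {
  // one array of candidate head offsets per token; offsets are relative positions
  // (e.g., +1 = next token), best-scoring candidate first
  val headsPerToken: Array[Array[Int]] = Array(
    Array(1, 2, -1),  // token 0
    Array(-1, 1),     // token 1
    Array(-2, 1, 3)   // token 2
  )
  val heads: Option[Array[Array[Int]]] = Some(headsPerToken)

  // the single-sentence overload wraps this into a batch of size 1,
  // matching the Option[Array[Array[Array[Int]]]] parameter of the batched method
  val batchHeads: Option[Array[Array[Array[Int]]]] = heads.map(Array(_))
  println(batchHeads.get.length) // 1 sentence in the batch
}
```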
@@ -77,7 +77,7 @@ class LinearLayer(
headRelativePositions: Array[Int]): DenseMatrix[Float] = {

// this matrix concatenates the hidden states of modifier + corresponding head
// rows = number of tokens in the sentence; cols = hidden state size
// rows = number of tokens in the sentence; cols = hidden state size x 2
val concatMatrix = DenseMatrix.zeros[Float](rows = sentenceHiddenStates.rows, cols = 2 * sentenceHiddenStates.cols)

// traverse all modifiers
@@ -100,6 +100,37 @@ class LinearLayer(
concatMatrix
}

/**
 * Generates a 1-row matrix containing a concatenation of the modifier and head embeddings
 */
def concatenateModifierAndHead(
sentenceHiddenStates: DenseMatrix[Float],
modifierAbsolutePosition: Int,
headRelativePosition: Int): DenseMatrix[Float] = {

// this matrix concatenates the hidden states of modifier + corresponding head
// rows = 1; cols = hidden state size x 2
val concatMatrix = DenseMatrix.zeros[Float](rows = 1, cols = 2 * sentenceHiddenStates.cols)

// embedding of the modifier
val modHiddenState = sentenceHiddenStates(modifierAbsolutePosition, ::)

// embedding of the head
val rawHeadAbsPos = modifierAbsolutePosition + headRelativePosition
val headAbsolutePosition =
if(rawHeadAbsPos >= 0 && rawHeadAbsPos < sentenceHiddenStates.rows) rawHeadAbsPos
else modifierAbsolutePosition // if the absolute position is invalid (e.g., root node or an incorrect prediction), duplicate the modifier embedding
val headHiddenState = sentenceHiddenStates(headAbsolutePosition, ::)

// concatenation of the modifier and head embeddings
// vector concatenation in Breeze operates over vertical vectors, hence the transposing here
val concatState = DenseVector.vertcat(modHiddenState.t, headHiddenState.t).t

concatMatrix(0, ::) :+= concatState
concatMatrix
}
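For readers less familiar with Breeze, here is a self-contained toy sketch of the same concatenation trick (matrix values and object name are invented). It reproduces the transpose/vertcat/transpose dance and the fallback to the modifier position for out-of-range heads:

```scala
// Toy illustration, not part of the PR.
import breeze.linalg.{DenseMatrix, DenseVector}

object ConcatSketch extends App {
  // 3 tokens, hidden size 2; invented values: row i holds (2i+1, 2i+2)
  val hidden = DenseMatrix.tabulate[Float](3, 2) { (i, j) => (i * 2 + j + 1).toFloat }

  val modifierAbsolutePosition = 0
  val headRelativePosition = 2
  val rawHeadAbsPos = modifierAbsolutePosition + headRelativePosition
  // fall back to the modifier itself when the candidate head falls outside the sentence
  val headAbsolutePosition =
    if (rawHeadAbsPos >= 0 && rawHeadAbsPos < hidden.rows) rawHeadAbsPos
    else modifierAbsolutePosition

  // a row slice is a Transpose[DenseVector], so transpose before vertcat
  val concat = DenseVector.vertcat(
    hidden(modifierAbsolutePosition, ::).t,
    hidden(headAbsolutePosition, ::).t)
  println(concat) // DenseVector(1.0, 2.0, 5.0, 6.0)
}
```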

/** Predict the top label for each combination of modifier token and corresponding head token */
def predictDual(inputBatch: Array[DenseMatrix[Float]],
batchHeads: Option[Array[Array[Int]]] = None,
@@ -108,67 +139,58 @@ class LinearLayer(
assert(batchMasks.isDefined)
val indexToLabel = labelsOpt.getOrElse(throw new RuntimeException("ERROR: can't predict without labels!"))

val outputBatch = new Array[Array[String]](inputBatch.length)

// we process one sentence at a time because the dual setting makes it harder to batch
for(i <- inputBatch.indices) {
val input = inputBatch(i)
val heads = batchHeads.get(i)

val outputBatch = inputBatch.zip(batchHeads.get).map { case (input, heads) =>
// generate a matrix that is twice as wide to concatenate the embeddings of the mod + head
val concatInput = concatenateModifiersAndHeads(input, heads)

// get the logits for the current sentence produced by this linear layer
val logitsPerSentence = forward(Array(concatInput))(0)

// one token per row; pick argmax per token
val bestLabels = Range(0, logitsPerSentence.rows).map { i =>
val row = logitsPerSentence(i, ::) // picks line i from a 2D matrix
val bestIndex = argmax(row.t)

indexToLabel(bestIndex)
}

outputBatch(i) = bestLabels.toArray
bestLabels.toArray
}

outputBatch
}
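The refactor above replaces the pre-allocated `outputBatch` array and index-based loop with `zip`/`map`, which keeps each sentence paired with its heads and preserves order without mutation. A trivial sketch of the pattern (toy strings stand in for the matrices; names invented):

```scala
// Sketch of the zip/map pattern, not code from this PR.
object ZipMapSketch extends App {
  val inputBatch = Array("sentence0", "sentence1")     // stands in for Array[DenseMatrix[Float]]
  val batchHeads = Array(Array(1, -1), Array(2, 0, 1)) // relative heads per sentence

  // zip keeps sentences aligned with their heads; map returns results in the same order,
  // so no mutable output array or outputBatch(i) = ... assignment is needed
  val outputBatch: Array[String] = inputBatch.zip(batchHeads).map { case (input, heads) =>
    s"$input -> ${heads.length} head predictions"
  }
  outputBatch.foreach(println)
}
```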

// predicts the top label for each of the candidate heads
// out dimensions: sentence in batch x token in sentence x label/score per token
// batchHeads dimensions: sentence in batch x token in sentence x heads per token
// labels are sorted in descending order of their scores
def predictDualWithScores(inputBatch: Array[DenseMatrix[Float]],
batchHeads: Option[Array[Array[Int]]] = None,
batchHeads: Option[Array[Array[Array[Int]]]] = None,
batchMasks: Option[Array[Array[Boolean]]] = None): Array[Array[Array[(String, Float)]]] = {
assert(batchHeads.isDefined)
assert(batchMasks.isDefined)
val indexToLabel = labelsOpt.getOrElse(throw new RuntimeException("ERROR: can't predict without labels!"))

val outputBatch = new Array[Array[Array[(String, Float)]]](inputBatch.length)

// dimensions: sent in batch x token in sentence x label per candidate head
// we process one sentence at a time because the dual setting makes it harder to batch
for (i <- inputBatch.indices) {
val input = inputBatch(i)
val heads = batchHeads.get(i)

// generate a matrix that is twice as wide to concatenate the embeddings of the mod + head
val concatInput = concatenateModifiersAndHeads(input, heads)

// get the logits for the current sentence produced by this linear layer
val logitsPerSentence = forward(Array(concatInput))(0)

// one token per row; store scores for all labels for this token
val allLabels = Range(0, logitsPerSentence.rows).map { i =>
// picks line i from a 2D matrix and converts it to Array
val scores = logitsPerSentence(i, ::).t.toArray
// extract the label at each position in the row and its score
val labelsAndScores = indexToLabel.zip(scores)

// keep scores in descending order (largest first)
labelsAndScores.sortBy(-_._2)
}

outputBatch(i) = allLabels.toArray
}
val outputBatch = inputBatch.zip(batchHeads.get).map { case (input, headCandidatesPerSentence) =>
// now process each token separately
headCandidatesPerSentence.zipWithIndex.map { case (headCandidatesPerToken, modifierAbsolutePosition) =>
// process each head candidate for this token
headCandidatesPerToken.map { headRelativePosition =>
// generate a matrix that is twice as wide to concatenate the embeddings of the mod + head
val concatInput = concatenateModifierAndHead(input, modifierAbsolutePosition, headRelativePosition)
// get the logits for the current pair of modifier and head
val logitsPerSentence = forward(Array(concatInput))(0)
val labelScores = logitsPerSentence(0, ::)
val bestIndex = argmax(labelScores.t)
val bestScore = labelScores(bestIndex)
val bestLabel = indexToLabel(bestIndex)

// println(s"Top prediction for mod $modifierAbsolutePosition and relative head $headRelativePosition is $bestLabel with score $bestScore")
(bestLabel, bestScore)
} // end head candidates for this token
} // end this token
} // end sentence batch

outputBatch
}
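This is the alignment the commit title refers to: entry `k` of a token's output corresponds to entry `k` of its candidate-head array, rather than being sorted by score. A sketch (all values and labels invented) of reading the result alongside `batchHeads`:

```scala
// Sketch only: consuming the realigned output of predictDualWithScores.
object AlignedOutputSketch extends App {
  // batchHeads: sentence x token x candidate head offsets
  val batchHeads: Array[Array[Array[Int]]] =
    Array(Array(Array(1, -1), Array(-1, 2)))

  // output: sentence x token x (label, score), one entry per head candidate above
  val output: Array[Array[Array[(String, Float)]]] =
    Array(Array(Array(("det", 3.1f), ("amod", 0.4f)), Array(("root", 5.2f), ("nsubj", 1.0f))))

  for (s <- batchHeads.indices; t <- batchHeads(s).indices; c <- batchHeads(s)(t).indices) {
    val headOffset = batchHeads(s)(t)(c)
    val (label, score) = output(s)(t)(c) // same index c: the label is aligned with its head candidate
    println(s"sentence $s, token $t: head offset $headOffset -> $label ($score)")
  }
}
```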
@@ -206,7 +228,7 @@ class LinearLayer(
val labelsAndScores = labels.zip(scores)

// keep scores in descending order (largest first)
labelsAndScores.sortBy(_._2)
labelsAndScores.sortBy(- _._2) // - score guarantees sorting in descending order of scores
}

allLabels.toArray
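A one-line illustration (invented pairs) of why negating the score in `sortBy` yields descending order, so the best label always comes first:

```scala
// Sketch of the sortBy fix above, not code from this PR.
object SortSketch extends App {
  val labelsAndScores = Array(("nsubj", 0.2f), ("det", 1.7f), ("amod", 0.9f))
  println(labelsAndScores.sortBy(-_._2).mkString(", ")) // (det,1.7), (amod,0.9), (nsubj,0.2)
}
```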
@@ -45,13 +45,16 @@ class TokenClassifier(
val tokenization = LongTokenization(tokenizer.tokenize(words.toArray))
val inputIds = tokenization.tokenIds
val wordIds = tokenization.wordIds
//val tokens = tokenization.tokens

// run the sentence through the transformer encoder
val encOutput = encoder.forward(inputIds)

// outputs for all tasks stored here: task x tokens in sentence x scores per token
val allLabels = new Array[Array[Array[(String, Float)]]](tasks.length)
var heads: Option[Array[Int]] = None
// all heads predicted for every token
// dimensions: token x heads
var heads: Option[Array[Array[Int]]] = None

// now generate token label predictions for all primary tasks (not dual!)
for (i <- tasks.indices) {
@@ -61,17 +64,19 @@
allLabels(i) = wordLabels

// if this is the task that predicts head positions, then save them for the dual tasks
// here we save only the head predicted with the highest score (hence the .head)
// we save all the heads predicted for each token
if (tasks(i).name == headTaskName) {
heads = Some(tokenLabels.map(_.head._1.toInt))
heads = Some(tokenLabels.map(_.map(_._1.toInt)))
}
}
}

// generate outputs for the dual tasks, if heads were predicted by one of the primary tasks
// the dual task(s) must be aligned with the heads.
// that is, we predict the top label for each of the head candidates
if (heads.isDefined) {
//println("Tokens: " + tokens.mkString(", "))
//println("Heads: " + heads.get.mkString(", "))
//println("Heads:\n\t" + heads.get.map(_.slice(0, 3).mkString(", ")).mkString("\n\t"))
//println("Masks: " + TokenClassifier.mkTokenMask(wordIds).mkString(", "))
val masks = Some(TokenClassifier.mkTokenMask(wordIds))
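Under the assumption (consistent with the surrounding code) that the head task's labels are string-encoded relative offsets sorted by descending score, the change above keeps every candidate head per token instead of only the top one. A sketch of the shape difference (values invented):

```scala
// Sketch of the old vs. new head extraction, not code from this PR.
object HeadExtractionSketch extends App {
  // tokenLabels: token x (label, score); for the head task the labels are relative offsets as strings
  val tokenLabels: Array[Array[(String, Float)]] = Array(
    Array(("1", 4.2f), ("2", 1.1f), ("-1", 0.3f)),
    Array(("-1", 3.8f), ("0", 2.0f))
  )

  // old behavior: keep only the best candidate per token
  val topHeadOnly: Array[Int] = tokenLabels.map(_.head._1.toInt)       // [1, -1]

  // new behavior: keep every candidate, aligned with the dual task's predictions
  val allHeads: Array[Array[Int]] = tokenLabels.map(_.map(_._1.toInt)) // [[1, 2, -1], [-1, 0]]

  println(topHeadOnly.mkString(", "))
  println(allHeads.map(_.mkString("[", ",", "]")).mkString(" "))
}
```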

@@ -102,6 +107,7 @@ class TokenClassifier(
val tokenization = LongTokenization(tokenizer.tokenize(words.toArray))
val inputIds = tokenization.tokenIds
val wordIds = tokenization.wordIds
//val tokens = tokenization.tokens

// run the sentence through the transformer encoder
val encOutput = encoder.forward(inputIds)
2 changes: 1 addition & 1 deletion trainer.sh
@@ -1,3 +1,3 @@
#!/bin/bash

PYTHONHASHSEED=1 python encoder/src/main/python/trainer.py
PYTHONHASHSEED=1 python encoder/src/main/python/clu_trainer.py
