Skip to content

Commit

Permalink
🚀 perf: QuerySelf & ValueSelf (#112)
Browse files: browse the repository at this point in the history
  • Loading branch information
jean-francoisreboud authored Jan 3, 2024
1 parent c2988f1 commit 4969db6
Show file tree
Hide file tree
Showing 19 changed files with 3,260 additions and 293 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.

## [unreleased]

🚀 **perf:** QuerySelf & ValueSelf ([112](https://github.com/owkin/GrAIdient/pull/112))\
🚀 **perf:** benchmark ViT base model ([111](https://github.com/owkin/GrAIdient/pull/111))\
🐛 **fix:** run on Apple Silicon ([110](https://github.com/owkin/GrAIdient/pull/110))\
⚙️ **core:** initForward,Backward model API ([109](https://github.com/owkin/GrAIdient/pull/109))\
Expand Down
1 change: 1 addition & 0 deletions Sources/GrAIdient/GrAI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ fileprivate class GrAIContext
case GPU
}

/// Used to select GPU device.
var gpuNamedPriority = [String]()

//--------------------------------------------------------------------------
Expand Down
51 changes: 43 additions & 8 deletions Sources/GrAIdient/LayerSeq/FullyConnectedSeq.swift
Original file line number Diff line number Diff line change
Expand Up @@ -837,8 +837,24 @@ public class FullyConnectedSeq: ActivationSeq,
let pNbBatch: [UInt32] = [UInt32(batchSize)]
let pSequence: [UInt32] = [UInt32(sequence)]

let kernel = layerPrev.nbNeurons % 4 == 0 ?
"flSeq4Forward" : "flSeqForward"
let kernel: String
let coeff: Int
if layerPrev.nbNeurons % 4 == 0 && batchSize % 8 == 0
{
kernel = "flSeq48Forward"
coeff = 8
}
else if layerPrev.nbNeurons % 4 == 0
{
kernel = "flSeq4Forward"
coeff = 1
}
else
{
kernel = "flSeqForward"
coeff = 1
}

let command = MetalKernel.get.createCommand(
kernel, deviceID: deviceID
)
Expand All @@ -853,7 +869,7 @@ public class FullyConnectedSeq: ActivationSeq,

command.dispatchThreads(
width: nbNeurons,
height: batchSize * sequence
height: (batchSize / coeff) * sequence
)
command.enqueue()
}
Expand Down Expand Up @@ -978,9 +994,28 @@ public class FullyConnectedSeq: ActivationSeq,
let pSequence: [UInt32] = [UInt32(sequence)]
let pDirty: [UInt32] = layerPrev.dirty ? [1] : [0]

let kernel = layerPrev.nbNeurons % 4 == 0 ?
"flSeq4Backward" : "flSeqBackward"
let coeff = layerPrev.nbNeurons % 4 == 0 ? 4 : 1
let kernel: String
let coeff1: Int
let coeff2: Int
if layerPrev.nbNeurons % 4 == 0 && batchSize % 8 == 0
{
kernel = "flSeq48Backward"
coeff1 = 4
coeff2 = 8
}
else if layerPrev.nbNeurons % 4 == 0
{
kernel = "flSeq4Backward"
coeff1 = 4
coeff2 = 1
}
else
{
kernel = "flSeqBackward"
coeff1 = 1
coeff2 = 1
}

let command = MetalKernel.get.createCommand(
kernel, deviceID: deviceID
)
Expand All @@ -994,8 +1029,8 @@ public class FullyConnectedSeq: ActivationSeq,
command.setBuffer(layerPrev.delta.metal, atIndex: 7)

command.dispatchThreads(
width: weightWidth / coeff,
height: batchSize * sequence
width: weightWidth / coeff1,
height: (batchSize / coeff2) * sequence
)
command.enqueue()

Expand Down
Loading

0 comments on commit 4969db6

Please sign in to comment.