Skip to content

Commit

Permalink
gpu sharing for frigate
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-codes committed Nov 14, 2024
1 parent 19e501b commit b1b2050
Show file tree
Hide file tree
Showing 9 changed files with 223 additions and 10 deletions.
21 changes: 21 additions & 0 deletions .pnp.cjs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions apps/alexa/deployment/index.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ local sttContainer = k.core.v1.container.new(name='whisper', image=std.extVar('w
{
resources: {
limits: {
'nvidia.com/gpu': 1,
'aliyun.com/gpu-mem': 4,
},
},
}
Expand All @@ -58,7 +58,6 @@ local sttDeployment = k.apps.v1.deployment.new(name='whisper', containers=[sttCo
template+: {

spec+: {
runtimeClassName: 'nvidia',
// tolerations: [
// { key: 'nvidia.com/gpu', operator: 'Exists', effect: 'NoSchedule' },
// ],
Expand Down
7 changes: 0 additions & 7 deletions apps/alexa/project.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
"cwd": "apps/alexa",
"module": "scripts/deploy.ts"
}
},
"image/push": {
"executor": "@ha/nx-executors:invoke",
"options": {
"module": "scripts/image-push.ts",
"cwd": "apps/alexa"
}
}
},
"tags": [],
Expand Down
5 changes: 4 additions & 1 deletion apps/frigate/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ coral:
gpu:
nvidia:
enabled: true
runtimeClassName: nvidia

resources:
limits:
aliyun.com/gpu-mem: 4
env:
YOLO_MODELS: yolov7x-640
extraVolumeMounts:
Expand Down
131 changes: 131 additions & 0 deletions apps/gpu-scheduler/deployment/index.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# rbac.yaml
---
# Cluster-wide permissions granted to the gpushare scheduler extender.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-schd-extender
rules:
# Read-only access to nodes.
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
# Emit and amend events.
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
# Read and modify pods (no create/delete).
- apiGroups:
- ""
resources:
- pods
verbs:
- update
- patch
- get
- list
- watch
# Create bindings — presumably used to bind pods to the chosen node.
- apiGroups:
- ""
resources:
- bindings
- pods/binding
verbs:
- create
# Read-only access to configmaps.
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- list
- watch
---
# Identity for the scheduler extender in kube-system; it is the subject of the
# ClusterRoleBinding and the account named in the Deployment's pod spec.
apiVersion: v1
kind: ServiceAccount
metadata:
name: gpushare-schd-extender
namespace: kube-system
---
# Grants the gpushare-schd-extender ClusterRole to the ServiceAccount of the
# same name in kube-system.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-schd-extender
# NOTE(review): ClusterRoleBinding is a cluster-scoped kind, so this namespace
# presumably has no effect — confirm and consider removing.
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gpushare-schd-extender
subjects:
- kind: ServiceAccount
name: gpushare-schd-extender
namespace: kube-system

# deployment yaml
---
# Runs a single replica of the gpushare scheduler extender in kube-system.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: gpushare-schd-extender
  namespace: kube-system
spec:
  replicas: 1
  # Recreate: the old pod is stopped before the new one starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: gpushare
      component: gpushare-schd-extender
  template:
    metadata:
      labels:
        app: gpushare
        component: gpushare-schd-extender
      annotations:
        # NOTE(review): this alpha annotation is deprecated in current
        # Kubernetes releases; confirm whether priorityClassName should be
        # used instead.
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      # The pod shares the node's network namespace.
      hostNetwork: true
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      # Pins the pod to one specific node by name.
      nodeName: "k8s-node-3"
      # `serviceAccount` is a deprecated alias; `serviceAccountName` is the
      # supported PodSpec field.
      serviceAccountName: gpushare-schd-extender
      containers:
        - name: gpushare-schd-extender
          image: registry.cn-hangzhou.aliyuncs.com/acs/k8s-gpushare-schd-extender:1.11-d170d8a
          env:
            - name: LOG_LEVEL
              value: debug
            # Same port number the gpushare-schd-extender Service targets.
            - name: PORT
              value: "12345"

# service.yaml
---
# Exposes the scheduler extender on a fixed NodePort.
apiVersion: v1
kind: Service
metadata:
name: gpushare-schd-extender
namespace: kube-system
labels:
app: gpushare
component: gpushare-schd-extender
spec:
type: NodePort
ports:
# 12345 is the value of the PORT env var set on the extender container.
- port: 12345
name: http
targetPort: 12345
nodePort: 32766
selector:
# select the gpushare scheduler-extender pods (labels from the Deployment's pod template)
app: gpushare
component: gpushare-schd-extender
15 changes: 15 additions & 0 deletions apps/gpu-scheduler/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"private": true,
"name": "@ha/gpu-scheduler",
"version": "0.0.1",
"license": "MIT",
"devDependencies": {
"@ha/configuration-api": "workspace:^0.0.1",
"@ha/configuration-workspace": "workspace:^0.0.1",
"@ha/docker": "workspace:^0.0.1",
"@ha/jsonnet": "workspace:^0.0.1",
"@ha/kubectl": "workspace:^1.0.0",
"@ha/nx-executors": "workspace:^0.1.0",
"shelljs": "^0.8.5"
}
}
17 changes: 17 additions & 0 deletions apps/gpu-scheduler/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "gpu-scheduler",
"$schema": "../../node_modules/nx/schemas/project-schema.json",
"sourceRoot": "apps/gpu-scheduler",
"projectType": "application",
"targets": {
"deploy": {
"executor": "@ha/nx-executors:invoke",
"options": {
"cwd": "apps/gpu-scheduler",
"module": "scripts/deploy.ts"
}
}
},
"tags": [],
"implicitDependencies": []
}
20 changes: 20 additions & 0 deletions apps/gpu-scheduler/scripts/deploy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import type { ConfigurationApi } from "@ha/configuration-api"
import type { Configuration } from "@ha/configuration-workspace"
import path from "path"
import sh from "shelljs"

/**
 * Deploys the GPU-share scheduler extender by applying the manifest at
 * `../deployment/index.yaml` (relative to this script) with `kubectl`.
 *
 * @param configurationApi - Workspace configuration accessor; not read by this script.
 * @throws Error when `kubectl` exits with a non-zero status.
 */
const run = async (
  configurationApi: ConfigurationApi<Configuration>,
): Promise<void> => {
  // https://github.com/AliyunContainerService/gpushare-scheduler-extender/blob/master/docs/install.md
  const manifestPath = path.join(__dirname, "..", "deployment", "index.yaml")

  // `apply` instead of `create`: `create` fails with AlreadyExists on every
  // deploy after the first, whereas `apply` creates or updates as needed.
  // The path is quoted so a checkout directory containing spaces still works.
  const result = sh.exec(`kubectl apply -f "${manifestPath}"`)

  // shelljs does not throw on a failed command; surface the failure so the
  // deploy target does not report success when kubectl failed.
  if (result.code !== 0) {
    throw new Error(
      `kubectl apply failed with exit code ${result.code}: ${result.stderr}`,
    )
  }
}

export default run
14 changes: 14 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3600,6 +3600,20 @@ __metadata:
languageName: unknown
linkType: soft

"@ha/gpu-scheduler@workspace:apps/gpu-scheduler":
version: 0.0.0-use.local
resolution: "@ha/gpu-scheduler@workspace:apps/gpu-scheduler"
dependencies:
"@ha/configuration-api": "workspace:^0.0.1"
"@ha/configuration-workspace": "workspace:^0.0.1"
"@ha/docker": "workspace:^0.0.1"
"@ha/jsonnet": "workspace:^0.0.1"
"@ha/kubectl": "workspace:^1.0.0"
"@ha/nx-executors": "workspace:^0.1.0"
shelljs: "npm:^0.8.5"
languageName: unknown
linkType: soft

"@ha/guest-db@workspace:apps/guest-db":
version: 0.0.0-use.local
resolution: "@ha/guest-db@workspace:apps/guest-db"
Expand Down

0 comments on commit b1b2050

Please sign in to comment.