# dask_config.yaml — Dask configuration for the UChicago Analysis Facility:
# dask-labextension defaults, distributed dashboard/scheduler settings, and
# dask-kubernetes cluster + pod templates.
---
# Settings consumed by the dask-labextension JupyterLab plugin.
labextension:
  factory:
    # Cluster class instantiated when a user clicks "New Cluster".
    module: dask_kubernetes
    class: KubeCluster
    args: []
    kwargs: {}
  default:
    # NOTE(review): with `adapt` set below, adaptive scaling normally takes
    # precedence over the fixed `workers` count — confirm both are intended.
    workers: 3
    adapt:
      # Scale between 0 and 1000 workers on demand.
      minimum: 0
      maximum: 1000
  # No clusters are started automatically when the extension loads.
  initial: []
# Core dask.distributed settings.
distributed:
  dashboard:
    # Dashboard URL template shown to users; {host} is filled in at runtime.
    # Hard-coded to the facility's reverse proxy instead of the generic form.
    link: "https://n4dask.af.uchicago.edu/{host}/status"
    # link: "{scheme}://{host}:{port}/status"
  scheduler:
    # Work stealing disabled — tasks stay on the worker they were assigned to.
    work-stealing: false
# dask-kubernetes (KubeCluster) settings.
kubernetes:
  # Pod name template; {user} and {uuid} are presumably expanded by
  # dask-kubernetes per cluster — verify against the installed version.
  name: "dask-{user}-{uuid}"
  namespace: af-jupyter
  # Extra environment variables injected into worker pods (none by default).
  env: {}
  count:
    # Initial / minimum / maximum worker counts.
    start: 1
    min: 1
    max: 5000
  # Default image; overridden by the explicit images in the pod templates below.
  image: "ghcr.io/dask/dask:latest"
  resources: {}
  # Seconds to wait for the scheduler Service to come up.
  scheduler-service-wait-timeout: 30
  # Retries when resolving the scheduler Service DNS name.
  scheduler-service-name-resolution-retries: 20
scheduler-service-template:
apiVersion: v1
kind: Service
spec:
selector:
dask.org/cluster-name: "" # Cluster name will be added automatically
dask.org/component: scheduler
ports:
- name: tcp-comm
protocol: TCP
port: 8786
targetPort: 8786
- name: http-dashboard
protocol: TCP
port: 8787
targetPort: 8787
  # PodDisruptionBudget keeping the (single) scheduler pod from being evicted
  # by voluntary disruptions such as node drains.
  scheduler-pdb-template:
    apiVersion: policy/v1
    kind: PodDisruptionBudget
    spec:
      minAvailable: 1
      selector:
        matchLabels:
          dask.org/cluster-name: "" # Cluster name will be added automatically
          dask.org/component: scheduler
  # Worker pod spec is given inline below rather than loaded from a file.
  worker-template-path: null
scheduler-template:
kind: Pod
metadata:
spec:
restartPolicy: Never
serviceAccountName: jupyter
containers:
- name: dask-scheduler
# image: ghcr.io/dask/dask:latest
image: hub.opensciencegrid.org/usatlas/analysis-dask-base:dev
imagePullPolicy: Always
env:
- name: DASK_BOKEH_PREFIX
value: "{host}"
args: ["dask", "scheduler", "--port", "8786", "--bokeh-port", "8787"]
resources:
limits:
cpu: "2.0"
memory: 20G
requests:
cpu: "1.5"
memory: 10G
  # Pod template for Dask workers: facility image, fixed 1 process x 1 thread
  # per pod, with NFS home, CephFS data, and CVMFS mounts.
  worker-template:
    kind: Pod
    # NOTE(review): bare `metadata:` parses as YAML null rather than an empty
    # mapping — confirm dask-kubernetes tolerates this (prefer `metadata: {}`).
    metadata:
    spec:
      restartPolicy: Never
      serviceAccountName: jupyter
      containers:
        - name: dask-worker
          image: hub.opensciencegrid.org/usatlas/analysis-dask-base:dev
          # image: ghcr.io/dask/dask:latest
          imagePullPolicy: Always
          # One worker process, one thread, 3 GB memory limit; the worker
          # exits if it cannot reach the scheduler for 60 s.
          args:
            [
              "dask",
              "worker",
              "--nworkers",
              "1",
              "--nthreads",
              "1",
              "--memory-limit",
              "3GB",
              "--death-timeout",
              "60",
            ]
          resources:
            limits:
              cpu: "4"
              memory: 10G
            requests:
              cpu: "1.5"
              memory: "3.5G"
          volumeMounts:
            - mountPath: /home
              name: nfs-home
            - mountPath: /data
              name: ceph-data
              subPath: data
            - mountPath: /cvmfs
              # Propagate host-side CVMFS automounts into the container.
              mountPropagation: HostToContainer
              name: cvmfs
      volumes:
        # User home directories over NFS.
        - name: nfs-home
          nfs:
            path: /export/home
            server: nfs.af.uchicago.edu
        # Shared data area on CephFS, authenticated via the ceph-secret Secret.
        - cephfs:
            monitors:
              # NOTE(review): ip:port scalars parse as strings here (the dots
              # prevent numeric typing), but quoting them would be safer.
              - 192.170.240.135:6789
              - 192.170.240.148:6789
              - 192.170.240.192:6789
            secretRef:
              name: ceph-secret
            user: admin
          name: ceph-data
        # CVMFS from the host node.
        - hostPath:
            path: /cvmfs
            type: ""
          name: cvmfs