Skip to content

Commit

Permalink
Merge 0212202 into 3d8a630
Browse files Browse the repository at this point in the history
  • Loading branch information
yuvipanda authored Jan 21, 2025
2 parents 3d8a630 + 0212202 commit 94eb603
Show file tree
Hide file tree
Showing 18 changed files with 434 additions and 23 deletions.
1 change: 1 addition & 0 deletions WISDOM.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

- When you are in an outage, focus only on fixing the outage - do not try to do anything else.
- Prefer minor annoyances happening infrequently but at regular intervals, rather than major annoyances happening rarely but at unpredictable intervals.
- Sometimes, surviving is winning.
137 changes: 137 additions & 0 deletions config/hetzner-2i2c.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Federation-member config for the Hetzner VM operated with 2i2c (single-node k3s).
# NOTE(review): indentation below was reconstructed — nesting of imageCleaner /
# jupyterhub under binderhub follows the binderhub chart convention; confirm
# against the deployed values before relying on it.
projectName: hetzner-2i2c

registry:
  enabled: true
  config:
    storage:
      # Uncomment this and comment out the s3 config to use filesystem
      # filesystem:
      #   rootdirectory: /var/lib/registry
      s3:
        regionendpoint: https://fsn1.your-objectstorage.com
        bucket: mybinder-2i2c-registry-hetzner
        # The endpoint above fully determines the location; the registry still
        # requires a non-empty region value, but it is ignored.
        region: does-not-matter
  storage:
    filesystem:
      # k3s ships the local-path provisioner by default
      storageClassName: "local-path"
  ingress:
    hosts:
      - registry.2i2c.mybinder.org

cryptnono:
  detectors:
    monero:
      enabled: false

binderhub:
  config:
    BinderHub:
      hub_url: https://hub.2i2c.mybinder.org
      badge_base_url: https://mybinder.org
      sticky_builds: true
      image_prefix: registry.2i2c.mybinder.org/i-
      # image_prefix: quay.io/mybinder-hetzner-2i2c/image-
      # build_docker_host: /var/run/dind/docker.sock
    # TODO: we should have CPU requests, too
    # use this to limit the number of builds per node
    # complicated: dind memory request + KubernetesBuildExecutor.memory_request * builds_per_node ~= node memory
    KubernetesBuildExecutor:
      memory_request: "2G"
      docker_host: /var/run/dind/docker.sock

    LaunchQuota:
      total_quota: 300

    # DockerRegistry:
    #   token_url: "https://2lmrrh8f.gra7.container-registry.ovh.net/service/token?service=harbor-registry"

  replicas: 1

  # Mount the events-archiver service account so launch events can be archived
  extraVolumes:
    - name: secrets
      secret:
        secretName: events-archiver-secrets
  extraVolumeMounts:
    - name: secrets
      mountPath: /secrets
      readOnly: true
  extraEnv:
    GOOGLE_APPLICATION_CREDENTIALS: /secrets/service-account.json

  dind: {}

  ingress:
    hosts:
      - 2i2c.mybinder.org

  jupyterhub:
    # proxy:
    #   chp:
    #     resources:
    #       requests:
    #         cpu: "1"
    #       limits:
    #         cpu: "1"
    ingress:
      hosts:
        - hub.2i2c.mybinder.org
      tls:
        - secretName: kubelego-tls-hub
          hosts:
            - hub.2i2c.mybinder.org

  imageCleaner:
    # Use 40GB as upper limit, size is given in bytes
    imageGCThresholdHigh: 40e9
    imageGCThresholdLow: 30e9
    imageGCThresholdType: "absolute"

grafana:
  ingress:
    hosts:
      - grafana.2i2c.mybinder.org
    tls:
      - hosts:
          - grafana.2i2c.mybinder.org
        secretName: kubelego-tls-grafana
  datasources:
    datasources.yaml:
      apiVersion: 1
      datasources:
        - name: prometheus
          orgId: 1
          type: prometheus
          url: https://prometheus.2i2c.mybinder.org
          access: direct
          isDefault: true
          editable: false
  # persistence:
  #   storageClassName: csi-cinder-high-speed

prometheus:
  server:
    persistentVolume:
      size: 50Gi
    retention: 30d
    ingress:
      hosts:
        - prometheus.2i2c.mybinder.org
      tls:
        - hosts:
            - prometheus.2i2c.mybinder.org
          secretName: kubelego-tls-prometheus

ingress-nginx:
  controller:
    replicas: 1
    scope:
      enabled: true
    service:
      # Public IP of the single Hetzner VM; there is no cloud loadbalancer here
      loadBalancerIP: 116.203.245.43

static:
  ingress:
    hosts:
      - static.2i2c.mybinder.org
    tls:
      secretName: kubelego-tls-static
10 changes: 8 additions & 2 deletions config/prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,16 @@ federationRedirect:
weight: 0
health: https://gke.mybinder.org/health
versions: https://gke.mybinder.org/versions
gesis:
hetzner-2i2c:
prime: true
url: https://notebooks.gesis.org/binder
url: https://2i2c.mybinder.org
weight: 60
health: https://2i2c.mybinder.org/health
versions: https://2i2c.mybinder.org/versions
gesis:
prime: false
url: https://notebooks.gesis.org/binder
weight: 40
health: https://notebooks.gesis.org/binder/health
versions: https://notebooks.gesis.org/binder/versions
ovh2:
Expand Down
29 changes: 12 additions & 17 deletions deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
"prod": "us-central1",
}

# Projects using raw KUBECONFIG files
KUBECONFIG_CLUSTERS = {"ovh2", "hetzner-2i2c"}

# Mapping of config name to cluster name for AWS EKS deployments
AWS_DEPLOYMENTS = {"curvenote": "binderhub"}

Expand Down Expand Up @@ -100,17 +103,15 @@ def setup_auth_azure(cluster, dry_run=False):
print(stdout)


def setup_auth_ovh(release, cluster, dry_run=False):
def setup_auth_kubeconfig(release, cluster, dry_run=False):
"""
Set up authentication with 'ovh' K8S from the ovh-kubeconfig.yml
Setup authentication with a pure kubeconfig file
"""
print(f"Setup the OVH authentication for namespace {release}")
print(f"Setup authentication for namespace {release} with kubeconfig")

ovh_kubeconfig = os.path.join(ABSOLUTE_HERE, "secrets", f"{release}-kubeconfig.yml")
os.environ["KUBECONFIG"] = ovh_kubeconfig
print(f"Current KUBECONFIG='{ovh_kubeconfig}'")
stdout = check_output(["kubectl", "config", "use-context", cluster], dry_run)
print(stdout)
kubeconfig = os.path.join(ABSOLUTE_HERE, "secrets", f"{release}-kubeconfig.yml")
os.environ["KUBECONFIG"] = kubeconfig
print(f"Current KUBECONFIG='{kubeconfig}'")


def setup_auth_gcloud(release, cluster=None, dry_run=False):
Expand Down Expand Up @@ -436,13 +437,7 @@ def main():
argparser.add_argument(
"release",
help="Release to deploy",
choices=[
"staging",
"prod",
"ovh",
"ovh2",
"curvenote",
],
choices=["staging", "prod", "ovh", "ovh2", "curvenote", "hetzner-2i2c"],
)
argparser.add_argument(
"--name",
Expand Down Expand Up @@ -511,8 +506,8 @@ def main():
# script is running on CI, proceed with auth and helm setup

if args.stage in ("all", "auth"):
if cluster.startswith("ovh"):
setup_auth_ovh(args.release, cluster, args.dry_run)
if cluster in KUBECONFIG_CLUSTERS:
setup_auth_kubeconfig(args.release, cluster, args.dry_run)
patch_coredns(args.dry_run, args.diff)
elif cluster in AZURE_RGs:
setup_auth_azure(cluster, args.dry_run)
Expand Down
1 change: 1 addition & 0 deletions docs/source/deployment/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Deployment and Operation
prereqs
how
what
k3s
85 changes: 85 additions & 0 deletions docs/source/deployment/k3s.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Deploy a new mybinder.org federation member on a bare VM with `k3s`

[k3s](https://k3s.io/) is a popular kubernetes distribution that we can use
to build _single node_ kubernetes installations that satisfy the needs of the
mybinder project. By focusing on the simplest possible kubernetes installation,
we can get all the benefits of kubernetes (simplified deployment, cloud agnosticity,
unified tooling, etc) **except** autoscaling, and deploy **anywhere we can get a VM
with root access**. This is vastly simpler than managing an autoscaling kubernetes
cluster, and allows expansion of the mybinder federation in ways that would otherwise
be more difficult.

## VM requirements

The k3s project publishes [their requirements](https://docs.k3s.io/installation/requirements),
but we have a slightly more opinionated list.

1. We must have full `root` access.
2. Runs latest Ubuntu LTS (currently 24.04). Debian is acceptable.
3. Direct internet access, inbound (public IP) and outbound.
4. "As big as possible", as we will be using all the capacity of this one VM
5. Ability to grant same access to the VM to all the operators of the mybinder federation.

## Installing `k3s`

We can use the [quickstart](https://docs.k3s.io/quick-start) on the `k3s` website, with the added
config of _disabling traefik_ that comes built in. We deploy nginx as part of our deployment, so we
do not need traefik.

1. Create a Kubelet Config file in `/etc/kubelet.yaml` so we can
tweak various kubelet options, including maximum number of pods on a single
node:

```yaml
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
maxPods: 300
```
We will need to develop better intuition for how many pods per node, but given that we offer about
450MB of RAM per user, and RAM is the limiting factor (not CPU), let's roughly start with the
following formula to determine this:

`maxPods = 1.75 * amount of RAM in GB`

This adds a good amount of margin. We can tweak this later.
2. Install `k3s`!

```bash
curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --kubelet-arg=config=/etc/kubelet.yaml" sh -s - --disable=traefik
```

This runs for a minute, but should set up latest `k3s` on that node! You can verify that by running
`kubectl get node` and `kubectl version`.

## Extracting authentication information via a `KUBECONFIG` file

Follow https://docs.k3s.io/cluster-access#accessing-the-cluster-from-outside-with-kubectl

## Setup DNS entries

There's only one IP to set DNS entries for - the public IP of the VM. No loadbalancers or similar here.

mybinder.org's DNS is managed via Cloudflare. You should have access, or ask someone in the mybinder team who does!

Add the following entries:

- An `A` record for `X.mybinder.org` pointing towards the public IP. `X` should be an organizational identifier that identifies and thanks whoever is donating this.
- Another `A` record for `*.X.mybinder.org` to the same public IP

Give this a few minutes because it may take a while to propagate.

## Make a config copy for this new member

TODO

## Make a secret config for this new member

TODO

## Deploy binder!

## Test and validate

## Add to the redirector
4 changes: 2 additions & 2 deletions mybinder/templates/netpol.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ spec:
to:
- podSelector:
matchLabels:
app: nginx-ingress
component: controller
app.kubernetes.io/component: controller
app.kubernetes.io/name: ingress-nginx

{{- end }}
13 changes: 13 additions & 0 deletions mybinder/templates/registry/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{- if .Values.registry.enabled }}
# ConfigMap holding the container registry's config file, rendered from
# .Values.registry.config. toJson is used because JSON is valid YAML and
# avoids indentation pitfalls when embedding an arbitrary values subtree.
apiVersion: v1
kind: ConfigMap
metadata:
  name: registry-config
  labels:
    app: registry
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
data:
  config.yml: |
    {{ .Values.registry.config | toJson }}
{{- end }}
62 changes: 62 additions & 0 deletions mybinder/templates/registry/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{- if .Values.registry.enabled }}
# Deployment for an in-cluster container registry (CNCF Distribution).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: registry
  labels:
    app: registry
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    component: registry
spec:
  replicas: {{ .Values.registry.replicas }}
  selector:
    matchLabels:
      app: registry
      release: {{ .Release.Name }}
      component: registry
  template:
    metadata:
      annotations:
        # Checksum of the rendered config so pods restart when config changes
        checksum/registry-config: {{ include (print $.Template.BasePath "/registry/configmap.yaml") . | sha256sum }}
      labels:
        app: registry
        heritage: {{ .Release.Service }}
        release: {{ .Release.Name }}
        component: registry
    spec:
      automountServiceAccountToken: false
      nodeSelector: {{ toJson .Values.registry.nodeSelector }}
      volumes:
      - name: registry-config
        configMap:
          name: registry-config
      - name: registry-secret
        secret:
          secretName: registry-secret
      - name: registry-storage
        persistentVolumeClaim:
          claimName: registry
      containers:
      - name: registry
        image: registry:3.0.0-rc.2
        volumeMounts:
        - name: registry-config
          # This path is what the registry documentation *says* config
          # files should live in
          mountPath: /etc/distribution/config.yml
          subPath: config.yml
        - name: registry-config
          # This path is what the registry *actually* seems to read; mount
          # in both places to be safe
          mountPath: /etc/docker/registry/config.yml
          subPath: config.yml
        - name: registry-storage
          mountPath: /var/lib/registry
        - name: registry-secret
          mountPath: /etc/distribution/auth.htpasswd
          subPath: auth.htpasswd
        {{- with .Values.registry.resources }}
        resources:
          {{- . | toYaml | nindent 10 }}
        {{- end }}
{{- end }}
Loading

0 comments on commit 94eb603

Please sign in to comment.