diff --git a/CHANGELOG.md b/CHANGELOG.md
index e7178784..851852a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 ## unreleased
 
+## v1.1.2 - 2020.05.25
+
+* Fix: Handle the max. volumes per node limit correctly.
+* Introduce the new option `CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE`.
+
 ## v1.1.1 - 2020.04.28
 
 * Fix a problem with resizing luks-encrypted volumes while they are attached and mounted.
diff --git a/README.md b/README.md
index 8e4b67fd..9ab2174a 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ secret `my-pvc-luks-key`.
 ## Releases
 
 The cloudscale.ch CSI plugin follows [semantic versioning](https://semver.org/).
-The current version is: **`v1.1.1`**. The project is still under active development and may not be
+The current version is: **`v1.1.2`**. The project is still under active development and may not be
 production ready.
 
 * Bug fixes will be released as a `PATCH` update.
@@ -101,10 +101,10 @@ cloudscale    Opaque    1      18h
 
 Before you continue, be sure to checkout to a [tagged release](https://github.com/cloudscale-ch/csi-cloudscale/releases). Always use the [latest stable version](https://github.com/cloudscale-ch/csi-cloudscale/releases/latest)
 
-For example, to use the latest stable version (`v1.1.1`) you can execute the following command:
+For example, to use the latest stable version (`v1.1.2`) you can execute the following command:
 
 ```
-$ kubectl apply -f https://raw.githubusercontent.com/cloudscale-ch/csi-cloudscale/master/deploy/kubernetes/releases/csi-cloudscale-v1.1.1.yaml
+$ kubectl apply -f https://raw.githubusercontent.com/cloudscale-ch/csi-cloudscale/master/deploy/kubernetes/releases/csi-cloudscale-v1.1.2.yaml
 ```
 
 There are also `dev` images available:
@@ -193,6 +193,33 @@ $ kubectl exec -ti my-csi-app /bin/sh
 hello-world
 ```
 
+
+## Advanced Configuration
+
+Please use the following options with care.
+
+### Max. Number of CSI Volumes per Node
+
+By default, a limit of 23 CSI volumes per node applies. If you want to use a different
+value, you can set the following environment variable for the `csi-cloudscale-plugin` container
+in the `csi-cloudscale-node` DaemonSet:
+
+```
+env:
+  - name: CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE
+    value: '10'
+```
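+
+For example, if the release manifest from above is deployed, one way to set the
+variable is `kubectl set env` (shown with an assumed limit of 10):
+
+```
+$ kubectl -n kube-system set env daemonset/csi-cloudscale-node \
+    -c csi-cloudscale-plugin CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE=10
+```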
+
+Note that there is currently a hard limit of 26 volumes (including the root volume) per node.
+
 ## Development
 
 Requirements:
@@ -243,15 +270,15 @@ $ git push origin
 
 After it's merged to master, [create a new Github
 release](https://github.com/cloudscale-ch/csi-cloudscale/releases/new) from
-master with the version `v1.1.1` and then publish a new docker build:
+master with the version `v1.1.2` and then publish a new docker build:
 
 ```
 $ git checkout master
 $ make publish
 ```
 
-This will create a binary with version `v1.1.1` and docker image pushed to
-`cloudscalech/cloudscale-csi-plugin:v1.1.1`
+This will create a binary with version `v1.1.2` and a docker image pushed to
+`cloudscalech/cloudscale-csi-plugin:v1.1.2`.
 
 ## Contributing
 
diff --git a/VERSION b/VERSION
index 56130fb3..0f1acbd5 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v1.1.1
+v1.1.2
diff --git a/deploy/kubernetes/releases/csi-cloudscale-v1.1.2.yaml b/deploy/kubernetes/releases/csi-cloudscale-v1.1.2.yaml
new file mode 100644
index 00000000..972962a2
--- /dev/null
+++ b/deploy/kubernetes/releases/csi-cloudscale-v1.1.2.yaml
@@ -0,0 +1,534 @@
+# Copyright cloudscale.ch
+# Copyright 2018 DigitalOcean
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Configuration to deploy release version of the CSI cloudscale.ch
+# plugin (https://github.com/cloudscale-ch/csi-cloudscale) compatible with
+# Kubernetes >=v1.13.0
+#
+# example usage: kubectl create -f
+
+####################################################
+###########                            ############
+###########  CSI Node and Driver CRDs  ############
+###########                            ############
+####################################################
+---
+
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  creationTimestamp: null
+  name: csinodeinfos.csi.storage.k8s.io
+spec:
+  group: csi.storage.k8s.io
+  names:
+    kind: CSINodeInfo
+    plural: csinodeinfos
+  scope: Cluster
+  validation:
+    openAPIV3Schema:
+      properties:
+        csiDrivers:
+          description: List of CSI drivers running on the node and their properties.
+          items:
+            properties:
+              driver:
+                description: The CSI driver that this object refers to.
+                type: string
+              nodeID:
+                description: The node from the driver point of view.
+                type: string
+              topologyKeys:
+                description: List of keys supported by the driver.
+                items:
+                  type: string
+                type: array
+          type: array
+  version: v1alpha1
+status:
+  acceptedNames:
+    kind: ""
+    plural: ""
+  conditions: []
+  storedVersions: []
+
+---
+
+##########################################
+###########                  ############
+###########   Storage Class  ############
+###########                  ############
+##########################################
+
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: cloudscale-volume-ssd
+  namespace: kube-system
+  annotations:
+    storageclass.kubernetes.io/is-default-class: "true"
+provisioner: ch.cloudscale.csi
+allowVolumeExpansion: true
+parameters:
+  csi.cloudscale.ch/volume-type: ssd
+
+---
+
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: cloudscale-volume-bulk
+  namespace: kube-system
+provisioner: ch.cloudscale.csi
+allowVolumeExpansion: true
+parameters:
+  csi.cloudscale.ch/volume-type: bulk
+
+---
+
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: cloudscale-volume-ssd-luks
+  namespace: kube-system
+provisioner: ch.cloudscale.csi
+allowVolumeExpansion: true
+parameters:
+  csi.cloudscale.ch/volume-type: ssd
+  csi.cloudscale.ch/luks-encrypted: "true"
+  csi.cloudscale.ch/luks-cipher: "aes-xts-plain64"
+  csi.cloudscale.ch/luks-key-size: "512"
+  csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace}
+  csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}-luks-key
+
+---
+
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: cloudscale-volume-bulk-luks
+  namespace: kube-system
+provisioner: ch.cloudscale.csi
+allowVolumeExpansion: true
+parameters:
+  csi.cloudscale.ch/volume-type: bulk
+  csi.cloudscale.ch/luks-encrypted: "true"
+  csi.cloudscale.ch/luks-cipher: "aes-xts-plain64"
+  csi.cloudscale.ch/luks-key-size: "512"
+  csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace}
+  csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}-luks-key
+
+---
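+
+# Note: with the LUKS storage classes above, a PVC named "my-pvc" resolves the
+# ${pvc.name}-luks-key template to a secret named "my-pvc-luks-key" in the
+# PVC's namespace; that secret must hold the LUKS passphrase (see the README).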
+
+##############################################
+###########                      ############
+###########   Controller plugin  ############
+###########                      ############
+##############################################
+
+kind: StatefulSet
+apiVersion: apps/v1
+metadata:
+  name: csi-cloudscale-controller
+  namespace: kube-system
+spec:
+  serviceName: "csi-cloudscale"
+  selector:
+    matchLabels:
+      app: csi-cloudscale-controller
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: csi-cloudscale-controller
+        role: csi-cloudscale
+    spec:
+      hostNetwork: true
+      priorityClassName: system-cluster-critical
+      serviceAccount: csi-cloudscale-controller-sa
+      containers:
+        - name: csi-provisioner
+          image: quay.io/k8scsi/csi-provisioner:v1.0.1
+          imagePullPolicy: "Always"
+          args:
+            - "--provisioner=ch.cloudscale.csi"
+            - "--csi-address=$(ADDRESS)"
+            - "--v=5"
+          env:
+            - name: ADDRESS
+              value: /var/lib/csi/sockets/pluginproxy/csi.sock
+          volumeMounts:
+            - name: socket-dir
+              mountPath: /var/lib/csi/sockets/pluginproxy/
+        - name: csi-attacher
+          image: quay.io/k8scsi/csi-attacher:v1.0.1
+          imagePullPolicy: "Always"
+          args:
+            - "--v=5"
+            - "--csi-address=$(ADDRESS)"
+          env:
+            - name: ADDRESS
+              value: /var/lib/csi/sockets/pluginproxy/csi.sock
+          volumeMounts:
+            - name: socket-dir
+              mountPath: /var/lib/csi/sockets/pluginproxy/
+        - name: csi-resizer
+          image: quay.io/k8scsi/csi-resizer:v0.3.0
+          args:
+            - "--v=5"
+            - "--csi-address=$(ADDRESS)"
+            - "--csiTimeout=30s"
+          env:
+            - name: ADDRESS
+              value: /var/lib/csi/sockets/pluginproxy/csi.sock
+          imagePullPolicy: "IfNotPresent"
+          volumeMounts:
+            - name: socket-dir
+              mountPath: /var/lib/csi/sockets/pluginproxy/
+        - name: csi-cluster-driver-registrar
+          image: quay.io/k8scsi/csi-cluster-driver-registrar:v1.0.1
+          args:
+            - "--v=5"
+            - "--pod-info-mount-version=\"v1\""
+            - "--csi-address=$(ADDRESS)"
+          env:
+            - name: ADDRESS
+              value: /var/lib/csi/sockets/pluginproxy/csi.sock
+          volumeMounts:
+            - name: socket-dir
+              mountPath: /var/lib/csi/sockets/pluginproxy/
+        - name: csi-cloudscale-plugin
+          image: cloudscalech/cloudscale-csi-plugin:v1.1.2
+          args:
+            - "--endpoint=$(CSI_ENDPOINT)"
+            - "--url=$(CLOUDSCALE_API_URL)"
+          env:
+            - name: CSI_ENDPOINT
+              value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock
+            - name: CLOUDSCALE_API_URL
+              value: https://api.cloudscale.ch/
+            - name: CLOUDSCALE_ACCESS_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: cloudscale
+                  key: access-token
+          imagePullPolicy: "Always"
+          volumeMounts:
+            - name: socket-dir
+              mountPath: /var/lib/csi/sockets/pluginproxy/
+      volumes:
+        - name: socket-dir
+          emptyDir: {}
+
+---
+
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: csi-cloudscale-controller-sa
+  namespace: kube-system
+
+---
+
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: csi-cloudscale-provisioner-role
+rules:
+  - apiGroups: [""]
+    resources: ["secrets"]
+    verbs: ["get", "list"]
+  - apiGroups: [""]
+    resources: ["persistentvolumes"]
+    verbs: ["get", "list", "watch", "create", "delete"]
+  - apiGroups: [""]
+    resources: ["persistentvolumeclaims"]
+    verbs: ["get", "list", "watch", "update"]
+  - apiGroups: ["storage.k8s.io"]
+    resources: ["storageclasses"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["events"]
+    verbs: ["list", "watch", "create", "update", "patch"]
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshots"]
+    verbs: ["get", "list"]
+  - apiGroups: ["snapshot.storage.k8s.io"]
+    resources: ["volumesnapshotcontents"]
+    verbs: ["get", "list"]
+  # cluster-driver-registrar currently needs permissions to create the CSIDriver CRD
+  # see https://github.com/kubernetes-csi/cluster-driver-registrar/issues/3
+  - apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"] + verbs: ["get", "list", "create"] + + +--- + +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-provisioner-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-provisioner-role + apiGroup: rbac.authorization.k8s.io + +--- +# Attacher must be able to work with PVs, nodes and VolumeAttachments +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-attacher-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["csi.storage.k8s.io"] + resources: ["csinodeinfos"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update"] +--- + +# Resizer must be able to work with PVCs, PVs, SCs. +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-resizer-role +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["update", "patch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + +--- + +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-resizer-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-resizer-role + apiGroup: rbac.authorization.k8s.io + +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-attacher-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-attacher-role + apiGroup: rbac.authorization.k8s.io + +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-cluster-driver-registrar-role +rules: + - apiGroups: ["csi.storage.k8s.io"] + resources: ["csidrivers"] + verbs: ["create", "delete"] + +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-cluster-driver-registrar-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-cluster-driver-registrar-role + apiGroup: rbac.authorization.k8s.io + +--- + + +######################################## +########### ############ +########### Node plugin ############ +########### ############ +######################################## + +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-cloudscale-node + namespace: kube-system +spec: + selector: + matchLabels: + app: csi-cloudscale-node + template: + metadata: + labels: + app: csi-cloudscale-node + role: csi-cloudscale + spec: + priorityClassName: system-node-critical + serviceAccount: csi-cloudscale-node-sa + hostNetwork: true + containers: + - name: driver-registrar + image: quay.io/k8scsi/csi-node-driver-registrar:v1.0.2 + args: + - "--v=5" + - "--csi-address=$(ADDRESS)" + - 
"--kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)" + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "rm -rf /registration/csi.cloudscale.ch /registration/csi.cloudscale.ch-reg.sock"] + env: + - name: ADDRESS + value: /csi/csi.sock + - name: DRIVER_REG_SOCK_PATH + value: /var/lib/kubelet/plugins/csi.cloudscale.ch/csi.sock + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: plugin-dir + mountPath: /csi/ + - name: registration-dir + mountPath: /registration/ + - name: csi-cloudscale-plugin + image: cloudscalech/cloudscale-csi-plugin:v1.1.2 + imagePullPolicy: "Always" + args : + - "--endpoint=$(CSI_ENDPOINT)" + - "--url=$(CLOUDSCALE_API_URL)" + env: + - name: CSI_ENDPOINT + value: unix:///csi/csi.sock + - name: CLOUDSCALE_API_URL + value: https://api.cloudscale.ch/ + - name: CLOUDSCALE_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: cloudscale + key: access-token + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + volumeMounts: + - name: plugin-dir + mountPath: /csi + - name: pods-mount-dir + mountPath: /var/lib/kubelet + # needed so that any mounts setup inside this container are + # propagated back to the host machine. + mountPropagation: "Bidirectional" + - name: device-dir + mountPath: /dev + - name: tmpfs + mountPath: /tmp + volumes: + - name: registration-dir + hostPath: + path: /var/lib/kubelet/plugins_registry/ + type: DirectoryOrCreate + - name: plugin-dir + hostPath: + path: /var/lib/kubelet/plugins/csi.cloudscale.ch + type: DirectoryOrCreate + - name: pods-mount-dir + hostPath: + path: /var/lib/kubelet + type: Directory + - name: device-dir + hostPath: + path: /dev + # to make sure temporary stored luks keys never touch a disk + - name: tmpfs + emptyDir: + medium: Memory + +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-cloudscale-node-sa + namespace: kube-system + +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-driver-registrar-role + namespace: kube-system +rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + +--- + +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-cloudscale-driver-registrar-binding +subjects: + - kind: ServiceAccount + name: csi-cloudscale-node-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-cloudscale-driver-registrar-role + apiGroup: rbac.authorization.k8s.io diff --git a/driver/controller.go b/driver/controller.go index 9902d876..58e837ec 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -21,6 +21,7 @@ import ( "context" "fmt" "net/http" + "regexp" "strconv" "strings" @@ -61,6 +62,10 @@ var ( supportedAccessMode = &csi.VolumeCapability_AccessMode{ Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, } + + // maxVolumesPerServerErrorMessage is the error message returned by the cloudscale.ch + // API when the per-server volume limit would be exceeded. + maxVolumesPerServerErrorMessageRe = regexp.MustCompile("Due to internal limitations, it is currently not possible to attach more than \\d+ volumes") ) // CreateVolume creates a new volume from the given request. 
@@ -254,6 +261,10 @@ func (d *Driver) ControllerPublishVolume(ctx context.Context, req *csi.Controlle
 	}
 	err := d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, attachRequest)
 	if err != nil {
+		if maxVolumesPerServerErrorMessageRe.MatchString(err.Error()) {
+			return nil, status.Error(codes.ResourceExhausted, err.Error())
+		}
+
 		return nil, reraiseNotFound(err, ll, "attaching volume")
 	}
 
diff --git a/driver/driver_test.go b/driver/driver_test.go
index db33be66..012ed79d 100644
--- a/driver/driver_test.go
+++ b/driver/driver_test.go
@@ -71,6 +71,7 @@ func TestDriverSuite(t *testing.T) {
 		TargetPath:  targetDir,
 		StagingPath: stagingDir,
 	}
+	cfg.TestNodeVolumeAttachLimit = true
 	sanity.Test(t, cfg)
 }
 
@@ -202,12 +203,23 @@ func (f FakeVolumeServiceOperations) Update(ctx context.Context, volumeID string
 	if len(serverUUIDs) > 1 {
 		return errors.New("multi attach is not implemented")
 	}
-	for _, serverUUID := range serverUUIDs {
-		_, err := f.fakeClient.Servers.Get(nil, serverUUID)
-		if err != nil {
-			return err
+	if len(serverUUIDs) == 1 {
+		for _, serverUUID := range serverUUIDs {
+			_, err := f.fakeClient.Servers.Get(nil, serverUUID)
+			if err != nil {
+				return err
+			}
+
+			volumesCount := getVolumesPerServer(f, serverUUID)
+			if volumesCount >= defaultMaxVolumesPerNode {
+				return &cloudscale.ErrorResponse{
+					StatusCode: 400,
+					Message:    map[string]string{"detail": "Due to internal limitations, it is currently not possible to attach more than 26 volumes"},
+				}
+			}
 		}
 	}
+
 	vol.ServerUUIDs = &serverUUIDs
 	return nil
 }
@@ -219,6 +231,18 @@ func (f FakeVolumeServiceOperations) Update(ctx context.Context, volumeID string
 	panic("implement me")
 }
 
+func getVolumesPerServer(f FakeVolumeServiceOperations, serverUUID string) int {
+	volumesCount := 0
+	for _, v := range f.volumes {
+		for _, uuid := range *v.ServerUUIDs {
+			if uuid == serverUUID {
+				volumesCount++
+			}
+		}
+	}
+	return volumesCount
+}
+
 func (f FakeVolumeServiceOperations) Delete(ctx context.Context, volumeID string) error {
 	delete(f.volumes, volumeID)
 	return nil
diff --git a/driver/node.go b/driver/node.go
index 217c809f..9e8a3ba1 100644
--- a/driver/node.go
+++ b/driver/node.go
@@ -27,6 +27,8 @@ package driver
 
 import (
 	"context"
+	"os"
+	"strconv"
 
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"github.com/sirupsen/logrus"
@@ -37,10 +39,11 @@ import (
 )
 
 const (
-	// TODO we're not sure yet what our limit is, so just use this for now.
-	// It's the limit for Google Compute Engine and I don't see what limits
-	// this more in OpenStack, except per User Quotas.
-	maxVolumesPerNode = 128
+	// The technical limit is currently 26 volumes (device letters a-z), minus:
+	// - 1 for the root volume
+	// - 1 for /var/lib/docker
+	// - 1 additional volume outside of CSI
+	defaultMaxVolumesPerNode = 23
 )
 
 // NodeStageVolume mounts the volume to a staging path on the node. This is
@@ -318,12 +321,29 @@ func (d *Driver) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabi
 	}, nil
 }
 
+// getEnvAsInt returns the value of the environment variable key parsed as an
+// int64, or fallback if the variable is unset or not a valid integer.
+func getEnvAsInt(key string, fallback int64) int64 {
+	if valueStr, ok := os.LookupEnv(key); ok {
+		if value, err := strconv.ParseInt(valueStr, 10, 64); err == nil {
+			return value
+		}
+	}
+	return fallback
+}
+
 // NodeGetInfo returns the supported capabilities of the node server. This
 // should eventually return the droplet ID if possible. This is used so the CO
 // knows where to place the workload. The result of this function will be used
 // by the CO in ControllerPublishVolume.
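+// If the environment variable CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE is set to a
+// valid integer, its value is reported as MaxVolumesPerNode; otherwise the
+// default of 23 (defaultMaxVolumesPerNode) applies.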
 func (d *Driver) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRequest) (*csi.NodeGetInfoResponse, error) {
 	d.log.WithField("method", "node_get_info").Info("node get info called")
+
+	maxVolumesPerNode := getEnvAsInt("CLOUDSCALE_MAX_CSI_VOLUMES_PER_NODE", defaultMaxVolumesPerNode)
+
 	return &csi.NodeGetInfoResponse{
 		NodeId:            d.serverId,
 		MaxVolumesPerNode: maxVolumesPerNode,