Skip to content

Commit

Permalink
Merge pull request #115 from mlinfra-io/deploy-milvus-on-kind-cluster
Browse files Browse the repository at this point in the history
Deploy milvus on kind cluster
  • Loading branch information
aliabbasjaffri authored Oct 7, 2024
2 parents 7a8679f + 75c4582 commit aa21d5b
Show file tree
Hide file tree
Showing 22 changed files with 175 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .github/release-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ changelog:
- dependencies
- title: Documentation Updates
labels:
- docs
- documentation
2 changes: 1 addition & 1 deletion examples/kubernetes/complete/aws-complete-advanced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ deployment:
enable_nat_gateway: true
one_nat_gateway_per_az: false
kubernetes:
k8s_version: "1.28"
k8s_version: "1.30"
cluster_endpoint_public_access: true
spot_instance: false
tags:
Expand Down
2 changes: 1 addition & 1 deletion examples/kubernetes/lakefs/aws-lakefs-advanced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ deployment:
enable_nat_gateway: true
one_nat_gateway_per_az: false
kubernetes:
k8s_version: "1.28"
k8s_version: "1.30"
cluster_endpoint_public_access: true
spot_instance: false
tags:
Expand Down
2 changes: 1 addition & 1 deletion examples/kubernetes/mlflow/aws-mlflow-advanced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ deployment:
enable_nat_gateway: true
one_nat_gateway_per_az: false
kubernetes:
k8s_version: "1.28"
k8s_version: "1.30"
cluster_endpoint_public_access: true
spot_instance: false
tags:
Expand Down
2 changes: 1 addition & 1 deletion examples/kubernetes/prefect/aws-prefect-advanced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ deployment:
enable_nat_gateway: true
one_nat_gateway_per_az: false
kubernetes:
k8s_version: "1.29"
k8s_version: "1.30"
cluster_endpoint_public_access: true
spot_instance: false
tags:
Expand Down
4 changes: 3 additions & 1 deletion examples/local/kind-advanced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ deployment:
type: kind
config:
kubernetes:
k8s_version: 1.29
k8s_version: "1.30"
stack:
- data_versioning:
name: lakefs
- experiment_tracking:
name: mlflow
- orchestrator:
name: prefect
- vector_database:
name: milvus
2 changes: 2 additions & 0 deletions examples/local/kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ stack:
name: mlflow
- orchestrator:
name: prefect
- vector_database:
name: milvus
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Input/output declaration for the Milvus module.
# `milvus_chart_version` shares its name and default ("4.2.12") with the
# Terraform variable of the same name; `milvus_endpoint` matches the Terraform
# output of the same name.
inputs:
  - name: milvus_chart_version
    user_facing: true
    description: Version of the Milvus Helm chart to use
    default: "4.2.12"
outputs:
  - name: milvus_endpoint
    description: Milvus access endpoint.
    export: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
locals {
  # Ordered [name, value] pairs for the Helm `set` overrides applied to the
  # Milvus chart: cluster mode and Pulsar disabled, a single etcd replica,
  # standalone MinIO, and ingress enabled for both Milvus and the Attu UI.
  #
  # NOTE(review): var.milvus_endpoint defaults to "milvus-attu.localhost",
  # which is the same host hard-coded for the Attu ingress below. With the
  # default value both ingresses appear to claim the same host — confirm this
  # is intended.
  milvus_helmchart_overrides = [
    ["cluster.enabled", "false"],
    ["pulsar.enabled", "false"],
    ["etcd.replicaCount", "1"],
    ["minio.mode", "standalone"],
    ["ingress.enabled", "true"],
    ["ingress.rules[0].host", var.milvus_endpoint],
    ["ingress.rules[0].path", "/"],
    ["ingress.rules[0].pathType", "Prefix"],
    ["attu.enabled", "true"],
    ["attu.ingress.enabled", "true"],
    ["attu.ingress.hosts[0]", "milvus-attu.localhost"],
  ]

  # Expand every pair into the {name, value, type} object shape passed to the
  # helm chart module's `set` argument; each override uses type "auto".
  milvus_helmchart_set = [
    for override in local.milvus_helmchart_overrides : {
      name  = override[0]
      value = override[1]
      type  = "auto"
    }
  ]
}

# Installs the Milvus Helm chart into its own namespace via the shared
# helm_chart module.
module "milvus_helmchart" {
  source = "../../../../../cloud/aws/helm_chart"

  # Which chart to install and where it comes from.
  repository    = "https://zilliztech.github.io/milvus-helm/"
  chart         = "milvus"
  chart_version = var.milvus_chart_version

  # Release name and target namespace (created if missing).
  name             = "milvus"
  namespace        = "milvus"
  create_namespace = true

  # Individual `set` overrides, built in the locals block of this module.
  set = local.milvus_helmchart_set

  # values.yaml is a template; var.resources is injected as a JSON-encoded
  # string under the `resources` template variable.
  values = templatefile("${path.module}/values.yaml", {
    resources = jsonencode(var.resources)
  })
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Re-exports the configured Milvus endpoint hostname unchanged.
output "milvus_endpoint" {
  value       = var.milvus_endpoint
  description = "Milvus access endpoint."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
resources: ${resources}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Chart version handed to the helm chart module as `chart_version`.
variable "milvus_chart_version" {
  description = "Version of the Milvus Helm chart to use"
  type        = string
  default     = "4.2.12"
}

# CPU/memory requests and limits; JSON-encoded and substituted into the
# values.yaml template by the milvus_helmchart module call.
variable "resources" {
  description = "Resource requests and limits for Milvus pods"

  type = object({
    requests = object({
      cpu    = string
      memory = string
    })
    limits = object({
      cpu    = string
      memory = string
    })
  })

  default = {
    requests = {
      cpu    = "100m"
      memory = "128Mi"
    }
    limits = {
      cpu    = "500m"
      memory = "512Mi"
    }
  }
}

# Used as the host of the first Milvus ingress rule and returned unchanged by
# the `milvus_endpoint` output.
# NOTE(review): the default equals the host hard-coded for the Attu ingress in
# this module's locals — confirm the two are meant to share a hostname.
variable "milvus_endpoint" {
  type        = string
  description = "Hostname used for the Milvus ingress rule and exported as the Milvus access endpoint"
  default     = "milvus-attu.localhost"
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,42 @@ module "mlflow_data_artifacts_bucket" {

resource "aws_iam_policy" "mlflow_s3_iam_policy" {
count = var.remote_tracking ? 1 : 0
name_prefix = "mlflowS3AccessPolicy"
description = "Allows mlflow server access to the S3 bucket"
name_prefix = "MLFlowS3Access-"
description = "Allows MLflow server access to the S3 bucket for artifact storage"

policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "mlflowBucketAccess",
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:ListBucket",
"s3:GetBucketLocation",
"s3:AbortMultipartUpload",
"s3:ListMultipartUploadParts"
],
"Resource": [
"${module.mlflow_data_artifacts_bucket[0].bucket_arn}",
"${module.mlflow_data_artifacts_bucket[0].bucket_arn}/*"
]
}
]
}
EOF
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Sid = "MLFlowBucketAccess"
Effect = "Allow"
Action = [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
"s3:ListBucket",
"s3:GetBucketLocation",
"s3:AbortMultipartUpload",
"s3:ListMultipartUploadParts"
]
Resource = [
module.mlflow_data_artifacts_bucket[0].bucket_arn,
"${module.mlflow_data_artifacts_bucket[0].bucket_arn}/*"
]
},
{
Sid = "MLFlowBucketList"
Effect = "Allow"
Action = ["s3:ListAllMyBuckets"]
Resource = ["*"]
}
]
})

tags = merge(var.tags, {
Name = "MLFlowS3AccessPolicy"
Purpose = "MLflow artifact storage"
})
}

# create rds instance
Expand Down
4 changes: 2 additions & 2 deletions src/mlinfra/modules/cloud/aws/eks/eks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ inputs:
- name: k8s_version
user_facing: true
description: "EKS Cluster version"
default: "1.28"
default: "1.30"
- name: cluster_endpoint_private_access
user_facing: true
description: "Indicates whether the Amazon EKS private API server endpoint is enabled. Default is true. Read more here: https://docs.aws.amazon.com/eks/latest/userguide/cluster-endpoint.html"
Expand Down Expand Up @@ -83,7 +83,7 @@ inputs:
user_facing: true
description: "Tags for EKS Cluster"
default:
cluster_version: "1.28"
cluster_version: "1.30"
outputs:
clouds:
- aws
2 changes: 1 addition & 1 deletion src/mlinfra/modules/cloud/aws/eks/tf_module/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ variable "cluster_name" {
variable "k8s_version" {
type = string
description = "EKS Cluster version"
default = "1.28"
default = "1.30"
}

variable "vpc_id" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ variable "cluster_name" {
variable "cluster_version" {
type = string
description = "EKS Cluster version"
default = "1.28"
default = "1.30"
}

variable "nodegroup_name" {
Expand Down
1 change: 1 addition & 0 deletions src/mlinfra/modules/cloud/aws/helm_chart/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ resource "helm_release" "helm_chart" {
version = var.chart_version
cleanup_on_fail = var.cleanup_on_fail
atomic = true
timeout = var.timeout
values = [var.values]

dynamic "set" {
Expand Down
6 changes: 6 additions & 0 deletions src/mlinfra/modules/cloud/aws/helm_chart/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ variable "atomic" {
default = true
}

# Forwarded to the `timeout` argument of the helm_release resource in this
# module.
variable "timeout" {
  description = "Time in seconds to wait for any individual kubernetes operation. Defaults to 300 seconds."
  type        = number
  default     = 300
}

variable "values" {
type = any
description = "Chart values"
Expand Down
2 changes: 1 addition & 1 deletion src/mlinfra/modules/local/kind/k8s/k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ inputs:
- name: k8s_version
user_facing: true
description: "KinD Cluster version"
default: "1.28"
default: "1.30"
- name: wait_for_control_plane
user_facing: true
description: "Defines whether or not the provider will wait for the control plane to be ready. Defaults to false."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@ variable "cluster_name" {

variable "node_image" {
type = map(any)
description = "Sets Kubernetes image version for the KinD cluster"
description = "Sets Kubernetes image version for the KinD cluster. See versions on https://hub.docker.com/r/kindest/node/tags"
default = {
"1.27" = "kindest/node:v1.27.11"
"1.28" = "kindest/node:v1.28.7"
"1.29" = "kindest/node:v1.29.2"
"1.30" = "kindest/node:v1.30.0"
"1.27" = "kindest/node:v1.27.16"
"1.28" = "kindest/node:v1.28.13"
"1.29" = "kindest/node:v1.29.8"
"1.30" = "kindest/node:v1.30.4"
"1.31" = "kindest/node:v1.31.0"
}
}

variable "k8s_version" {
type = string
description = "Defines kubernetes version for the KinD cluster"
default = "1.29"
default = "1.30"
}

variable "wait_for_control_plane" {
Expand Down
2 changes: 1 addition & 1 deletion src/mlinfra/modules/local/kind/k8s/tf_module/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ variable "cluster_name" {
variable "k8s_version" {
type = string
description = "Defines kubernetes version for the KinD cluster"
default = "1.29"
default = "1.30"
}

variable "wait_for_control_plane" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_specified_provider_not_supported(self):
"subnet_cidr_blocks": ["10.0.1.0/24", "10.0.2.0/24"],
},
"kubernetes": {
"cluster_version": "1.28",
"cluster_version": "1.30",
"node_groups": [
{
"name": "worker-group",
Expand Down

0 comments on commit aa21d5b

Please sign in to comment.