Commit 7616735: add .gitignore
Signed-off-by: Spolti <[email protected]>
spolti committed Nov 11, 2024 (1 parent: 00ccccb)
Showing 13 changed files with 185 additions and 27 deletions.
50 changes: 50 additions & 0 deletions .gitignore
@@ -0,0 +1,50 @@
# IntelliJ IDEA
.idea/
*.iws
*.iml
*.ipr

# User-specific files
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

# Logs
logs
*.log

# OS-specific files
.DS_Store
Thumbs.db

# Compiled source
*.class
*.war
*.ear

# Generated files
*.jar
*.dll
*.exe
*.o
*.pyc
*.pyo
*.pyd
*.pid
*.gz
*.out
*.log

# Directories
bin/
gen/
out/
target/
build/
dist/
node_modules/
coverage/
.idea/
.vscode/
8 changes: 0 additions & 8 deletions .idea/.gitignore

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

4 changes: 0 additions & 4 deletions .idea/wg-serving.iml

This file was deleted.

3 changes: 0 additions & 3 deletions go.mod

This file was deleted.

3 changes: 3 additions & 0 deletions serving-catalog/catalog.md
Expand Up @@ -6,3 +6,6 @@
| Deployment | [vLLM](https://github.com/vllm-project/vllm) | llama3-8b | GKE | [README](./core/deployment/vllm/llama3-8b/gke/README.md) |
| Deployment | [vLLM](https://github.com/vllm-project/vllm) | llama3-70b | GKE | [README](./core/deployment/vllm/llama3-70b/gke/README.md) |
| Deployment | [JetStream](https://github.com/google/JetStream) | gemma-7b-it | GKE | [README](./core/deployment/jetstream/gemma-7b-it/gke/README.md) |
| InferenceService | [KServe sklearn](https://github.com/kserve/kserve) | iris | | [README](./kserve/inferenceservice/sklearn-iris/README.md) |


29 changes: 23 additions & 6 deletions serving-catalog/kserve/README.md
@@ -1,20 +1,37 @@
# KServe Serving

The KServe examples in this Serving Catalog are deliberately decoupled from most of the underlying infrastructure, which keeps the focus on the pure serving aspect of each example.

The `RawDeployment` mode is used to deploy the models. It is the simplest way to deploy a model because, unlike the `Serverless` mode, it does not depend on Istio and Knative.

For more information about `RawDeployment` and other deployment methods, please refer to the [KServe documentation](https://kserve.github.io/website/latest/admin/kubernetes_deployment/).


## Cert Manager and KServe

The installation script installs `cert-manager` and the `KServe` controller, and configures the default `IngressClass` to `nginx`, which is the default ingress class on Minikube.

```shell
install/perform.sh
```

Cleanup:
```shell
install/perform.sh cleanup
```

After installation, patch the `IngressClass` to `nginx`, or to the ingress class you have selected:
```shell
export INGRESS_CLASS_NAME=nginx
kubectl patch cm inferenceservice-config -n kserve --type=merge --patch-file=patches/ingress-patch.yaml
```

## TODOs

- [ ] Add the inference services
- [ ] Use kustomize to deploy the runtimes and inference services
- [ ] Update the catalog once the examples are ready
- [ ] Update the validate script to check the KServe examples
- [ ] Update the catalog.md once the inference services and the kustomize deployment are ready
- [ ] Add an inference service for vLLM
- [X] Add script to install KServe and the CertManager dependency
- [ ] Find a way to deploy the serving runtime together with the example
- [ ] Add custom Serving Runtimes
26 changes: 26 additions & 0 deletions serving-catalog/kserve/inferenceservice/README.md
@@ -1,3 +1,29 @@
# KServe Inference Services

This directory contains the KServe InferenceService examples.

## sklearn-iris inference call

To consume the inference service, you can use the following command:

```shell
curl -X POST -H "Content-Type: application/json" \
-d @payloads/sklearn-iris.json \
--resolve "sklearn-iris-kserve-test.example.com:80:192.168.105.2" \
http://sklearn-iris-kserve-test.example.com/v1/models/sklearn-iris:predict
```

The `--resolve` flag maps the DNS name of the endpoint, which is managed by the `IngressClass` in your cluster, to the ingress IP address, so no real DNS entry is required.
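If `curl` is not convenient, the same request can be prepared from Python. This is a minimal sketch, not part of the catalog: the ingress IP, domain, and payload are the values from the `curl` example above (adjust them for your cluster), and setting the `Host` header plays the role of `--resolve`.

```python
import json

# Hypothetical helper mirroring the curl call above. The ingress IP
# (192.168.105.2) and domain (example.com) are the values used in the
# curl example; replace them with your cluster's ingress address.
def build_predict_request(model="sklearn-iris", namespace="kserve-test",
                          domain="example.com", ingress_ip="192.168.105.2"):
    # Host follows the domainTemplate {{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}
    host = f"{model}-{namespace}.{domain}"
    url = f"http://{ingress_ip}/v1/models/{model}:predict"
    headers = {"Host": host, "Content-Type": "application/json"}
    payload = {"instances": [[6.8, 2.8, 4.8, 1.4],
                             [6.0, 3.4, 4.5, 1.6]]}
    return url, headers, json.dumps(payload)

url, headers, body = build_predict_request()
# Send with e.g. requests.post(url, headers=headers, data=body)
```

Posting `body` to `url` with the `Host` header set is equivalent to the `curl` invocation: the ingress routes on the host name, so no DNS entry for `example.com` is needed.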

The output should be something similar to:

```json
{
  "predictions": [
    1, 1
  ]
}
```
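The returned values are class indices. Assuming the standard scikit-learn iris target ordering (an assumption about this pre-trained model; the catalog does not state the label mapping), they map back to species names like so:

```python
# Standard scikit-learn iris target ordering (assumed for this model).
IRIS_CLASSES = ["setosa", "versicolor", "virginica"]

predictions = [1, 1]  # the "predictions" field from the response above
print([IRIS_CLASSES[i] for i in predictions])  # → ['versicolor', 'versicolor']
```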

6 changes: 6 additions & 0 deletions serving-catalog/kserve/inferenceservice/payloads/sklearn-iris.json
@@ -0,0 +1,6 @@
{
  "instances": [
    [6.8, 2.8, 4.8, 1.4],
    [6.0, 3.4, 4.5, 1.6]
  ]
}
13 changes: 13 additions & 0 deletions serving-catalog/kserve/inferenceservice/sklearn-iris.yaml
@@ -0,0 +1,13 @@
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "sklearn-iris"
  annotations:
    serving.kserve.io/deploymentMode: "RawDeployment"
    kubernetes.io/ingress.class: "nginx"
spec:
  predictor:
    model:
      modelFormat:
        name: sklearn
      storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
39 changes: 39 additions & 0 deletions serving-catalog/kserve/install/perform.sh
@@ -0,0 +1,39 @@
#!/bin/bash

set -eo pipefail

export CERT_MANAGER_VERSION=v1.9.0


# Function to clean up resources
cleanup() {
  echo "Cleaning up resources..."
  kubectl delete -f https://github.com/kserve/kserve/releases/download/v0.13.1/kserve.yaml || true
  kubectl delete -f https://github.com/jetstack/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml || true
  echo "Cleanup completed."
}


if [ "$1" == "cleanup" ]; then
  cleanup
  exit 0
fi

# Install cert-manager
kubectl apply --validate=true -f https://github.com/jetstack/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml
kubectl wait --for=condition=available --timeout=600s deployment/cert-manager-webhook -n cert-manager

# Install KServe
kubectl apply --validate=true -f https://github.com/kserve/kserve/releases/download/v0.13.1/kserve.yaml
kubectl wait --for=condition=ready pod -l control-plane=kserve-controller-manager -n kserve --timeout=300s

# default to raw deployment
cat <<EOF > deploy-config-patch.yaml
data:
  deploy: |
    {
      "defaultDeploymentMode": "RawDeployment"
    }
EOF
kubectl patch cm inferenceservice-config -n kserve --type=merge --patch-file=deploy-config-patch.yaml

20 changes: 20 additions & 0 deletions serving-catalog/kserve/patches/ingress-patch.yaml
@@ -0,0 +1,20 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: inferenceservice-config
  namespace: kserve
data:
  ingress: |
    {
      "ingressGateway": "knative-serving/knative-ingress-gateway",
      "ingressService": "istio-ingressgateway.istio-system.svc.cluster.local",
      "localGateway": "knative-serving/knative-local-gateway",
      "localGatewayService": "knative-local-gateway.istio-system.svc.cluster.local",
      "ingressDomain": "example.com",
      "ingressClassName": "nginx",
      "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}",
      "urlScheme": "http",
      "disableIstioVirtualHost": false,
      "disableIngressCreation": false
    }
5 changes: 5 additions & 0 deletions serving-catalog/kserve/patches/kustomization.yaml
@@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

patches:
- path: ingress-patch.yaml
