diff --git a/README.org b/README.org index 8650b69..b3dfc5b 100644 --- a/README.org +++ b/README.org @@ -49,6 +49,8 @@ huggingface-cli login --token "${HUGGINGFACE_TOKEN}" git clone https://huggingface.co/instructlab/granite-7b-lab #+end_src +** TODO Create Minio Bucket `models` + ** Upload model to on-prem cluster minio TODO Run aws configure and pull values out of that automatically. diff --git a/data/hackathon/README.org b/data/hackathon/README.org deleted file mode 100644 index c290ccd..0000000 --- a/data/hackathon/README.org +++ /dev/null @@ -1,618 +0,0 @@ -#+TITLE: OpenShift Workshops -#+AUTHOR: James Blair -#+DATE: <2024-04-14 Sun> - -This directory contains a set of scenarios to be used for an [[https://www.redhat.com/en/technologies/cloud-computing/openshift/virtualization][OpenShift Virtualisation]] hackathon and was created for an internal enablement exercise at Red Hat. - -* Pre-requisites - -The hackathon is run by breaking attendees into small teams of 2-4 and assigning each team a bare metal OpenShift 4 cluster with OpenShift Virtualisation already installed. - -For our purposes we have clusters running in Equinix Metal provisioned via the [[https://demo.redhat.com/catalog?item=babylon-catalog-prod/equinix-metal.roadshow-ocpvirt.prod&utm_source=webapp&utm_medium=share-link][Red Hat Demo System]]. - - -* Cluster setup - -Follow the steps below to prepare each cluster in advance of the hackathon. 
- -#+begin_src tmux -# Create the exercise three namespace -oc create namespace demotestwtf17 - -# Create the exercise three virtual machine -cat << EOF | oc apply --namespace demotestwtf17 --filename - -apiVersion: kubevirt.io/v1 -kind: VirtualMachine -metadata: - name: fedora - namespace: demotestwtf17 - finalizers: - - kubevirt.io/virtualMachineControllerFinalize - labels: - app: fedora - vm.kubevirt.io/template: fedora-server-small - vm.kubevirt.io/template.namespace: openshift - vm.kubevirt.io/template.revision: '1' - vm.kubevirt.io/template.version: v0.25.0 -spec: - dataVolumeTemplates: - - apiVersion: cdi.kubevirt.io/v1beta1 - kind: DataVolume - metadata: - creationTimestamp: null - name: fedora - spec: - sourceRef: - kind: DataSource - name: fedora - namespace: openshift-virtualization-os-images - storage: - resources: - requests: - storage: 30Gi - running: true - template: - metadata: - annotations: - vm.kubevirt.io/flavor: small - vm.kubevirt.io/os: fedora - vm.kubevirt.io/workload: server - creationTimestamp: null - labels: - kubevirt.io/domain: fedora - kubevirt.io/size: small - spec: - domain: - cpu: - model: Superlegitcpu9000 - cores: 1 - sockets: 1 - threads: 1 - devices: - disks: - - disk: - bus: virtio - name: rootdisk - - disk: - bus: virtio - name: cloudinitdisk - interfaces: - - macAddress: '02:d5:73:00:00:07' - masquerade: {} - model: virtio - name: default - networkInterfaceMultiqueue: true - rng: {} - features: - acpi: {} - smm: - enabled: true - firmware: - bootloader: - efi: {} - machine: - type: pc-q35-rhel9.2.0 - resources: - requests: - memory: 2Gi - evictionStrategy: LiveMigrate - networks: - - name: default - pod: {} - nodeSelector: - cpumodel: totallylegitipromise - terminationGracePeriodSeconds: 180 - volumes: - - dataVolume: - name: fedora - name: rootdisk - - cloudInitNoCloud: - userData: |- - #cloud-config - user: fedora - password: fedora - chpasswd: { expire: False } - name: cloudinitdisk -EOF -#+end_src - -#+begin_src tmux -# 
Create the exercise five namespace -oc create namespace super-important-dont-deleteme - -# Create the exercise five virtual machine -cat << EOF | oc --namespace super-important-dont-deleteme apply --filename - -apiVersion: kubevirt.io/v1 -kind: VirtualMachine -metadata: - name: cryto-carnivore-cpuminer3000 - finalizers: - - kubevirt.io/virtualMachineControllerFinalize - labels: - app: cryto-carnivore-cpuminer3000 - vm.kubevirt.io/template: centos7-server-small - vm.kubevirt.io/template.namespace: openshift - vm.kubevirt.io/template.revision: '1' - vm.kubevirt.io/template.version: v0.25.0 -spec: - dataVolumeTemplates: - - apiVersion: cdi.kubevirt.io/v1beta1 - kind: DataVolume - metadata: - creationTimestamp: null - name: cryto-carnivore-cpuminer3000 - spec: - sourceRef: - kind: DataSource - name: centos7 - namespace: openshift-virtualization-os-images - storage: - resources: - requests: - storage: 30Gi - running: true - template: - metadata: - annotations: - vm.kubevirt.io/flavor: small - vm.kubevirt.io/os: centos7 - vm.kubevirt.io/workload: server - creationTimestamp: null - labels: - kubevirt.io/domain: cryto-carnivore-cpuminer3000 - kubevirt.io/size: small - spec: - domain: - cpu: - cores: 1 - sockets: 1 - threads: 1 - devices: - disks: - - disk: - bus: virtio - name: rootdisk - - disk: - bus: virtio - name: cloudinitdisk - interfaces: - - macAddress: '02:d5:73:00:00:0b' - masquerade: {} - model: virtio - name: default - networkInterfaceMultiqueue: true - rng: {} - machine: - type: pc-q35-rhel9.2.0 - resources: - requests: - memory: 2Gi - evictionStrategy: LiveMigrate - networks: - - name: default - pod: {} - terminationGracePeriodSeconds: 180 - volumes: - - dataVolume: - name: cryto-carnivore-cpuminer3000 - name: rootdisk - - cloudInitNoCloud: - userData: |- - #cloud-config - user: centos - password: 123456 - chpasswd: { expire: False } - name: cloudinitdisk -EOF -#+end_src - -#+begin_src tmux -# Break the storage class for exercise five -oc patch storageclass 
ocs-storagecluster-ceph-rbd --type='merge' --patch-file /dev/stdin <<-EOF -allowVolumeExpansion: false -EOF -#+end_src - - - -* Automated scenario population - -To quickly setup an example environment with all solutions populated you can use the following source blocks. - -** Exercise two - What about my legacy technical debt? - -#+begin_src tmux -# Create namespace -oc create namespace crusty-corp - -# Create the virtual machine template -#+begin_src tmux -cat << 'EOF' | oc apply --namespace crusty-corp --filename - -kind: Template -apiVersion: template.openshift.io/v1 -metadata: - name: centos5-server-small - namespace: crusty-corp - labels: - app.kubernetes.io/part-of: hyperconverged-cluster - os.template.kubevirt.io/centos5.0: 'true' - flavor.template.kubevirt.io/small: 'true' - template.kubevirt.io/version: v0.25.0 - app.kubernetes.io/version: 4.13.8 - template.kubevirt.io/type: base - app.kubernetes.io/component: templating - app.kubernetes.io/managed-by: ssp-operator - template.kubevirt.io/default-os-variant: 'true' - app.kubernetes.io/name: common-templates - workload.template.kubevirt.io/server: 'true' - annotations: - template.kubevirt.io/provider: Red Hat - name.os.template.kubevirt.io/centos5.0: CentOS 5 or higher - template.kubevirt.io/provider-url: 'https://www.centos.org' - template.kubevirt.io/containerdisks: | - quay.io/containerdisks/centos:7-2009 - template.kubevirt.io/version: v1alpha1 - openshift.io/display-name: CentOS 5 VM - openshift.io/documentation-url: 'https://github.com/kubevirt/common-templates' - template.kubevirt.io/images: > - https://cloud.centos.org/centos/7/images/CentOS-7-x86_64-GenericCloud.qcow2 - operator-sdk/primary-resource-type: SSP.ssp.kubevirt.io - defaults.template.kubevirt.io/disk: rootdisk - template.kubevirt.io/editable: | - /objects[0].spec.template.spec.domain.cpu.sockets - /objects[0].spec.template.spec.domain.cpu.cores - /objects[0].spec.template.spec.domain.cpu.threads - 
/objects[0].spec.template.spec.domain.resources.requests.memory - /objects[0].spec.template.spec.domain.devices.disks - /objects[0].spec.template.spec.volumes - /objects[0].spec.template.spec.networks - template.openshift.io/bindable: 'false' - openshift.kubevirt.io/pronounceable-suffix-for-name-expression: 'true' - operator-sdk/primary-resource: openshift-cnv/ssp-kubevirt-hyperconverged - tags: 'hidden,kubevirt,virtualmachine,linux,centos' - template.kubevirt.io/provider-support-level: Community - description: >- - Template for CentOS 5 VM or newer. A PVC with the CentOS disk image must - be available. - openshift.io/support-url: 'https://github.com/kubevirt/common-templates/issues' - iconClass: icon-centos - openshift.io/provider-display-name: Red Hat -objects: - - apiVersion: kubevirt.io/v1 - kind: VirtualMachine - metadata: - annotations: - vm.kubevirt.io/validations: | - [ - { - "name": "minimal-required-memory", - "path": "jsonpath::.spec.domain.resources.requests.memory", - "rule": "integer", - "message": "This VM requires more memory.", - "min": 1073741824 - } - ] - labels: - app: '${NAME}' - vm.kubevirt.io/template: centos5-server-small - vm.kubevirt.io/template.revision: '1' - vm.kubevirt.io/template.version: v0.25.0 - name: '${NAME}' - spec: - dataVolumeTemplates: - - apiVersion: cdi.kubevirt.io/v1beta1 - kind: DataVolume - metadata: - name: '${NAME}' - spec: - sourceRef: - kind: DataSource - name: '${DATA_SOURCE_NAME}' - namespace: '${DATA_SOURCE_NAMESPACE}' - storage: - resources: - requests: - storage: 30Gi - running: false - template: - metadata: - annotations: - vm.kubevirt.io/flavor: small - vm.kubevirt.io/os: centos5 - vm.kubevirt.io/workload: server - labels: - kubevirt.io/domain: '${NAME}' - kubevirt.io/size: small - spec: - domain: - cpu: - cores: 1 - sockets: 1 - threads: 1 - devices: - disks: - - disk: - bus: virtio - name: rootdisk - - disk: - bus: virtio - name: cloudinitdisk - interfaces: - - masquerade: {} - model: virtio - name: default 
- networkInterfaceMultiqueue: true - rng: {} - machine: - type: pc-q35-rhel9.2.0 - resources: - requests: - memory: 2Gi - evictionStrategy: LiveMigrate - networks: - - name: default - pod: {} - terminationGracePeriodSeconds: 180 - volumes: - - dataVolume: - name: '${NAME}' - name: rootdisk - - cloudInitNoCloud: - userData: |- - #cloud-config - user: centos - password: ${CLOUD_USER_PASSWORD} - chpasswd: { expire: False } - name: cloudinitdisk -parameters: - - name: NAME - description: VM name - generate: expression - from: 'centos5-[a-z0-9]{16}' - - name: DATA_SOURCE_NAME - description: Name of the DataSource to clone - value: centos5 - - name: DATA_SOURCE_NAMESPACE - description: Namespace of the DataSource - value: openshift-virtualization-os-images - - name: CLOUD_USER_PASSWORD - description: Randomized password for the cloud-init user centos - generate: expression - from: '[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}' -EOF - -# Create the virtual machine from template -cat << 'EOF' | oc apply --namespace crusty-corp --filename - -apiVersion: 'kubevirt.io/v1' -kind: 'VirtualMachine' -metadata: - labels: - app: 'crusty-corp-fun-financial-appliance' - vm.kubevirt.io/template: 'centos5-server-small' - vm.kubevirt.io/template.namespace: 'crusty-corp' - vm.kubevirt.io/template.revision: '1' - vm.kubevirt.io/template.version: 'v0.25.0' - name: 'crusty-corp-fun-financial-appliance' - namespace: 'crusty-corp' -spec: - dataVolumeTemplates: - - apiVersion: 'cdi.kubevirt.io/v1beta1' - kind: 'DataVolume' - metadata: - annotations: - cdi.kubevirt.io/storage.bind.immediate.requested: 'true' - creationTimestamp: null - name: 'crusty-corp-fun-financial-appliance' - spec: - source: - blank: {} - storage: - resources: - requests: - storage: '30Gi' - - metadata: - creationTimestamp: null - name: 'crusty-corp-fun-financial-appliance-installation-cdrom' - spec: - source: - http: - url: 'https://vault.centos.org/5.11/isos/x86_64/CentOS-5.11-x86_64-netinstall.iso' - storage: - resources: - 
requests: - storage: '5Gi' - running: false - template: - metadata: - annotations: - vm.kubevirt.io/flavor: 'small' - vm.kubevirt.io/os: 'centos5' - vm.kubevirt.io/workload: 'server' - creationTimestamp: null - labels: - kubevirt.io/domain: 'crusty-corp-fun-financial-appliance' - kubevirt.io/size: 'small' - spec: - domain: - cpu: - cores: 1 - sockets: 1 - threads: 1 - devices: - disks: - - bootOrder: 2 - disk: - bus: 'virtio' - name: 'rootdisk' - - bootOrder: 3 - disk: - bus: 'virtio' - name: 'cloudinitdisk' - - bootOrder: 1 - cdrom: - bus: 'sata' - name: 'installation-cdrom' - interfaces: - - macAddress: '02:d5:73:00:00:06' - masquerade: {} - model: 'virtio' - name: 'default' - networkInterfaceMultiqueue: true - rng: {} - machine: - type: 'pc-q35-rhel9.2.0' - resources: - requests: - memory: '2Gi' - evictionStrategy: 'LiveMigrate' - networks: - - name: 'default' - pod: {} - terminationGracePeriodSeconds: 180 - volumes: - - dataVolume: - name: 'crusty-corp-fun-financial-appliance' - name: 'rootdisk' - - cloudInitNoCloud: - userData: "#cloud-config\nuser: centos\npassword: cqud-lhel-rd0b\nchpasswd: { expire: False }" - name: 'cloudinitdisk' - - dataVolume: - name: 'crusty-corp-fun-financial-appliance-installation-cdrom' - name: 'installation-cdrom' -EOF -#+end_src - - -** Exercise three - But can it do live migration? 
- -#+begin_src tmux -# Patch the bogus virtual machine nodeselector & cpumodel -oc patch --namespace demotestwtf17 VirtualMachine fedora --type='merge' --patch-file /dev/stdin <<-EOF -spec: - template: - spec: - domain: - cpu: - model: - nodeSelector: -EOF -#+end_src - -# Restart vm manually - -#+begin_src tmux -# Initiate the live migration -cat << EOF | oc create --namespace demotestwtf17 --filename - -apiVersion: kubevirt.io/v1 -kind: VirtualMachineInstanceMigration -metadata: - name: fedora-migration-hackathon - namespace: demotestwtf17 - finalizers: - - kubevirt.io/migrationJobFinalize - labels: - kubevirt.io/vmi-name: fedora -spec: - vmiName: fedora -EOF - -# Check the node virtual machine migrated to -oc --namespace demotestwtf17 get VirtualMachineInstance fedora -#+end_src - - -** Exercise four - What about balancing vm workloads? - -#+begin_src tmux -# Create required namespace for vm -oc create namespace itsjustyaml - -# Create the suggested namespace for descheduler operator -oc create namespace openshift-kube-descheduler-operator - -# Create the subscription for the kube deschedular operator -cat << EOF | oc apply --namespace openshift-kube-descheduler-operator --filename - -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: cluster-kube-descheduler-operator - namespace: openshift-kube-descheduler-operator -spec: - channel: stable - installPlanApproval: Automatic - name: cluster-kube-descheduler-operator - source: redhat-operators - sourceNamespace: openshift-marketplace -EOF - -# Create the instance of descheduler -cat << EOF | oc apply --namespace openshift-kube-descheduler-operator --filename - -apiVersion: operator.openshift.io/v1 -kind: KubeDescheduler -metadata: - name: cluster - namespace: openshift-kube-descheduler-operator -spec: - deschedulingIntervalSeconds: 3600 - logLevel: Normal - managementState: Managed - mode: Automatic - operatorLogLevel: Normal - profileCustomizations: - devLowNodeUtilizationThresholds: 
Medium - profiles: - - AffinityAndTaints - - DevPreviewLongLifecycle -EOF -#+end_src - -TODO: Create CentOS9 VM With deshcheduler turned on. - -** Exercise five - How do I resize virtual machine disks again? - -#+begin_src tmux -# Patch the storageclass to enable volume expansion -oc patch storageclass ocs-storagecluster-ceph-rbd --type='merge' --patch-file /dev/stdin <<-EOF -allowVolumeExpansion: true -EOF -#+end_src - -#+begin_src tmux -# Patch the claim to increase sizea -oc --namespace super-important-dont-deleteme patch persistentvolumeclaim cryto-carnivore-cpuminer3000 --type='merge' --patch-file /dev/stdin <<-EOF -spec: - resources: - requests: - storage: 60Gi -EOF -#+end_src - -#+begin_src tmux -# Create new claim for wannacry volume -cat << EOF | oc --namespace super-important-dont-deleteme apply --filename - -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: wannacry - namespace: super-important-dont-deleteme -spec: - accessModes: - - ReadWriteMany - volumeMode: Block - resources: - requests: - storage: 20Gi -EOF -#+end_src - -TODO: Patch the vm to include the new volume - - - -* Automated scenario cleanup - -If you need to quickly reset an example environment to have no solutions populated you can use the following source blocks. - -#+begin_src tmux -oc delete namespace --ignore-not-found crusty-corp demotestwtf17 itsjustyaml super-important-dont-deleteme acme-bank -#+end_src diff --git a/data/workshop/scenario1.mdx b/data/hackathon/scenario1.mdx similarity index 100% rename from data/workshop/scenario1.mdx rename to data/hackathon/scenario1.mdx diff --git a/data/hackathon/scenario2.mdx b/data/hackathon/scenario2.mdx new file mode 100644 index 0000000..4093dc8 --- /dev/null +++ b/data/hackathon/scenario2.mdx @@ -0,0 +1,53 @@ +--- +title: Enabling GPU accelerators +exercise: 2 +date: '2024-06-06' +tags: ['openshift','ai','kubernetes'] +draft: false +authors: ['default'] +summary: "How do we use GPU accelerators??" 
+--- + +As a sales team you've got an upcoming demo with the Acme Financial Services data science team, who have asked you to show them how to enable GPU support in Red Hat OpenShift Service on AWS (ROSA). + +You've spun up a demo environment to show them how it's done. + +## 2.1 - Add Cluster GPU Machine Pool +Your first task for this challenge is to add a new Machine Pool using the instance type `g5.8xlarge`. +Name it `gpu`. Set the count to 1. + +You can do this either through +- the Red Hat Hybrid Cloud Console (https://console.redhat.com/openshift) +- or the ROSA CLI (https://console.redhat.com/openshift/token/show) + + +Documentation you may find helpful is: +- https://cloud.redhat.com/experts/rosa/gpu/ + + +## 2.2 - Install required operators + +While the GPU machine is provisioning, the next step is to install the two required operators: +- Node Feature Discovery (NFD) +- NVIDIA GPU Operator + +Install the following custom resources: +- NodeFeatureDiscovery +- ClusterPolicy + +The next steps should not be done until the GPU node is fully provisioned. +You'll know this is complete when the following command lists the GPU node: +```bash +oc get node -l nvidia.com/gpu.present +``` + + + + +## 2.3 - Check your work + +If your GPU is now running and labeled successfully, please post a message in `#event-anz-ocp-ai-hackathon` with the message: + +> Please review [team name] solution for exercise 2. + +This exercise is worth `25` points. The event team will reply in Slack to confirm your updated team total score. 
diff --git a/data/workshop/scenario3.mdx b/data/hackathon/scenario3.mdx similarity index 68% rename from data/workshop/scenario3.mdx rename to data/hackathon/scenario3.mdx index 73b42e9..ba5041d 100644 --- a/data/workshop/scenario3.mdx +++ b/data/hackathon/scenario3.mdx @@ -18,10 +18,29 @@ For this task, your team are required to use the `granite-7b-lab` model availabl After locating the model in on premises object storage, your team need to replicate this model to the ACME Financial Services cloud cluster object storage so that it could be served in future. +Documentation you may find helpful is: +- https://min.io/docs/minio/linux/index.html -## 3.2 - Install openshift ai + + +## 3.2 - Install OpenShift AI related operators Now that you've helped the ACME team replicate their chosen model to their cloud OpenShift Cluster, they want to serve the model ASAP. For this challenge your team must demonstrate to ACME how to install OpenShift AI, and serve the existing model called `granite-7b-lab` via OpenShift AI. +Install the following operators (do not install any custom resources): +- OpenShift AI +- OpenShift Service Mesh +- OpenShift Serverless + +## 3.3 - Install OpenShift AI + + ingressGateway: + certificate: + secretName: knative-serving-cert + + +Documentation you may find helpful is: +- https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.9/html/installing_and_uninstalling_openshift_ai_self-managed/index + diff --git a/data/workshop/scenario2.mdx b/data/workshop/scenario2.mdx deleted file mode 100644 index 05b97b4..0000000 --- a/data/workshop/scenario2.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Enabling GPU accelerators -exercise: 2 -date: '2024-06-06' -tags: ['openshift','ai','kubernetes'] -draft: false -authors: ['default'] -summary: "How do we use GPU accelerators??" 
---- - -As a sales team you've got an upcoming demo with the Acme Financial Services data science team, who have asked you to show them how to enable GPU support in Red Hat OpenShift Service on AWS (ROSA). - -You've spun up a demo environment to show them how it's done. - -## 3.1 - Replicate Model to Cloud Storage - -For this task, your team are required to use the `granite-7b-lab` model available in the object storage running in the ACME Financial Services on prem cluster which is based on Minio. - -After locating the model in on premises object storage, your team need to replicate this model to the ACME Financial Services cloud cluster object storage so that it could be served in future. - - -## 3.2 - Install openshift ai - -Now that you've helped the ACME team replicate their chosen model to their cloud OpenShift Cluster, they want to serve the model ASAP. - -For this challenge your team must demonstrate to ACME how to install OpenShift AI, and serve the existing model called `granite-7b-lab` via OpenShift AI. - diff --git a/public/feed.xml b/public/feed.xml index a73eac3..f99fae7 100644 --- a/public/feed.xml +++ b/public/feed.xml @@ -7,7 +7,7 @@ en-us jablair@redhat.com (Red Hat) jablair@redhat.com (Red Hat) - Sun, 14 Apr 2024 00:00:00 GMT + Wed, 05 Jun 2024 00:00:00 GMT @@ -15,9 +15,19 @@ Understanding the hackathon environment https://odh-labs.github.io/workshop/workshop/scenario1 Let's get underway with the hackathon. - Sun, 14 Apr 2024 00:00:00 GMT + Wed, 05 Jun 2024 00:00:00 GMT jablair@redhat.com (Red Hat) - openshiftvirtualisationkuberneteskubevirt + openshiftaikubernetes + + + + https://odh-labs.github.io/workshop/workshop/scenario2 + Hybrid Cloud AI model deployment + https://odh-labs.github.io/workshop/workshop/scenario2 + Let's deploy the first model across the hybrid cloud. + Wed, 05 Jun 2024 00:00:00 GMT + jablair@redhat.com (Red Hat) + openshiftaikubernetes