Update bootcamp instructions for V2.11 #12

Open · wants to merge 17 commits into main
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
.vscode
.obsidian
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
21 changes: 8 additions & 13 deletions source_code/40_pipelines/train-car-rekon.py
@@ -1,12 +1,9 @@
import os

import kfp.compiler
from dotenv import load_dotenv
from kfp import dsl

load_dotenv(override=True)

kubeflow_endpoint = os.environ["KUBEFLOW_ENDPOINT"]
#kubeflow_endpoint = 'https://ds-pipeline-dspa.parasol-insurance.svc.cluster.local:8443'
base_image = os.getenv(
"BASE_IMAGE",
"image-registry.openshift-image-registry.svc:5000/openshift/python:latest")
@@ -80,7 +77,7 @@ def upload_to_s3(model_onnx: dsl.Input[dsl.Model]):

print("configuring s3 instance")
# Configuration
minio_url = "http://minio.object-datastore.svc.cluster.local:9000"
minio_url = "http://minio.parasol-insurance.svc.cluster.local:9000"
access_key = "minio"
secret_key = "minio123"

@@ -112,18 +109,18 @@ def accident_detection_pipeline(model_obc: str = "accident-detection"):
if __name__ == "__main__":
print(f"Connecting to kfp: {kubeflow_endpoint}")

sa_token_path = "/run/secrets/kubernetes.io/serviceaccount/token"
if os.path.isfile(sa_token_path):
sa_token_path = "/run/secrets/kubernetes.io/serviceaccount/token" # noqa: S105
if "BEARER_TOKEN" in os.environ:
bearer_token = os.environ["BEARER_TOKEN"]
elif os.path.isfile(sa_token_path):
with open(sa_token_path) as f:
bearer_token = f.read().rstrip()
else:
bearer_token = os.environ["BEARER_TOKEN"]

# Check if the script is running in a k8s pod
# Get the CA from the service account if it is
# Skip the CA if it is not
sa_ca_cert = "/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
if os.path.isfile(sa_ca_cert):
if os.path.isfile(sa_ca_cert) and "svc" in kubeflow_endpoint:
ssl_ca_cert = sa_ca_cert
else:
ssl_ca_cert = None
@@ -133,9 +130,7 @@ def accident_detection_pipeline(model_obc: str = "accident-detection"):
existing_token=bearer_token,
ssl_ca_cert=ssl_ca_cert,
)
result = client.create_run_from_pipeline_func(
accident_detection_pipeline, arguments={},
experiment_name="accident detection")
result = client.create_run_from_pipeline_func(accident_detection_pipeline, arguments={}, experiment_name="accident_detection")
print(f"Starting pipeline run with run_id: {result.run_id}")
# Wait 20 minutes for the pipeline to complete
client.wait_for_run_completion(run_id=result.run_id, timeout=1200)
2 changes: 1 addition & 1 deletion workshop/content/31_custom_notebook.adoc
@@ -2,7 +2,7 @@

## Need for custom notebooks

RHOAI provides many pre-built notebooks like Standard Data Science notebook, Tensorflow notebook, Pytorch notebook etc. Data scientists can spin up these notebooks and start runnings their experiments and creating their machine learning models without much set up.
RHOAI provides many pre-built notebooks like Standard Data Science notebook, Tensorflow notebook, Pytorch notebook etc. Data scientists can spin up these notebooks and start running their experiments and creating their machine learning models without much set up.

These inbuilt notebooks come with most of the packages that one would need to run their Data Science projects. But the world of Data Science and Machine Learning is vast and there might be a need to download other python packages.

4 changes: 2 additions & 2 deletions workshop/content/34_boto3.adoc
@@ -79,7 +79,7 @@ resources:
- ../../base
----

+

[TIP]
====
The same content will work for both overlays (dev and prod)
@@ -423,7 +423,7 @@ def upload_file(file_path, bucket_name, object_name):
from ultralytics import YOLO
model = YOLO("https://rhods-public.s3.amazonaws.com/demo-models/ic-models/accident/accident_detect.onnx", task="detect")
# Upload the file
upload_file('accident_detect.onnx', 'models', 'accident_model/accident_detect.onnx')
upload_file('weights/accident_detect.onnx', 'models', 'accident_model/accident_detect.onnx')
----

. View the contents of the bucket
2 changes: 1 addition & 1 deletion workshop/content/40_setup_pipeline_server.adoc
@@ -84,7 +84,7 @@ spec:
+
[TIP]
====
Your Data Science Cluster requires of a dspa, so that you can create pipelines.
Your Data Science Project requires a DSPA, so that you can create pipelines.

Before creating the dspa object, your Data Science Project shows a button to *Configure pipeline server*:

6 changes: 4 additions & 2 deletions workshop/content/41_introduction_to_kfp_pipelines.adoc
@@ -67,8 +67,10 @@ image::images/Compiled_pipeline.png[]

+
`python pipelines/02_submitted_pipeline_via_route.py`

This will create a pipeline and also submit a run. To view this run go to RHOAI into Data Science Pipelines and check the Runs section.
+
This will create a pipeline and also submit a run. To view this run go to `RHOAI Dashboard` -> `Experiments` -> `Experiments and runs`.
+
image::images/41_working_with_pipelines/pipeline_runs.png[]
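+
For orientation, the snippet below is a hedged sketch of the submit-via-route pattern that such scripts follow (it is not the actual contents of `02_submitted_pipeline_via_route.py`); the `KUBEFLOW_ENDPOINT` and `BEARER_TOKEN` environment variables are assumptions:
+
[source,python]
----
import os

import kfp
from kfp import dsl


@dsl.component(base_image="python:3.11")
def say_hello() -> str:
    # Trivial component so that the sketch is self-contained
    return "hello"


@dsl.pipeline(name="hello-pipeline")
def hello_pipeline():
    say_hello()


if __name__ == "__main__":
    # Assumed: the DSPA route and an OpenShift bearer token come from env vars
    client = kfp.Client(
        host=os.environ["KUBEFLOW_ENDPOINT"],
        existing_token=os.environ["BEARER_TOKEN"],
    )
    run = client.create_run_from_pipeline_func(hello_pipeline, arguments={})
    print(f"Submitted run: {run.run_id}")
----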

[start=4]
. The next file, `03_outputs_pipeline.py`, introduces the output capability. Run it in the notebook terminal with the below command and check the output in the Runs section in RHOAI:
52 changes: 48 additions & 4 deletions workshop/content/42_working_with_pipelines.adoc
@@ -81,6 +81,7 @@ def accident_detection_pipeline(model_obc: str = "accident-detection"):
[source,python]
----
print(f"Connecting to kfp: {KUBEFLOW_ENDPOINT}")
import os

bearer_token = "sha256~P0wEh46fxWa4uzPKR-b3fhcnsyXvCju4GovRd2YNNKM"
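# Hedged aside (not part of this diff): since `os` is imported in this cell,
# the token could also be read from an environment variable instead of being
# hard-coded, for example:
#   bearer_token = os.environ["BEARER_TOKEN"]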

@@ -109,7 +110,7 @@ Provide your own token, you can find the token in the OpenShift Web Console by c

. Validate that the pipeline is running, using the RHOAI dashboard, navigate to the *pipeline runs* of the *parasol-insurance* data science project: You should find a run with a name starting with *accident-detection*

. Create a cell to train the model, organize this cell to appear before the cell that defines the pipeline (`@kfp.dsl.pipeline`). The contents of this cell were crafter after from a combination of functions from https://github.com/rh-aiservices-bu/parasol-insurance/blob/dev/lab-materials/04/04-03-model-retraining.ipynb[04-03-model-retraining.ipynb]; we recommend you to compare this cell with the original notebook contents:
. Create a cell to train the model; organize this cell to appear before the cell that defines the pipeline (`@kfp.dsl.pipeline`). The contents of this cell were crafted from a combination of functions from https://github.com/rh-aiservices-bu/parasol-insurance/blob/dev/lab-materials/04/04-03-model-retraining.ipynb[04-03-model-retraining.ipynb]; we recommend you compare this cell with the original notebook contents:

+
[source,python]
@@ -173,7 +174,7 @@ Notice how we use the output of the `download_data` task as the input of the `train_model` task

. Save and run your notebook.

. Validate that the pipeline is running, using the RHOAI dashboard, navigate to the *pipeline runs* of the *parasol-insurance* data science project: You should find a run with a name starting with *accident-detection*
. Validate that the pipeline is running, using the RHOAI dashboard, navigate to the *Experiments* -> *Experiments and runs*: You should find a run with a name starting with *accident-detection*

. Create a cell to upload the model to s3 using the `boto3` package, organize this cell to appear before the cell that defines the pipeline (`@kfp.dsl.pipeline`):

@@ -227,7 +228,7 @@ def accident_detection_pipeline(model_obc: str = "accident-detection"):

. Save and run your notebook.

. Validate that the pipeline is running, using the RHOAI dashboard, navigate to the *pipeline runs* of the *parasol-insurance* data science project: You should find a run with a name starting with *accident-detection*
. Validate that the pipeline is running, using the RHOAI dashboard, navigate to *Experiments* -> *Experiments and runs*: You should find a run with a name starting with *accident-detection*

. Validate that the model is uploaded to the s3 bucket, by navigating to the s3 bucket in the OpenShift console.
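+
If you prefer to verify from the workbench instead, the cell below is a small hedged sketch using `boto3`; the MinIO endpoint, credentials, bucket, and object prefix are taken from the values used earlier in this lab and may differ in your environment:
+
[source,python]
----
import boto3

# Assumed values, matching the earlier upload_to_s3 component; adjust as needed
s3 = boto3.client(
    "s3",
    endpoint_url="http://minio.parasol-insurance.svc.cluster.local:9000",
    aws_access_key_id="minio",
    aws_secret_access_key="minio123",
)

# List objects under the model prefix to confirm the upload landed
response = s3.list_objects_v2(Bucket="models", Prefix="accident_model/")
for obj in response.get("Contents", []):
    print(obj["Key"], obj["Size"])
----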

@@ -479,10 +480,53 @@ resources:

. Click on the `model-retrain` pipeline, and validate that there is a pipeline run, wait the pipeline run to complete

. Navigate to the RHOAI dashboard, and validate that the *Data Science Pipelines > Runs* has a new pipeline run with a name starting with `accident detection`.
. Navigate to the RHOAI dashboard -> *Experiments* -> *Experiments and runs*: You should find a run with a name starting with *accident-detection*

+
[TIP]
====
Validate your code against https://github.com/redhat-ai-services/ai-accelerator-qa/pull/new/42_working_with_pipeline[Branch for model_retrain pipeline config]
====

## Experiments
A pipeline experiment is a workspace where you can try different configurations of your pipelines. You can use experiments to organize your runs into logical groups.
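
For reference, a run is attached to an experiment when it is submitted; a minimal sketch with the KFP SDK (assuming an authenticated `client` like the one created earlier in this lab and the `accident_detection_pipeline` defined above) looks like this:

[source,python]
----
# Group this run under the "accident_detection" experiment; KFP creates the
# experiment if it does not already exist.
result = client.create_run_from_pipeline_func(
    accident_detection_pipeline,
    arguments={},
    experiment_name="accident_detection",
)
print(f"Run {result.run_id} started in experiment 'accident_detection'")
----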

### Experiments and Runs

. Navigate to the RHOAI Dashboard, click on *Experiments* > *Experiments and Runs*, and validate that the new experiment `accident_detection` is created.

image::images/41_working_with_pipelines/experiments_runs.png[]

. Click on the experiment `accident_detection` to view pipeline runs.

image::images/41_working_with_pipelines/pipeline_runs.png[]

. Click on each pipeline run to view more details.

image::images/41_working_with_pipelines/pipeline_run_details.png[]

. We can schedule periodic pipeline runs for an experiment. Click on *Schedules*, then click *Create Schedule*, and fill in the following details (an SDK-based sketch follows the screenshots below):
* *Experiment:* We can choose an existing Experiment or create a new Experiment.
* *Name:* Name for the schedule
* *Trigger Type:* Periodic
* *Run Every:* 1 hour
* *Start Date:* Start date for the schedule
* *End Date:* End Date for the schedule
* *Pipeline:* Name of the pipeline
* *Pipeline version:* Version of the pipeline

image::images/41_working_with_pipelines/schedule-1.png[]

image::images/41_working_with_pipelines/schedule-2.png[]
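
The same schedule can also be created programmatically; below is a hedged sketch using the KFP SDK's `create_recurring_run` (the experiment lookup, the pipeline/version ID placeholders, and the one-hour interval are assumptions chosen to mirror the UI steps above):

[source,python]
----
# Assumes an authenticated `client` (kfp.Client) as created earlier in this lab.
experiment = client.create_experiment("accident_detection")  # get-or-create

recurring_run = client.create_recurring_run(
    experiment_id=experiment.experiment_id,
    job_name="accident-detection-hourly",
    interval_second=3600,          # "Run Every: 1 hour"
    pipeline_id="<pipeline-id>",   # placeholder: your pipeline's ID
    version_id="<version-id>",     # placeholder: your pipeline version's ID
    max_concurrency=1,
    enabled=True,
)
print(f"Created schedule: {recurring_run.recurring_run_id}")
----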

## Executions

On the Executions page, you can view the details of each pipeline task execution, such as its name, status, unique ID, and execution type.

image::images/41_working_with_pipelines/executions.png[]

## Artifacts

On the Artifacts page, you can view the pipeline artifacts, which help you evaluate the performance of your pipeline runs.

image::images/41_working_with_pipelines/artifacts.png[]
2 changes: 1 addition & 1 deletion workshop/content/50_distributed_training.adoc
@@ -526,7 +526,7 @@ You can iterate over the `client.get_job_status(submission_id)` instruction to c

[source,python]
----
while client.get_job_status(submission_id) != "Succeeded":
while client.get_job_status(submission_id).upper() != "SUCCEEDED":
pass
print("Job Succeeded")
----
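
As the diff shows, the status comparison was made case-insensitive. A gentler variant of this loop (a sketch, assuming the same `client` and `submission_id`, and assuming `FAILED`/`STOPPED` as terminal failure states) sleeps between polls instead of busy-waiting:

[source,python]
----
import time

# Poll every 10 seconds and stop on any terminal state
while True:
    status = client.get_job_status(submission_id).upper()
    if status == "SUCCEEDED":
        print("Job Succeeded")
        break
    if status in ("FAILED", "STOPPED"):
        print(f"Job finished without success: {status}")
        break
    time.sleep(10)
----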
6 changes: 3 additions & 3 deletions workshop/content/70_rag_llm.adoc
@@ -498,7 +498,7 @@ Depended on which workbench image you are using, we have to make some changes to
2. Update the _Base Parameters and PostgreSQL info_

```
product_version = 2.9
product_version = 2.11
CONNECTION_STRING = "postgresql+psycopg://vectordb:[email protected]:5432/vectordb"
COLLECTION_NAME = f"rhoai-doc-{product_version}"
```
@@ -513,7 +513,7 @@ NOTE: _Create the index and ingest the documents_ will take more than 5 minutes

```
CONNECTION_STRING = "postgresql+psycopg://vectordb:[email protected]:5432/vectordb"
COLLECTION_NAME = "rhoai-doc-2.9"
COLLECTION_NAME = "rhoai-doc-2.11"
```

Run through the notebook
@@ -530,7 +530,7 @@ TEMPERATURE=0.01
PRESENCE_PENALTY=1.03

CONNECTION_STRING = "postgresql+psycopg://vectordb:[email protected]:5432/vectordb"
COLLECTION_NAME = "rhoai-doc-2.9"
COLLECTION_NAME = "rhoai-doc-2.11"
```
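
These two values are what the notebook's vector store setup consumes; a hedged sketch of that pattern (the embedding model and the LangChain import paths are assumptions and may differ from the notebook) is:

```
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.pgvector import PGVector

embeddings = HuggingFaceEmbeddings()  # assumed default sentence-transformers model
store = PGVector(
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)

# Quick sanity check that the ingested docs are searchable
docs = store.similarity_search("What is new in RHOAI 2.11?", k=4)
print(len(docs))
```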

At the end, you should have a successful RAG with LLM sample that you can query.
(7 files changed that GitHub cannot render in the diff view)
2 changes: 1 addition & 1 deletion workshop/workshop.yaml
@@ -1,4 +1,4 @@
name: Red Hat OpenShift Virtualization Roadshow
name: AI Accelerate Bootcamp

modules:
activate: