From 7162c564bc269f97584728b9de51d6afea6c01d4 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Tue, 23 Apr 2024 14:47:30 -0700 Subject: [PATCH 1/8] Pin versions in Dockerfile and remove duplicate dependencies (#482) * Pin versions in Dockerfile and remove duplicate dependencies Signed-off-by: Abolfazl Shahbazi * Minor .bashrc correction Signed-off-by: Abolfazl Shahbazi --------- Signed-off-by: Abolfazl Shahbazi --- docker/hf_k8s/Dockerfile | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/docker/hf_k8s/Dockerfile b/docker/hf_k8s/Dockerfile index 68abe12..b28da66 100644 --- a/docker/hf_k8s/Dockerfile +++ b/docker/hf_k8s/Dockerfile @@ -35,30 +35,28 @@ SHELL ["/bin/bash", "-c"] RUN apt-get update -y && \ apt-get install -y --no-install-recommends --fix-missing \ - ca-certificates \ google-perftools \ - libgl1 \ libjemalloc2 \ libomp-dev \ numactl RUN python -m pip install --no-cache-dir \ + 'SentencePiece==0.2.0' \ + 'accelerate==0.28.0' \ + 'datasets==2.19.0' \ + 'einops==0.7.0' \ + 'evaluate==0.4.1' \ 'mkl-include==2023.2.0' \ 'mkl==2023.2.0' \ + 'nltk==3.8.1' \ + 'onnxruntime-extensions==0.10.1' \ + 'onnxruntime==1.17.3' \ + 'peft==0.10.0' \ 'protobuf==4.24.4' \ - SentencePiece \ - accelerate==0.28.0 \ - datasets \ - einops \ - evaluate \ - nltk \ - onnxruntime \ - onnxruntime-extensions \ - peft \ - psutil \ - py-cpuinfo \ - rouge_score \ - tokenizers + 'psutil==5.9.5' \ + 'py-cpuinfo==9.0.0' \ + 'rouge_score==0.1.2' \ + 'tokenizers==0.19.1' # Install OpenSSH for MPI to communicate between containers RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ @@ -81,6 +79,7 @@ COPY scripts /workspace/scripts/ COPY generate_ssh_keys.sh /workspace/generate_ssh_keys.sh RUN cat /workspace/generate_ssh_keys.sh >> ${HOME}/.bash_profile && \ + sed -i 's#source /inc/bin/activate##g' ${HOME}/.bashrc && \ cat /workspace/generate_ssh_keys.sh >> ${HOME}/.bashrc ENV BASH_ENV=${HOME}/.bash_profile @@ -88,4 +87,3 @@ ENV BASH_ENV=${HOME}/.bash_profile # Set CCL env vars ENV PATH="/usr/local/lib/${PYTHON}${PYTHON_VER}/dist-packages/oneccl_bindings_for_pytorch/bin:${PATH}" ENV LD_LIBRARY_PATH="/usr/local/lib/${PYTHON}${PYTHON_VER}/dist-packages/oneccl_bindings_for_pytorch/lib:${LD_LIBRARY_PATH}" - From aff6a4e6695cbef4c633e06dd1071af7cde1b402 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 26 Apr 2024 08:13:01 -0700 Subject: [PATCH 2/8] Update the LLM fine tuning script in the k8s example to support Gaudi (#483) * Update training args parsing * Change arg parsing * Update the Llama2 fine tuning script to support Gaudi with optimum-habana * Formatting * Remove compute_metrics eval * Fix for running without optimum-habana * Code clean up * Remove commented out code: * Formatting * Formatting fix based on review feedback --- docker/hf_k8s/scripts/finetune.py | 194 +++++++++++++++++++++++++++--- 1 file changed, 174 insertions(+), 20 deletions(-) diff --git a/docker/hf_k8s/scripts/finetune.py b/docker/hf_k8s/scripts/finetune.py index 166fc1c..bf27a46 100644 --- a/docker/hf_k8s/scripts/finetune.py +++ b/docker/hf_k8s/scripts/finetune.py @@ -18,19 +18,18 @@ import datasets +import importlib import logging import math import os import sys +import torch import transformers import copy from dataclasses import dataclass, field from datasets import load_dataset from datetime import datetime -from neural_compressor import benchmark, quantization -from neural_compressor.config import BenchmarkConfig, PostTrainingQuantConfig -from 
neural_compressor.utils.pytorch import load from peft import ( LoraConfig, PeftModel, @@ -99,6 +98,10 @@ class ModelArguments: default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, ) + token: Optional[str] = field( + default=None, + metadata={"help": "auth token for private models"}, + ) use_fast_tokenizer: bool = field( default=True, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, @@ -111,7 +114,95 @@ class ModelArguments: default=False, metadata={ "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "with private models)." + }, + ) + use_gaudi: bool = field( + default=False, + metadata={"help": "Fine tune using Intel Gaudi accelerators."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` " + "instead." + }, + ) + trust_remote_code: bool = field( + default=False, + metadata={ + "help": "should enable when using custom model architecture that is not yet part of the Hugging Face " + "transformers package like MPT)." + }, + ) + use_cache: bool = field( + default=True, + metadata={ + "help": ( + "Whether or not the model should return the last key/values attentions (not used by all models)." + "Only relevant if `config.is_decoder=True`." + ) + }, + ) + low_cpu_mem_usage: bool = field( + default=False, + metadata={ + "help": ( + "It is an option to create the model as an empty shell, then only materialize its parameters when the " + "pretrained weights are loaded. When set to True, it will benefit LLM loading time and RAM consumption." + ) + }, + ) + attn_softmax_bf16: bool = field( + default=False, + metadata={ + "help": ( + "Whether to run attention softmax layer in bf16 precision for fine-tuning. The current support is " + "limited to Llama only.", + ) + }, + ) + use_flash_attention: bool = field( + default=False, + metadata={ + "help": ( + "Whether to use Habana flash attention for fine-tuning. The current support is limited to Llama only.", + ) + }, + ) + flash_attention_recompute: bool = field( + default=False, + metadata={ + "help": ( + "Whether to enable recompute in Habana flash attention for fine-tuning." + " It is applicable only when use_flash_attention is True.", + ) + }, + ) + flash_attention_causal_mask: bool = field( + default=False, + metadata={ + "help": ( + "Whether to enable causal mask in Habana flash attention for fine-tuning." + " It is applicable only when use_flash_attention is True.", + ) + }, + ) + use_fused_rope: bool = field( + default=True, + metadata={ + "help": ( + "Whether to use Habana fused-rope for fine-tuning. The current support is limited to Llama only.", + ) + }, + ) + load_meta_device: bool = field( + default=False, + metadata={ + "help": ( + "It is an option to load the model to the device instead of the host, so it can reduce the host RAM " + "usage. https://huggingface.co/blog/accelerate-large-models" + ) }, ) @@ -352,7 +443,6 @@ class BenchmarkArguments: ), } - # Prompt dictionary without output used for chat models CHAT_PROMPT_DICT2 = { "prompt_with_input": ( @@ -406,22 +496,40 @@ def create_system_turn(examples, prompt_dict): return prompts +def is_optimum_habana_available(): + """ + Check for optimum-habana and return False if the library is not found. 
+ """ + if importlib.util.find_spec('optimum'): + return importlib.util.find_spec('optimum.habana') is not None + return False + + def main(): start_time = datetime.now() + script_args = (ModelArguments, DataArguments, FinetuneArguments, QuantizationArguments, BenchmarkArguments) + + # If optimum-habana is available, use GaudiTrainingArguments. Otherwise, use Transformers TrainingArguments + if is_optimum_habana_available(): + from optimum.habana import GaudiTrainingArguments + script_args += (GaudiTrainingArguments,) + else: + script_args += (TrainingArguments,) + # See all possible arguments in src/transformers/training_args.py # or by passing the --help flag to this script. # We now keep distinct sets of args, for a cleaner separation of concerns. - parser = HfArgumentParser((ModelArguments, DataArguments, TrainingArguments, FinetuneArguments, - QuantizationArguments, BenchmarkArguments)) + parser = HfArgumentParser(script_args) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): # If we pass only one argument to the script and it's the path to a json file, # let's parse it to get our arguments. - model_args, data_args, training_args, finetune_args, quant_args, benchmark_args = parser.parse_json_file( + model_args, data_args, finetune_args, quant_args, benchmark_args, training_args = parser.parse_json_file( json_file=os.path.abspath(sys.argv[1])) else: # model_args, data_args, training_args, finetune_args = parser.parse_args_into_dataclasses() - model_args, data_args, training_args, finetune_args, quant_args, benchmark_args, optim_args = \ + model_args, data_args, finetune_args, quant_args, benchmark_args, training_args, optim_args = \ parser.parse_args_into_dataclasses(return_remaining_strings=True) # Setup logging @@ -457,6 +565,9 @@ def main(): "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, "use_auth_token": True if model_args.use_auth_token else None, + "trust_remote_code": True if model_args.trust_remote_code else None, + "use_cache": False if training_args.gradient_checkpointing else model_args.use_cache, + "token": model_args.token, } if model_args.config_name: config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) @@ -472,6 +583,7 @@ def main(): "use_auth_token": True if model_args.use_auth_token else None, "add_bos_token": False, "add_eos_token": False, + "token": model_args.token, } if model_args.tokenizer_name: tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) @@ -532,11 +644,14 @@ def main(): **dataset_args, ) + dataset_keys = ["train"] + # If no test data is there, validation_split_percentage will be used to divide the dataset. 
if "test" not in raw_datasets.keys() and training_args.do_eval: logger.info("Original dataset length: {}".format(len(raw_datasets["train"]))) raw_datasets["train"] = raw_datasets["train"].shuffle(seed=data_args.dataset_seed) raw_datasets = raw_datasets["train"].train_test_split(test_size=data_args.validation_split_percentage) + dataset_keys += ["test"] logger.info("Validation split percentage: {}".format(data_args.validation_split_percentage)) logger.info("Train split length: {}".format(len(raw_datasets["train"]))) logger.info("Test split length: {}".format(len(raw_datasets["test"]))) @@ -616,8 +731,10 @@ def concatenate_data(dataset, max_seq_length): for i in range(len(concatenated_data) // max_seq_length)] concatenated_dataset[column] = reshaped_data return datasets.Dataset.from_dict(concatenated_dataset) - tokenized_datasets_ = tokenized_datasets["train"].remove_columns(["prompts", "system_prompts"]) - tokenized_datasets["train"] = concatenate_data(tokenized_datasets_, data_args.max_seq_length) + + for key in dataset_keys: + tokenized_datasets_ = tokenized_datasets[key].remove_columns(["prompts", "system_prompts"]) + tokenized_datasets[key] = concatenate_data(tokenized_datasets_, data_args.max_seq_length) if training_args.do_train: if "train" not in tokenized_datasets: @@ -642,6 +759,7 @@ def concatenate_data(dataset, max_seq_length): # Load model if model_args.model_name_or_path: + model_dtype = torch.bfloat16 if training_args.bf16 else None model = AutoModelForCausalLM.from_pretrained( model_args.model_name_or_path, from_tf=bool(".ckpt" in model_args.model_name_or_path), @@ -649,6 +767,11 @@ def concatenate_data(dataset, max_seq_length): cache_dir=model_args.cache_dir, revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, + trust_remote_code=True if model_args.trust_remote_code else None, + torch_dtype=model_dtype, + low_cpu_mem_usage=model_args.low_cpu_mem_usage, + device_map=training_args.device.type if model_args.load_meta_device else None, + token=model_args.token, ) model.generation_config.pad_token_id = 0 model.generation_config.bos_token_id = 1 @@ -681,14 +804,32 @@ def concatenate_data(dataset, max_seq_length): model.print_trainable_parameters() # Initialize our Trainer - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - tokenizer=tokenizer, - data_collator=data_collator, - ) + if is_optimum_habana_available(): + from optimum.habana import GaudiConfig, GaudiTrainer + + gaudi_config = GaudiConfig() + gaudi_config.use_fused_adam = True + gaudi_config.use_fused_clip_norm = True + + trainer = GaudiTrainer( + model=model, + args=training_args, + gaudi_config=gaudi_config, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=eval_dataset if training_args.do_eval else None, + tokenizer=tokenizer, + data_collator=data_collator, + ) + else: + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=eval_dataset if training_args.do_eval else None, + tokenizer=tokenizer, + data_collator=data_collator, + ) + trainer.add_callback(CustomPrinterCallback) training_start = datetime.now() @@ -720,7 +861,13 @@ def concatenate_data(dataset, max_seq_length): # Get and log evaluation metrics max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) 
eval_metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - eval_metrics["perplexity"] = math.exp(eval_metrics["eval_loss"]) + + try: + perplexity = math.exp(eval_metrics["eval_loss"]) + except OverflowError: + perplexity = float("inf") + eval_metrics["perplexity"] = perplexity + trainer.log_metrics("eval", eval_metrics) trainer.save_metrics("eval", eval_metrics) @@ -737,6 +884,9 @@ def concatenate_data(dataset, max_seq_length): for_calib=True) if benchmark_args.do_benchmark: + from neural_compressor import benchmark + from neural_compressor.config import BenchmarkConfig + os.environ['NC_ENV_CONF'] = 'True' if benchmark_args.benchmark_cores_per_instance == -1: benchmark_args.benchmark_cores_per_instance = None @@ -762,6 +912,8 @@ def concatenate_data(dataset, max_seq_length): # Post training quantization if quant_args.do_quantize: logger.info("Post training quantization") + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig # Currently this script only supports weight only quantization quant_config = PostTrainingQuantConfig( @@ -790,6 +942,8 @@ def concatenate_data(dataset, max_seq_length): int8_latency = int8_throughput = None if benchmark_args.do_benchmark and quant_args.quantize_output_dir is not None and \ os.path.exists(quant_args.quantize_output_dir) and len(os.listdir(quant_args.quantize_output_dir)) > 0: + from neural_compressor.utils.pytorch import load + # Load the quantized model using INC kwargs = {'weight_only': True} reloaded_quantized_model = load(quant_args.quantize_output_dir, model, dataloader=calib_dataloader, From c736bfc67af073975e1fbff8a5d5f4b7e044237c Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Mon, 29 Apr 2024 15:16:10 -0700 Subject: [PATCH 3/8] Tests and removing unnecessary arg in the LLM fine tuning script (#484) * Formatting and removing unnecessary arg * Fixing comment header * Fix creating output directory and mark test as integration --- docker/hf_k8s/scripts/finetune.py | 4 -- tests/docker/test_hf_k8s_script.py | 104 +++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 tests/docker/test_hf_k8s_script.py diff --git a/docker/hf_k8s/scripts/finetune.py b/docker/hf_k8s/scripts/finetune.py index bf27a46..44a09f4 100644 --- a/docker/hf_k8s/scripts/finetune.py +++ b/docker/hf_k8s/scripts/finetune.py @@ -117,10 +117,6 @@ class ModelArguments: "with private models)." }, ) - use_gaudi: bool = field( - default=False, - metadata={"help": "Fine tune using Intel Gaudi accelerators."}, - ) use_auth_token: bool = field( default=False, metadata={ diff --git a/tests/docker/test_hf_k8s_script.py b/tests/docker/test_hf_k8s_script.py new file mode 100644 index 0000000..ebe5e81 --- /dev/null +++ b/tests/docker/test_hf_k8s_script.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +import os +import pytest +import sys +import tempfile +import yaml + +from downloader.datasets import DataDownloader +from shutil import rmtree +from tlt import TLT_BASE_DIR +from unittest.mock import patch + + +@pytest.mark.pytorch +def test_no_hf_token(): + """ + Verifies that none of our values yaml files have tokens (the value should be blank) + """ + helm_chart_dir = os.path.join(TLT_BASE_DIR, "../docker/hf_k8s/chart") + + for values_file in [d for d in os.listdir(helm_chart_dir) if "values" in d]: + file_path = os.path.join(helm_chart_dir, values_file) + with open(file_path, 'r') as f: + values_yaml = yaml.safe_load(f) + assert "secret" in values_yaml + assert "encodedToken" in values_yaml["secret"] + assert values_yaml["secret"]["encodedToken"] is None, "encodedToken value found in {}".format(values_file) + + +@pytest.mark.integration +@pytest.mark.pytorch +def test_llm_finetune_script(): + """ + This is a basic test that runs the LLM fine tuning using distilgpt2 with the code_alpaca_2k with a + limited number of steps. + """ + sys.path.append(os.path.join(TLT_BASE_DIR, "../docker/hf_k8s/scripts")) + from finetune import BenchmarkArguments, DataArguments, FinetuneArguments, main, ModelArguments, \ + QuantizationArguments, TrainingArguments + + # Define the dataset directory and download a test dataset + dataset_dir = os.getenv('DATASET_DIR', tempfile.mkdtemp(dir='/tmp/data')) + dataset_path = os.path.join(dataset_dir, 'code_alpaca_2k.json') + if not os.path.exists(dataset_path): + download_url = "https://raw.githubusercontent.com/sahil280114/codealpaca/master/data/code_alpaca_2k.json" + data_downloader = DataDownloader("code_alpaca_2k", dataset_dir, url=download_url) + data_downloader.download() + assert os.path.exists(dataset_path) + + # Define the output directory + output_dir = os.getenv('OUTPUT_DIR', '/tmp/output') + os.makedirs(output_dir, exist_ok=True) + output_dir = tempfile.mkdtemp(dir=output_dir) + + try: + with patch('transformers.HfArgumentParser.parse_args_into_dataclasses') as mock_parser: + model_args = ModelArguments() + model_args.model_name_or_path = "distilbert/distilgpt2" + + data_args = DataArguments(train_file=dataset_path, validation_split_percentage=0.2, max_eval_samples=5) + finetune_args = FinetuneArguments(use_lora=False) + training_args = TrainingArguments(output_dir=output_dir, do_train=True, do_eval=True, max_steps=5) + benchmark_args = BenchmarkArguments(do_benchmark=False) + quant_args = QuantizationArguments(do_quantize=False) + + mock_parser.return_value = model_args, data_args, finetune_args, quant_args, \ + benchmark_args, training_args, {} + main() + assert len(os.listdir(output_dir)) > 0 + finally: + for d in [dataset_dir, output_dir]: + rmtree(d, ignore_errors=True) + + +@pytest.mark.pytorch +def test_optimum_habana_unavailable(): + """ + This test checks that the is_optimum_habana_available() method returns False in our test environment, which does + not have optimum-habana installed. 
+ """ + sys.path.append(os.path.join(TLT_BASE_DIR, "../docker/hf_k8s/scripts")) + from finetune import is_optimum_habana_available + + # In our normal test environment, optimum-habana is unavailable + assert not is_optimum_habana_available() From 92cf739d30fd74fce440e1c5b947cca6de118592 Mon Sep 17 00:00:00 2001 From: okhleif-IL <87550612+okhleif-IL@users.noreply.github.com> Date: Wed, 15 May 2024 12:29:11 -0700 Subject: [PATCH 4/8] Helm chart for Gaudi K8 LLM Finetuning (#485) * initial commit for helm chart * more args accounted for * rm token * review comments * added if statement * fixed bool type * fixed useHabana bug * fixed helm install * fixed use_ipex value * removed unneeded params * added privileged value * addressed review comments --- docker/hf_k8s/chart/gaudi_values.yaml | 108 +++++++++ docker/hf_k8s/chart/templates/gaudijob.yaml | 220 ++++++++++++++++++ docker/hf_k8s/chart/templates/pytorchjob.yaml | 2 + 3 files changed, 330 insertions(+) create mode 100644 docker/hf_k8s/chart/gaudi_values.yaml create mode 100644 docker/hf_k8s/chart/templates/gaudijob.yaml diff --git a/docker/hf_k8s/chart/gaudi_values.yaml b/docker/hf_k8s/chart/gaudi_values.yaml new file mode 100644 index 0000000..edafeaf --- /dev/null +++ b/docker/hf_k8s/chart/gaudi_values.yaml @@ -0,0 +1,108 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + + +metadata: + name: llama2-gaudi-finetuning + namespace: kubeflow + +secret: + encodedToken: + +image: + name: amr-registry.caas.intel.com/aiops/mlops-ci # Specify the image name that was pushed to docker hub or copied to the nodes + tag: optimum-habana-llm-1.15.1 # Specify the image tag that was pushed to docker hub or copied to the nodes + pullPolicy: IfNotPresent + +securityContext: + runAsUser: + runAsGroup: + fsGroup: + privileged: true + allowPrivilegeEscalation: false + +distributed: + script: /workspace/scripts/finetune.py + modelNameOrPath: meta-llama/Llama-2-7b-chat-hf + logLevel: info + + doTrain: True + doEval: True + + train: + datasetName: # Name of the Hugging Face dataset to use. Leave blank if using a data file + dataFile: /tmp/pvc-mount/dataset/financial-alpaca.json + datasetConcatenation: True + promptWithInput: Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. + promptWithoutInput: Below is an instruction that describes a task. Write a response that appropriately completes the request. 
+ perDeviceBatchSize: 8 + epochs: 3 + maxSteps: -1 + maxGradNorm: 0.3 + gradientAccumulationSteps: 1 + learningRate: 1e-4 + lrSchedularType: "constant" + useFastTokenizer: False + outputDir: /tmp/pvc-mount/output/saved_model + loggingSteps: 10 + saveTotalLimit: 2 + evaluationStrategy: "no" + saveStrategy: "no" + warmupRatio: 0.03 + throughputWarmupSteps: 3 + useLora: True + useLazyMode: True + loraRank: 8 + loraAlpha: 16 + loraDropout: 0.05 + loraTargetModules: q_proj vproj + noCuda: True + overwriteOutputDir: True + adamEpsilon: 1e-08 + bf16: True + useHabana: true + Token: + eval: + perDeviceBatchSize: 8 + validationSplitPercentage: 0.20 + +envVars: + ldPreload: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4.5.9:/usr/local/lib/libiomp5.so + logLevel: INFO + transformersCache: /tmp/pvc-mount/transformers_cache + hfDatasetsCache: /tmp/pvc-mount/hf_dataset_cache + hfHome: /tmp/home + httpProxy: + httpsProxy: + noProxy: + ftpProxy: + socksProxy: + +# Resources allocated to each worker +resources: + hpuRequest: + hpuLimit: 1 + memoryRequest: + memoryLimit: 409Gi + nodeSelectorLabel: + nodeSelectorValue: + hugePages2Mi: 35202Mi + +# Persistent volume claim storage resources +storage: + storageClassName: nfs-client + resources: 50Gi + pvcMountPath: /tmp/pvc-mount diff --git a/docker/hf_k8s/chart/templates/gaudijob.yaml b/docker/hf_k8s/chart/templates/gaudijob.yaml new file mode 100644 index 0000000..9115733 --- /dev/null +++ b/docker/hf_k8s/chart/templates/gaudijob.yaml @@ -0,0 +1,220 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + + +{{- if .Values.distributed.train.useHabana}} +apiVersion: "batch/v1" +kind: Job +metadata: + name: {{ .Values.metadata.name }}-gaudijob + namespace: {{ .Values.metadata.namespace }} +spec: + template: + spec: + securityContext: + runAsUser: {{ .Values.securityContext.runAsUser }} + runAsGroup: {{ .Values.securityContext.runAsGroup }} + fsGroup: {{ .Values.securityContext.fsGroup }} + containers: + - name: pytorch + image: {{ .Values.image.name }}:{{ .Values.image.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - python + - {{ .Values.distributed.script }} + - --model_name_or_path + - "{{ .Values.distributed.modelNameOrPath }}" + - --dataset_name + - "{{ .Values.distributed.train.datasetName }}" + - --dataset_cache_directory + - "{{ .Values.envVars.hfDatasetsCache }}" + - --train_file + - "{{ .Values.distributed.train.dataFile }}" + - --dataset_concatenation + - "{{ .Values.distributed.train.datasetConcatenation }}" + - --evaluation_strategy + - "{{ .Values.distributed.train.evaluationStrategy }}" + - --prompt_with_input + - "{{ .Values.distributed.train.promptWithInput }}" + - --prompt_without_input + - "{{ .Values.distributed.train.promptWithoutInput }}" + - --per_device_train_batch_size + - "{{ .Values.distributed.train.perDeviceBatchSize }}" + - --per_device_eval_batch_size + - "{{ .Values.distributed.eval.perDeviceBatchSize }}" + - --gradient_accumulation_steps + - "{{ .Values.distributed.train.gradientAccumulationSteps }}" + - --learning_rate + - "{{ .Values.distributed.train.learningRate }}" + - --lr_scheduler_type + - "{{ .Values.distributed.train.lrSchedularType }}" + - --num_train_epochs + - "{{ .Values.distributed.train.epochs }}" + - --max_steps + - "{{ .Values.distributed.train.maxSteps }}" + - --max_grad_norm + - "{{ .Values.distributed.train.maxGradNorm }}" + - --logging_steps + - "{{ .Values.distributed.train.loggingSteps }}" + - --save_total_limit + - "{{ .Values.distributed.train.saveTotalLimit }}" + - --output_dir + - "{{ .Values.distributed.train.outputDir }}" + - --validation_split_percentage + - "{{ .Values.distributed.eval.validationSplitPercentage }}" + - --log_level + - "{{ .Values.distributed.logLevel }}" + - --save_strategy + - "{{ .Values.distributed.train.saveStrategy }}" + - --warmup_ratio + - "{{ .Values.distributed.train.warmupRatio }}" + - --use_fast_tokenizer + - "{{ .Values.distributed.train.useFastTokenizer }}" + - --use_lora + - "{{ .Values.distributed.train.useLora }}" + - --lora_rank + - "{{ .Values.distributed.train.loraRank }}" + - --lora_alpha + - "{{ .Values.distributed.train.loraAlpha }}" + - --lora_dropout + - "{{ .Values.distributed.train.loraDropout }}" + - --lora_target_modules + - "{{ .Values.distributed.train.loraTargetModules }}" + - --no_cuda + - "{{ .Values.distributed.train.noCuda }}" + - --overwrite_output_dir + - "{{ .Values.distributed.train.overwriteOutputDir }}" + - --do_train + - "{{ .Values.distributed.doTrain }}" + - --do_eval + - "{{ .Values.distributed.doEval }}" + - --bf16 + - "{{ .Values.distributed.train.bf16 }}" + - --adam_epsilon + - "{{ .Values.distributed.train.adamEpsilon }}" + - --use_ipex + - "False" + - --use_habana + - "{{ .Values.distributed.train.useHabana }}" + - --use_lazy_mode + - "{{ .Values.distributed.train.useLazyMode }}" + - --throughput_warmup_steps + - "{{ .Values.distributed.train.throughputWarmupSteps }}" + - --token + - "{{ .Values.distributed.train.Token }}" + + env: + {{- if .Values.envVars.ldPreload }} + - name: LD_PRELOAD + 
value: "{{ .Values.envVars.ldPreload }}" + {{- end }} + {{- if .Values.envVars.httpProxy }} + - name: http_proxy + value: "{{ .Values.envVars.httpProxy }}" + {{- end }} + {{- if .Values.envVars.httpsProxy }} + - name: https_proxy + value: "{{ .Values.envVars.httpsProxy }}" + {{- end }} + {{- if .Values.envVars.noProxy }} + - name: no_proxy + value: "{{ .Values.envVars.noProxy }}" + {{- end }} + {{- if .Values.envVars.ftpProxy }} + - name: ftp_proxy + value: "{{ .Values.envVars.ftpProxy }}" + {{- end }} + {{- if .Values.envVars.socksProxy }} + - name: socks_proxy + value: "{{ .Values.envVars.socksProxy }}" + {{- end }} + {{- if .Values.envVars.transformersCache }} + - name: TRANSFORMERS_CACHE + value: "{{ .Values.envVars.transformersCache }}" + {{- end }} + {{- if .Values.envVars.hfDatasetsCache }} + - name: HF_DATASETS_CACHE + value: "{{ .Values.envVars.hfDatasetsCache }}" + {{- end }} + {{- if .Values.envVars.hfHome }} + - name: HF_HOME + value: "{{ .Values.envVars.hfHome }}" + {{- end }} + {{- if .Values.envVars.logLevel }} + - name: LOGLEVEL + value: "{{ .Values.envVars.logLevel }}" + {{- end }} + resources: + {{- if or .Values.resources.hpuLimit .Values.resources.memoryLimit .Values.resources.hugePages2Mi}} + limits: + {{- if .Values.resources.hpuLimit }} + habana.ai/gaudi: {{ .Values.resources.hpuLimit }} + {{- end }} + {{- if .Values.resources.memoryLimit }} + memory: {{ .Values.resources.memoryLimit }} + {{- end }} + {{- if .Values.resources.hugePages2Mi }} + hugepages-2Mi: {{ .Values.resources.hugePages2Mi }} + {{- end }} + {{- end }} + {{- if or .Values.resources.hpuLimit .Values.resources.memoryLimit .Values.resources.hugePages2Mi}} + requests: + {{- if .Values.resources.hpuRequest }} + habana.ai/gaudi: {{ .Values.resources.hpuRequest }} + {{- end }} + {{- if .Values.resources.memoryRequest }} + memory: {{ .Values.resources.memoryRequest }} + {{- end }} + {{- if .Values.resources.hugePages2Mi }} + hugepages-2Mi: {{ .Values.resources.hugePages2Mi }} + {{- end }} + {{- end }} + volumeMounts: + - name: output-dir + mountPath: {{ .Values.storage.pvcMountPath }} + - mountPath: /dev/shm + name: dshm + {{- if .Values.secret.encodedToken}} + - name: secret-volume + mountPath: {{ .Values.envVars.hfHome }} + readOnly: true + {{- end }} + {{- if .Values.securityContext.allowPrivilegeEscalation }} + securityContext: + allowPrivilegeEscalation: {{ .Values.securityContext.allowPrivilegeEscalation }} + {{- end }} + {{- if .Values.securityContext.privileged }} + securityContext: + privileged: {{ .Values.securityContext.privileged }} + {{- end }} + restartPolicy: Never + {{- if .Values.resources.nodeSelectorLabel }} + nodeSelector: + {{ .Values.resources.nodeSelectorLabel }}: {{ .Values.resources.nodeSelectorValue }} + {{- end }} + volumes: + - name: output-dir + persistentVolumeClaim: + claimName: {{ .Values.metadata.name }}-pvc + - name: dshm + emptyDir: + medium: Memory + {{- if .Values.secret.encodedToken}} + - name: secret-volume + secret: + secretName: {{ .Values.metadata.name }}-secret + {{- end }} +{{- end }} diff --git a/docker/hf_k8s/chart/templates/pytorchjob.yaml b/docker/hf_k8s/chart/templates/pytorchjob.yaml index 82c04b4..6ee404b 100644 --- a/docker/hf_k8s/chart/templates/pytorchjob.yaml +++ b/docker/hf_k8s/chart/templates/pytorchjob.yaml @@ -15,6 +15,7 @@ # SPDX-License-Identifier: Apache-2.0 +{{- if ne .Values.distributed.train.useHabana true}} apiVersion: "kubeflow.org/v1" kind: PyTorchJob metadata: @@ -228,3 +229,4 @@ spec: secret: secretName: {{ .Values.metadata.name }}-secret 
{{- end }} +{{- end }} From 4bef8f0ef1e187b029607e9f0df16155484c9366 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 17:17:25 -0700 Subject: [PATCH 5/8] Bump tqdm from 4.66.1 to 4.66.3 in /workflows/vision_anomaly_detection (#486) Bumps [tqdm](https://github.com/tqdm/tqdm) from 4.66.1 to 4.66.3. - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.1...v4.66.3) --- updated-dependencies: - dependency-name: tqdm dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- workflows/vision_anomaly_detection/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/vision_anomaly_detection/requirements.txt b/workflows/vision_anomaly_detection/requirements.txt index ffbbef5..ac52baf 100644 --- a/workflows/vision_anomaly_detection/requirements.txt +++ b/workflows/vision_anomaly_detection/requirements.txt @@ -7,4 +7,4 @@ prettytable==3.9.0 scikit-learn-intelex==2023.1.1 torch==2.2.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu torchvision==0.17.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu -tqdm==4.66.1 +tqdm==4.66.3 From e08df59c2e0b905cb58cc3c427e3260bfb891b83 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 17 May 2024 11:27:46 -0700 Subject: [PATCH 6/8] Adds multi-card support to the Gaudi LLM k8s fine tuning workflow (#487) * Adds multi-card support to the Gaudi LLM k8s fine tuning workflow * Update Gaudi Dockerfile * Update doc * Update based on review feedback * Update to dockerfile based on review feedback --- docker/hf_k8s/Dockerfile.gaudi | 36 +++ .../hf_k8s/chart/gaudi_multicard_values.yaml | 108 +++++++++ docker/hf_k8s/chart/gaudi_values.yaml | 10 +- docker/hf_k8s/chart/templates/gaudijob.yaml | 30 ++- docker/hf_k8s/gaudi.md | 214 ++++++++++++++++++ 5 files changed, 383 insertions(+), 15 deletions(-) create mode 100644 docker/hf_k8s/Dockerfile.gaudi create mode 100644 docker/hf_k8s/chart/gaudi_multicard_values.yaml create mode 100644 docker/hf_k8s/gaudi.md diff --git a/docker/hf_k8s/Dockerfile.gaudi b/docker/hf_k8s/Dockerfile.gaudi new file mode 100644 index 0000000..44403bf --- /dev/null +++ b/docker/hf_k8s/Dockerfile.gaudi @@ -0,0 +1,36 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +ARG GAUDI_SW_VER=1.15.1 +ARG OS=ubuntu22.04 +ARG TORCH_VER=2.2.0 + +FROM vault.habana.ai/gaudi-docker/${GAUDI_SW_VER}/${OS}/habanalabs/pytorch-installer-${TORCH_VER}:latest + +ARG GAUDI_SW_VER=1.15.1 +ARG OPTIMUM_HABANA_VER=1.11.1 + +RUN pip install --no-cache-dir optimum-habana==${OPTIMUM_HABANA_VER} && \ + git clone https://github.com/huggingface/optimum-habana.git --single-branch --branch v${OPTIMUM_HABANA_VER} && \ + pip install --no-cache-dir -r optimum-habana/examples/language-modeling/requirements.txt && \ + mkdir -p /workspace/optimum-habana && \ + cp -r optimum-habana/examples /workspace/optimum-habana/examples && \ + rm -rf optimum-habana && \ + pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@${GAUDI_SW_VER} + +COPY scripts /workspace/scripts + +WORKDIR /workspace diff --git a/docker/hf_k8s/chart/gaudi_multicard_values.yaml b/docker/hf_k8s/chart/gaudi_multicard_values.yaml new file mode 100644 index 0000000..c1b5eef --- /dev/null +++ b/docker/hf_k8s/chart/gaudi_multicard_values.yaml @@ -0,0 +1,108 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + + +metadata: + name: llama2-gaudi-multicard + namespace: kubeflow + +secret: + encodedToken: + +image: + name: # Specify the image name that was pushed to docker hub or copied to the nodes + tag: # Specify the image tag that was pushed to docker hub or copied to the nodes + pullPolicy: Always + +securityContext: + runAsUser: + runAsGroup: + fsGroup: + privileged: true + allowPrivilegeEscalation: false + +distributed: + script: /workspace/scripts/finetune.py + modelNameOrPath: meta-llama/Llama-2-7b-hf + logLevel: info + + doTrain: True + doEval: True + + train: + useMpi: false + useDeepSpeed: false + datasetName: medalpaca/medical_meadow_medical_flashcards # Name of the Hugging Face dataset to use. Leave blank if using a data file + dataFile: + datasetConcatenation: True + promptWithInput: Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. + promptWithoutInput: Below is an instruction that describes a task. Write a response that appropriately completes the request. 
+ perDeviceBatchSize: 12 + epochs: 3 + maxSteps: -1 + maxGradNorm: 0.3 + gradientAccumulationSteps: 1 + learningRate: 1e-4 + lrSchedularType: "constant" + useFastTokenizer: False + outputDir: /tmp/pvc-mount/output/saved_model + loggingSteps: 1020 + saveTotalLimit: 2 + evaluationStrategy: "no" + saveStrategy: "no" + warmupRatio: 0.03 + throughputWarmupSteps: 3 + useLora: True + useLazyMode: True + loraRank: 8 + loraAlpha: 16 + loraDropout: 0.05 + loraTargetModules: q_proj vproj + noCuda: True + overwriteOutputDir: True + adamEpsilon: 1e-08 + bf16: True + useHabana: true + eval: + perDeviceBatchSize: 8 + validationSplitPercentage: 0.20 + +envVars: + ldPreload: + logLevel: INFO + transformersCache: /tmp/pvc-mount/transformers_cache + hfDatasetsCache: /tmp/pvc-mount/hf_dataset_cache + hfHome: /tmp/home + httpProxy: + httpsProxy: + noProxy: + ftpProxy: + socksProxy: + +# Resources allocated to each worker +resources: + hpu: 8 + memoryRequest: 409Gi + memoryLimit: 409Gi + nodeSelectorLabel: + nodeSelectorValue: + hugePages2Mi: 35202Mi + +# Persistent volume claim storage resources +storage: + storageClassName: nfs-client + resources: 50Gi + pvcMountPath: /tmp/pvc-mount diff --git a/docker/hf_k8s/chart/gaudi_values.yaml b/docker/hf_k8s/chart/gaudi_values.yaml index edafeaf..274257f 100644 --- a/docker/hf_k8s/chart/gaudi_values.yaml +++ b/docker/hf_k8s/chart/gaudi_values.yaml @@ -23,8 +23,8 @@ secret: encodedToken: image: - name: amr-registry.caas.intel.com/aiops/mlops-ci # Specify the image name that was pushed to docker hub or copied to the nodes - tag: optimum-habana-llm-1.15.1 # Specify the image tag that was pushed to docker hub or copied to the nodes + name: # Specify the image name that was pushed to docker hub or copied to the nodes + tag: # Specify the image tag that was pushed to docker hub or copied to the nodes pullPolicy: IfNotPresent securityContext: @@ -74,7 +74,6 @@ distributed: adamEpsilon: 1e-08 bf16: True useHabana: true - Token: eval: perDeviceBatchSize: 8 validationSplitPercentage: 0.20 @@ -93,9 +92,8 @@ envVars: # Resources allocated to each worker resources: - hpuRequest: - hpuLimit: 1 - memoryRequest: + hpu: 1 + memoryRequest: 409Gi memoryLimit: 409Gi nodeSelectorLabel: nodeSelectorValue: diff --git a/docker/hf_k8s/chart/templates/gaudijob.yaml b/docker/hf_k8s/chart/templates/gaudijob.yaml index 9115733..04d3f04 100644 --- a/docker/hf_k8s/chart/templates/gaudijob.yaml +++ b/docker/hf_k8s/chart/templates/gaudijob.yaml @@ -34,15 +34,30 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} command: - python + {{- if gt (int .Values.resources.hpu) 1 }} + - /workspace/optimum-habana/examples/gaudi_spawn.py + - --world_size + - "{{ .Values.resources.hpu }}" + {{- if .Values.distributed.train.useMpi }} + - --use_mpi + {{- end }} + {{- if .Values.distributed.train.useDeepSpeed }} + - --use_deepspeed + {{- end }} + {{- end }} - {{ .Values.distributed.script }} - --model_name_or_path - "{{ .Values.distributed.modelNameOrPath }}" + {{- if .Values.distributed.train.datasetName }} - --dataset_name - "{{ .Values.distributed.train.datasetName }}" + {{- end }} - --dataset_cache_directory - "{{ .Values.envVars.hfDatasetsCache }}" + {{- if .Values.distributed.train.dataFile }} - --train_file - "{{ .Values.distributed.train.dataFile }}" + {{- end }} - --dataset_concatenation - "{{ .Values.distributed.train.datasetConcatenation }}" - --evaluation_strategy @@ -113,9 +128,6 @@ spec: - "{{ .Values.distributed.train.useLazyMode }}" - --throughput_warmup_steps - "{{ 
.Values.distributed.train.throughputWarmupSteps }}"
-      - --token
-      - "{{ .Values.distributed.train.Token }}"
-
       env:
       {{- if .Values.envVars.ldPreload }}
       - name: LD_PRELOAD
@@ -158,10 +170,10 @@ spec:
         value: "{{ .Values.envVars.logLevel }}"
       {{- end }}
       resources:
-      {{- if or .Values.resources.hpuLimit .Values.resources.memoryLimit .Values.resources.hugePages2Mi}}
+      {{- if or .Values.resources.hpu .Values.resources.memoryLimit .Values.resources.hugePages2Mi }}
        limits:
-        {{- if .Values.resources.hpuLimit }}
-        habana.ai/gaudi: {{ .Values.resources.hpuLimit }}
+        {{- if .Values.resources.hpu }}
+        habana.ai/gaudi: {{ .Values.resources.hpu }}
         {{- end }}
         {{- if .Values.resources.memoryLimit }}
         memory: {{ .Values.resources.memoryLimit }}
@@ -170,10 +182,10 @@ spec:
         hugepages-2Mi: {{ .Values.resources.hugePages2Mi }}
         {{- end }}
       {{- end }}
-      {{- if or .Values.resources.hpuLimit .Values.resources.memoryLimit .Values.resources.hugePages2Mi}}
+      {{- if or .Values.resources.hpu .Values.resources.memoryLimit .Values.resources.hugePages2Mi }}
        requests:
-        {{- if .Values.resources.hpuRequest }}
-        habana.ai/gaudi: {{ .Values.resources.hpuRequest }}
+        {{- if .Values.resources.hpu }}
+        habana.ai/gaudi: {{ .Values.resources.hpu }}
         {{- end }}
         {{- if .Values.resources.memoryRequest }}
         memory: {{ .Values.resources.memoryRequest }}
diff --git a/docker/hf_k8s/gaudi.md b/docker/hf_k8s/gaudi.md
new file mode 100644
index 0000000..e261475
--- /dev/null
+++ b/docker/hf_k8s/gaudi.md
@@ -0,0 +1,214 @@
+# LLM fine tuning with Kubernetes and Intel® Gaudi® AI Accelerators
+
+## Docker Image
+
+Prior to deploying the fine tuning job to Kubernetes, a Docker image needs to be built and pushed to a container repo,
+or copied to the Gaudi nodes on your Kubernetes cluster. The [`Dockerfile.gaudi`](Dockerfile.gaudi) used to run the
+fine tuning job uses `vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest` as its
+base and then adds library installations like [optimum-habana](https://github.com/huggingface/optimum-habana). The
+container also includes example scripts from optimum-habana and the [Llama 2 fine tuning script](scripts/finetune.py) from
+this workflow directory.
+
+### Container Build
+
+The [`Dockerfile.gaudi`](Dockerfile.gaudi) has build arguments for the following versions:
+
+| Argument | Default Value | Description |
+|----------|---------------|-------------|
+| GAUDI_SW_VER | `1.15.1` | SynapseAI / Gaudi driver version |
+| OS | `ubuntu22.04` | Base image tag |
+| TORCH_VER | `2.2.0` | Torch version |
+| OPTIMUM_HABANA_VER | `1.11.1` | Hugging Face Optimum Habana version |
+
+The container can be built with the default package versions using the following command:
+```
+docker build -t <image name>:<tag> .
+```
+
+Alternatively, build arguments can be passed to the build command to use different versions:
+```
+export GAUDI_SW_VER=<Gaudi software version>
+export OS=<OS base image tag>
+export TORCH_VER=<PyTorch version>
+export OPTIMUM_HABANA_VER=<Optimum Habana version>
+
+docker build \
+    --build-arg GAUDI_SW_VER=${GAUDI_SW_VER} \
+    --build-arg OS=${OS} \
+    --build-arg TORCH_VER=${TORCH_VER} \
+    --build-arg OPTIMUM_HABANA_VER=${OPTIMUM_HABANA_VER} \
+    -t <image name>:<tag> .
+```
+
+### Container Push
+
+The container needs to be pushed for the Kubernetes cluster to have access to the image. If you have a Docker container
+registry (such as [DockerHub](https://hub.docker.com)), you can push the container to that registry. Otherwise, we have
+alternative instructions for getting the container distributed to the cluster nodes by saving the image and copying it
+to the nodes.
+
+Use one of these options to push the container:
+
+a. First, ensure that you are logged in with your container registry account using
+   [`docker login`](https://docs.docker.com/engine/reference/commandline/login/). Next,
+   [re-tag your image](https://docs.docker.com/engine/reference/commandline/tag/) and then
+   [push the image](https://docs.docker.com/engine/reference/commandline/push/) to the registry.
+   ```
+   # Retag the image by providing the source image and destination image
+   docker tag <source image name>:<tag> <destination image name>:<tag>
+
+   # Push the image to the registry
+   docker push <destination image name>:<tag>
+   ```
+b. If you don't have a container registry, use the commands below to save the container, copy it to the nodes on the
+   Kubernetes cluster, and then load it into Docker.
+   ```
+   # Save the image to a tar.gz file
+   docker save <image name>:<tag> | gzip > hf_k8s.tar.gz
+
+   # Copy the tar file to every Kubernetes node that could be used to run the fine tuning job
+   scp hf_k8s.tar.gz <user>@<node>:/tmp/hf_k8s.tar.gz
+
+   # SSH to each of the Kubernetes nodes and load the image to Docker
+   docker load --input /tmp/hf_k8s.tar.gz
+   ```
+
+> Note: The `<image name>:<tag>` that was pushed needs to be specified in the Helm chart values file.
+
+## Running the fine tuning job on the Kubernetes cluster
+
+There are two Helm values files that are set up to run LLM fine tuning with Gaudi:
+
+| Value file name | Description |
+|-----------------|-------------|
+| [`gaudi_values.yaml`](chart/gaudi_values.yaml) | Uses a single Gaudi card to fine tune `meta-llama/Llama-2-7b-chat-hf` using a subset of the [Financial alpaca dataset](https://huggingface.co/datasets/gbharti/finance-alpaca) |
+| [`gaudi_multicard_values.yaml`](chart/gaudi_multicard_values.yaml) | Uses 8 Gaudi cards to fine tune `meta-llama/Llama-2-7b-hf` using the [Medical Meadow flashcards dataset](https://huggingface.co/datasets/medalpaca/medical_meadow_medical_flashcards) |
+
+Pick one of the value files to use depending on your desired use case, make modifications to customize your fine tuning
+job, and then use the instructions below to deploy the job to your cluster.
+
+> Before running the fine tuning job on the cluster, the Docker image must be built and pushed to a container
+> registry or loaded into Docker on the cluster nodes. See the [container build](#container-build) and
+> [container push](#container-push) sections for instructions.
+
+1. If you are using a gated model, get a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) with read
+   access and use your terminal to get the base64 encoding for your token using
+   `echo <your token> | base64`. If you are not using a gated model, you can skip this step.
+
+   For example:
+   ```
+   $ echo hf_ABCDEFG | base64
+   aGZfQUJDREVGRwo=
+   ```
+
+   Copy and paste the encoded token value into the `encodedToken` field in the `secret` section of your values yaml
+   file. For example:
+   ```
+   secret:
+     name: hf-token-secret
+     encodedToken: aGZfQUJDREVGRwo=
+   ```
+
+2. Edit your values file based on the parameters that you would like to use and your cluster. Key parameters to look
+   at and edit are:
+   * `image.name` should be set to the name of your docker image
+   * `image.tag` should be set to the tag of your docker image
+   * `resources.hpu` specifies the number of Gaudi cards to use.
+   * `resources.memoryRequest` and `resources.memoryLimit` values should be updated based on the amount of memory
+     available on the nodes in your cluster
+   * `resources.hugePages2Mi` to specify the hugepages-2Mi request/limit based on your Gaudi node.
+   * `storage.storageClassName` should be set to your Kubernetes NFS storage class name (use `kubectl get storageclass`
+     to see a list of storage classes on your cluster)
+
+   In the same values file, edit the security context parameters to have the containers run with a non-root user:
+   * `securityContext.runAsUser` should be set to your user ID (UID)
+   * `securityContext.runAsGroup` should be set to your group ID
+   * `securityContext.fsGroup` should be set to your file system group ID
+
+   See a complete list and descriptions of the available parameters in the [Helm chart values documentation](values.md).
+
+3. Deploy the helm chart to the cluster using the `kubeflow` namespace:
+   ```
+   # Navigate to the directory that contains the Hugging Face Kubernetes example
+   cd docker/hf_k8s
+
+   # Deploy the job using the helm chart, specifying the values file with the -f parameter
+   helm install --namespace kubeflow -f chart/<values file>.yaml gaudi-llm ./chart
+   ```
+
+4. (Optional) If a custom dataset is being used, the file needs to be uploaded to the persistent volume claim (PVC), so
+   that it can be accessed by the worker pods. If your values yaml file is using a Hugging Face dataset (such as
+   `medalpaca/medical_meadow_medical_flashcards`), you can skip this step.
+
+   The dataset can be uploaded to the PVC using the [`kubectl cp` command](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#cp).
+   The destination path for the dataset needs to match the `train.dataFile` path in your values yaml file. Note that
+   the worker pods will keep failing and restarting until you upload your dataset.
+   ```
+   # Copies a local "dataset" folder to the PVC at /tmp/pvc-mount/dataset
+   kubectl cp dataset <dataaccess pod name>:/tmp/pvc-mount/dataset
+
+   # Verify that the data file is at the expected path
+   kubectl exec <dataaccess pod name> -- ls -l /tmp/pvc-mount/dataset
+   ```
+
+   For example:
+
+   The [`gaudi_values.yaml`](chart/gaudi_values.yaml) file requires this step for uploading the custom dataset to the
+   cluster. Run the [`download_financial_dataset.sh`](scripts/download_financial_dataset.sh) script to create a custom
+   dataset and copy it to the PVC, as mentioned below.
+
+   ```
+   # Set a location for the dataset to download
+   export DATASET_DIR=/tmp/dataset
+
+   # Run the download shell script
+   bash scripts/download_financial_dataset.sh
+
+   # Copy the local "dataset" folder to the PVC at /tmp/pvc-mount/dataset
+   kubectl cp ${DATASET_DIR} llama2-gaudi-finetuning-dataaccess:/tmp/pvc-mount/dataset
+   ```
+
+5. The training job can be monitored by checking the status of the PyTorchJob using:
+   * `kubectl get pytorchjob -n kubeflow`: Lists the PyTorch jobs that have been deployed to the cluster along with
+     their status.
+   * `kubectl describe pytorchjob <job name> -n kubeflow`: Lists the details of a particular PyTorch job, including
+     information about events related to the job, such as pods getting created for each worker.
+
+   The worker pods can be monitored using:
+   * `kubectl get pods -n kubeflow`: To see the pods in the `kubeflow` namespace and their status. Also, adding
+     `-o wide` to the command will additionally list out which node each pod is running on.
+   * `kubectl logs <pod name> -n kubeflow`: Dumps the log for the specified pod. Add `-f` to the command to
+     stream/follow the logs as the pod is running.
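+
+   Since the Gaudi values files deploy a batch `Job` (named `<metadata.name>-gaudijob` by the chart template), you can
+   also block until the run finishes instead of polling. This is a minimal sketch that assumes the default
+   `metadata.name` from `gaudi_values.yaml`; adjust the job name and timeout for your deployment:
+   ```
+   # Wait for the fine tuning job to report the "complete" condition (or give up after the timeout)
+   kubectl wait --namespace kubeflow --for=condition=complete --timeout=12h \
+       job/llama2-gaudi-finetuning-gaudijob
+   ```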
+
+6. After the job completes, files can be copied from the persistent volume claim to your local system using the
+   [`kubectl cp` command](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#cp) with the
+   data access pod. The path to the trained model is in the values file field called `distributed.train.outputDir`,
+   and if quantization was also done, the quantized model path is in the `distributed.quantize.outputDir` field.
+
+   As an example, the trained model from the Medical Meadows use case can be copied from the
+   `/tmp/pvc-mount/output/saved_model` path to the local system using the following command:
+   ```
+   kubectl cp --namespace kubeflow <dataaccess pod name>:/tmp/pvc-mount/output/saved_model .
+   ```
+7. Finally, the resources can be deleted from the cluster using the
+   [`helm uninstall`](https://helm.sh/docs/helm/helm_uninstall/) command. For example:
+   ```
+   helm uninstall --namespace kubeflow gaudi-llm
+   ```
+   A list of all the deployed helm releases can be seen using `helm list`.
+
+## Citations
+
+```
+@misc{touvron2023llama,
+      title={Llama 2: Open Foundation and Fine-Tuned Chat Models},
+      author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom},
+      year={2023},
+      eprint={2307.09288},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+
+@article{han2023medalpaca,
+  title={MedAlpaca--An Open-Source Collection of Medical Conversational AI Models and Training Data},
+  author={Han, Tianyu and Adams, Lisa C and Papaioannou, Jens-Michalis and Grundmann, Paul and Oberhauser, Tom and L{\"o}ser, Alexander and Truhn, Daniel and Bressem, Keno K},
+  journal={arXiv preprint arXiv:2304.08247},
+  year={2023}
+}
+```

From 16025465f4814747e963aadc3a2b38dcf1765fe4 Mon Sep 17 00:00:00 2001
From: okhleif-IL <87550612+okhleif-IL@users.noreply.github.com>
Date: Thu, 23 May 2024 16:39:46 -0700
Subject: [PATCH 7/8] updated gaudi.md with device plugin docs (#488)

---
 docker/hf_k8s/gaudi.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/docker/hf_k8s/gaudi.md b/docker/hf_k8s/gaudi.md
index e261475..4172ab1 100644
--- a/docker/hf_k8s/gaudi.md
+++ b/docker/hf_k8s/gaudi.md
@@ -75,6 +75,28 @@ b. If you don't have a container registry, use the commands below to save the co
 
 > Note: The `<image name>:<tag>` that was pushed needs to be specified in the Helm chart values file.
 
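+Before deploying the fine tuning job, it can also help to confirm that the cluster nodes advertise `habana.ai/gaudi`
+resources, which only happens once the device plugin described in the next section is running. A minimal check
+(`<node name>` is a placeholder for one of your Gaudi nodes):
+
+```
+# Show the Gaudi capacity/allocatable entries that the node reports
+kubectl describe node <node name> | grep habana.ai/gaudi
+```
+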
+## Setting up the Gaudi Device Plugin +With a Gaudi device deployed in the Kubernetes cluster, this plugin will enable the registration of that device for use. The daemonset can be deployed using the following .yaml file from the [Intel Gaudi Docs](https://docs.habana.ai/en/latest/Orchestration/Gaudi_Kubernetes/Device_Plugin_for_Kubernetes.html). Be sure to refer to the Intel Gaudi Docs for more details if need be. + +Deployment + +``` +kubectl create -f https://vault.habana.ai/artifactory/docker-k8s-device-plugin/habana-k8s-device-plugin.yaml +``` + +Checking Deployment +``` +kubectl get pods -n habana-system +``` + +Sample Output: +``` +NAME READY STATUS RESTARTS AGE +habanalabs-device-plugin-daemonset-#xxxx 1/1 Running 0 1s +... +``` +Once this is running, the Kubernetes job will know to look for a Gaudi device for usage in the job. + ## Running the fine tuning job on the Kubernetes cluster There are two Helm values files that are setup to run LLM fine tuning with Gaudi: From 59e77a889e32e8a58b583b8b3b76ddfedcb4b723 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Wed, 29 May 2024 09:34:21 -0700 Subject: [PATCH 8/8] updating docs after moving the repo to Intel Org (#489) Signed-off-by: Abolfazl Shahbazi --- GetStarted.md | 2 +- README.md | 6 +++--- api.md | 2 +- cli.md | 2 +- docs/index.rst | 2 +- docs/notebooks/README.rst | 18 +++++++++--------- notebooks/README.md | 2 +- notebooks/setup.md | 2 +- setup.py | 2 +- tests/README.md | 2 +- workflows/disease_prediction/README.md | 6 +++--- workflows/vision_anomaly_detection/README.md | 12 ++++++------ 12 files changed, 29 insertions(+), 29 deletions(-) diff --git a/GetStarted.md b/GetStarted.md index 86e17c3..41f7cf5 100644 --- a/GetStarted.md +++ b/GetStarted.md @@ -75,7 +75,7 @@ approaches. Clone the repo: ``` - git clone https://github.com/IntelAI/transfer-learning.git + git clone https://github.com/Intel/transfer-learning.git cd transfer-learning ``` diff --git a/README.md b/README.md index 75620c6..35b6079 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ *Note: You may find it easier to read about Intel Transfer Learning tool, follow the Get Started guide, and browse the API material from our published documentation site -https://intelai.github.io/transfer-learning.* +https://intel.github.io/transfer-learning.* @@ -109,12 +109,12 @@ command can be found using, for example, `tlt train --help`. ## Note on Evaluation and Bias -Intel Transfer Learning Tool provides standard evaluation metrics such as accuracy and loss for validation/test/train sets. While important, it's essential to acknowledge that these metrics may not explicitly capture biases. Users should be cautious and consider potential biases by analyzing disparities in the data and model prediction. Techniques such as confusion matrices, PR curves, ROC curves, local attribution-based and `gradCAM` explanations, can all be good indicators for bias. Clear documentation of model behavior and performance is also crucial for iterative bias mitigation. [Intel® Explainable AI Tools](https://github.com/IntelAI/intel-xai-tools/tree/main) provides components that demonstrate the aformentioned techniques with [Explainer](https://github.com/IntelAI/intel-xai-tools/tree/main/explainer), a simple API providing post-hoc model distillation and visualization methods, as well as The [Model Card Generator](https://github.com/IntelAI/intel-xai-tools/tree/main/model_card_gen) which provides an interactive HTML report that containing these workflows and demonstrations of model behavior. 
## Running the fine-tuning job on the Kubernetes cluster

There are two Helm values files that are set up to run LLM fine-tuning with Gaudi:

From 59e77a889e32e8a58b583b8b3b76ddfedcb4b723 Mon Sep 17 00:00:00 2001
From: Abolfazl Shahbazi
Date: Wed, 29 May 2024 09:34:21 -0700
Subject: [PATCH 8/8] updating docs after moving the repo to Intel Org (#489)

Signed-off-by: Abolfazl Shahbazi
---
 GetStarted.md                                |  2 +-
 README.md                                    |  6 +++---
 api.md                                       |  2 +-
 cli.md                                       |  2 +-
 docs/index.rst                               |  2 +-
 docs/notebooks/README.rst                    | 18 +++++++++---------
 notebooks/README.md                          |  2 +-
 notebooks/setup.md                           |  2 +-
 setup.py                                     |  2 +-
 tests/README.md                              |  2 +-
 workflows/disease_prediction/README.md       |  6 +++---
 workflows/vision_anomaly_detection/README.md | 12 ++++++------
 12 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/GetStarted.md b/GetStarted.md
index 86e17c3..41f7cf5 100644
--- a/GetStarted.md
+++ b/GetStarted.md
@@ -75,7 +75,7 @@ approaches.
 Clone the repo:

 ```
- git clone https://github.com/IntelAI/transfer-learning.git
+ git clone https://github.com/Intel/transfer-learning.git
 cd transfer-learning
 ```

diff --git a/README.md b/README.md
index 75620c6..35b6079 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 *Note: You may find it easier to read about Intel Transfer Learning tool, follow the Get Started guide, and browse the API material from our published documentation site
-https://intelai.github.io/transfer-learning.*
+https://intel.github.io/transfer-learning.*

@@ -109,12 +109,12 @@ command can be found using, for example, `tlt train --help`.

 ## Note on Evaluation and Bias

-Intel Transfer Learning Tool provides standard evaluation metrics such as accuracy and loss for validation/test/train sets. While important, it's essential to acknowledge that these metrics may not explicitly capture biases. Users should be cautious and consider potential biases by analyzing disparities in the data and model prediction. Techniques such as confusion matrices, PR curves, ROC curves, local attribution-based and `gradCAM` explanations, can all be good indicators for bias. Clear documentation of model behavior and performance is also crucial for iterative bias mitigation. [Intel® Explainable AI Tools](https://github.com/IntelAI/intel-xai-tools/tree/main) provides components that demonstrate the aformentioned techniques with [Explainer](https://github.com/IntelAI/intel-xai-tools/tree/main/explainer), a simple API providing post-hoc model distillation and visualization methods, as well as The [Model Card Generator](https://github.com/IntelAI/intel-xai-tools/tree/main/model_card_gen) which provides an interactive HTML report that containing these workflows and demonstrations of model behavior.
+Intel Transfer Learning Tool provides standard evaluation metrics such as accuracy and loss for validation/test/train sets. While important, it's essential to acknowledge that these metrics may not explicitly capture biases. Users should be cautious and consider potential biases by analyzing disparities in the data and model predictions. Techniques such as confusion matrices, PR curves, ROC curves, and local attribution-based and `gradCAM` explanations can all be good indicators for bias. Clear documentation of model behavior and performance is also crucial for iterative bias mitigation. [Intel® Explainable AI Tools](https://github.com/Intel/intel-xai-tools/tree/main) provides components that demonstrate the aforementioned techniques with [Explainer](https://github.com/Intel/intel-xai-tools/tree/main/explainer), a simple API providing post-hoc model distillation and visualization methods, as well as the [Model Card Generator](https://github.com/Intel/intel-xai-tools/tree/main/model_card_gen), which provides an interactive HTML report containing these workflows and demonstrations of model behavior.

 ## Support

 The Intel Transfer Learning Tool team tracks bugs and enhancement requests using
-[GitHub issues](https://github.com/IntelAI/transfer-learning-tool/issues). Before submitting a
+[GitHub issues](https://github.com/Intel/transfer-learning-tool/issues). Before submitting a
 suggestion or bug report, search the existing GitHub issues to see if your issue has already been reported.

 See [Legal Information](Legal.md) for Disclaimers, Trademark, and Licensing information.

diff --git a/api.md b/api.md
index a936a4d..027a7a2 100644
--- a/api.md
+++ b/api.md
@@ -1,4 +1,4 @@
 # API Reference

 Low-code Python\* API documentation is automatically generated from the code and
-appears in the Transfer Learning Tool documentation website's [API](https://intelai.github.io/transfer-learning/main/api.html) page.
+appears in the Transfer Learning Tool documentation website's [API](https://intel.github.io/transfer-learning/main/api.html) page.
diff --git a/cli.md b/cli.md
index 8b41a59..1b68c44 100644
--- a/cli.md
+++ b/cli.md
@@ -1,4 +1,4 @@
 # CLI Reference

 No-code bash CLI documentation is automatically generated from the code and
-appears in the Transfer Learning Tool documentation website's [CLI](https://intelai.github.io/transfer-learning/main/cli.html) page.
+appears in the Transfer Learning Tool documentation website's [CLI](https://intel.github.io/transfer-learning/main/cli.html) page.
diff --git a/docs/index.rst b/docs/index.rst
index c892c31..7256aa4 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -19,5 +19,5 @@
    Supported Models
    Legal
    genindex
-   GitHub Repository <https://github.com/IntelAI/transfer-learning>
+   GitHub Repository <https://github.com/Intel/transfer-learning>
diff --git a/docs/notebooks/README.rst b/docs/notebooks/README.rst
index 0bf4ee8..55524de 100644
--- a/docs/notebooks/README.rst
+++ b/docs/notebooks/README.rst
@@ -22,27 +22,27 @@ Intel Transfer Learning Tool API Tutorial Notebooks
 .. |imageClassPyTorch| image:: /images/Jupyter_logo.svg
    :alt: Jupyter notebook .ipynb file
    :height: 35
-.. _imageClassPyTorch: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/image_classification/tlt_api_pyt_image_classification/TLT_PyTorch_Image_Classification_Transfer_Learning.ipynb
+.. _imageClassPyTorch: https://github.com/Intel/transfer-learning/blob/main/notebooks/image_classification/tlt_api_pyt_image_classification/TLT_PyTorch_Image_Classification_Transfer_Learning.ipynb

.. |imageClassTensorFlow| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _imageClassTensorflow: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/image_classification/tlt_api_tf_image_classification/TLT_TF_Image_Classification_Transfer_Learning.ipynb
+.. _imageClassTensorflow: https://github.com/Intel/transfer-learning/blob/main/notebooks/image_classification/tlt_api_tf_image_classification/TLT_TF_Image_Classification_Transfer_Learning.ipynb

.. |textClassPyTorch| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _textClassPyTorch: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/text_classification/tlt_api_pyt_text_classification/TLT_PYT_Text_Classification.ipynb
+.. _textClassPyTorch: https://github.com/Intel/transfer-learning/blob/main/notebooks/text_classification/tlt_api_pyt_text_classification/TLT_PYT_Text_Classification.ipynb

.. |textClassTensorFlow| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _textClassTensorflow: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/text_classification/tlt_api_tf_text_classification/TLT_TF_Text_Classification.ipynb
+.. _textClassTensorflow: https://github.com/Intel/transfer-learning/blob/main/notebooks/text_classification/tlt_api_tf_text_classification/TLT_TF_Text_Classification.ipynb

.. |imageAnomalyPyTorch| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _imageAnomalyPyTorch: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/image_anomaly_detection/tlt_api_pyt_anomaly_detection/Anomaly_Detection.ipynb
+.. _imageAnomalyPyTorch: https://github.com/Intel/transfer-learning/blob/main/notebooks/image_anomaly_detection/tlt_api_pyt_anomaly_detection/Anomaly_Detection.ipynb

.. csv-table::
   :header: "Notebook Title", ".ipynb Link", "Use Case", "Framework"

@@ -60,12 +60,12 @@ Intel Transfer Learning Tool API End-to-End Pipelines

.. |imageClassMedical| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _imageClassMedical: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/e2e_workflows/Medical_Imaging_Classification.ipynb
+.. _imageClassMedical: https://github.com/Intel/transfer-learning/blob/main/notebooks/e2e_workflows/Medical_Imaging_Classification.ipynb

.. |imageClassRemote| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _imageClassRemote: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/e2e_workflows/Remote_Sensing_Image_Scene_Classification.ipynb
+.. _imageClassRemote: https://github.com/Intel/transfer-learning/blob/main/notebooks/e2e_workflows/Remote_Sensing_Image_Scene_Classification.ipynb

.. csv-table::

@@ -81,12 +81,12 @@ Intel Transfer Learning Tool Performance Comparison

.. |imageClassTFPerf| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _imageClassTFPerf: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/performance/tf_image_classification_performance.ipynb
+.. _imageClassTFPerf: https://github.com/Intel/transfer-learning/blob/main/notebooks/performance/tf_image_classification_performance.ipynb

.. |textClassHFPerf| image:: /images/Jupyter_logo.svg
   :alt: Jupyter notebook .ipynb file
   :height: 35
-.. _textClassHFPerf: https://github.com/IntelAI/transfer-learning/blob/main/notebooks/performance/hf_text_classification_performance.ipynb
+.. _textClassHFPerf: https://github.com/Intel/transfer-learning/blob/main/notebooks/performance/hf_text_classification_performance.ipynb

.. csv-table::
   :header: "Notebook Title", ".ipynb Link", "Use Case", "Framework"

diff --git a/notebooks/README.md b/notebooks/README.md
index fd5889b..28c54bc 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -53,4 +53,4 @@ and [Intel Extension for PyTorch](https://github.com/intel/intel-extension-for-p
 | [Performance Comparison: Text Classification Transfer Learning with Hugging Face and the Intel Transfer Learning Tool](/notebooks/performance/hf_text_classification_performance.ipynb) | NLP: Text Classification | Hugging Face, PyTorch, and the Intel Transfer Learning Tool API | Compares training and evaluation metrics for text classification transfer learning using the Hugging Face Trainer and the Intel Transfer Learning Tool. |

 ### Note on Evaluation and Bias
-All notebooks provide standard evaluation metrics such as accuracy and loss for validation/test/train sets. While important, it's essential to acknowledge that these metrics may not explicitly capture biases. Users should be cautious and consider potential biases by analyzing disparities in the data and model prediction. Techniques such as confusion matrices, PR curves, ROC curves, local attribution-based and `gradCAM` explanations, can all be good indicators for bias. Clear documentation of model behavior and performance is also crucial for iterative bias mitigation. [Intel® Explainable AI Tools](https://github.com/IntelAI/intel-xai-tools/tree/main) provides components that demonstrate the aformentioned techniques with [Explainer](https://github.com/IntelAI/intel-xai-tools/tree/main/explainer), a simple API providing post-hoc model distillation and visualization methods, as well as The [Model Card Generator](https://github.com/IntelAI/intel-xai-tools/tree/main/model_card_gen) which provides an interactive HTML report that containing these workflows and demonstrations of model behavior.
\ No newline at end of file
+All notebooks provide standard evaluation metrics such as accuracy and loss for validation/test/train sets. While important, it's essential to acknowledge that these metrics may not explicitly capture biases. Users should be cautious and consider potential biases by analyzing disparities in the data and model predictions. Techniques such as confusion matrices, PR curves, ROC curves, and local attribution-based and `gradCAM` explanations can all be good indicators for bias. Clear documentation of model behavior and performance is also crucial for iterative bias mitigation. [Intel® Explainable AI Tools](https://github.com/Intel/intel-xai-tools/tree/main) provides components that demonstrate the aforementioned techniques with [Explainer](https://github.com/Intel/intel-xai-tools/tree/main/explainer), a simple API providing post-hoc model distillation and visualization methods, as well as the [Model Card Generator](https://github.com/Intel/intel-xai-tools/tree/main/model_card_gen), which provides an interactive HTML report containing these workflows and demonstrations of model behavior.
\ No newline at end of file
diff --git a/notebooks/setup.md b/notebooks/setup.md
index d715017..8615c98 100644
--- a/notebooks/setup.md
+++ b/notebooks/setup.md
@@ -16,7 +16,7 @@ Software Requirements:

 2. Clone the GitHub repo if you haven't done this in step 1

 ```
- git clone https://github.com/IntelAI/transfer-learning.git
+ git clone https://github.com/Intel/transfer-learning.git
 cd transfer-learning
 ```

diff --git a/setup.py b/setup.py
index 3fe14ae..eb9dd6e 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@ def get_framework_requirements(framework_name):
 setup(name="intel-transfer-learning-tool",
       description="Intel® Transfer Learning Tool",
       version="0.7.0",
-      url='https://github.com/IntelAI/transfer-learning',
+      url='https://github.com/Intel/transfer-learning',
       license='Apache 2.0',
       author='IntelAI',
       author_email='IntelAI@intel.com',
diff --git a/tests/README.md b/tests/README.md
index 3d9c5dd..d055a42 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -5,7 +5,7 @@
 Then install the following dependencies:
 ```
 # Clone this repo, if you don't already have it
-git clone https://github.com/IntelAI.transfer-learning.git
+git clone https://github.com/Intel/transfer-learning.git
 cd transfer-learning

 # Run all tests with make, or skip this step to run individually
diff --git a/workflows/disease_prediction/README.md b/workflows/disease_prediction/README.md
index c872502..fa1167e 100644
--- a/workflows/disease_prediction/README.md
+++ b/workflows/disease_prediction/README.md
@@ -54,13 +54,13 @@ Linux OS (Ubuntu 22.04) is used to validate this reference solution. Make sure t

 ## How It Works?

-The Vision reference Implementation component uses [Intel Transfer Learning Toolkit based vision workload](https://github.com/IntelAI/transfer-learning), which is optimized for image fine-tuning and inference. This workload uses Tensorflowhub's ResNet-50 model to fine-tune a new convolutional neural network model with subtracted CESM image dataset. The images are preprocessed by using domain expert-defined segmented regions to reduce redundancies during training.
+The Vision reference Implementation component uses [Intel Transfer Learning Toolkit based vision workload](https://github.com/Intel/transfer-learning), which is optimized for image fine-tuning and inference. This workload uses Tensorflowhub's ResNet-50 model to fine-tune a new convolutional neural network model with subtracted CESM image dataset. The images are preprocessed by using domain expert-defined segmented regions to reduce redundancies during training.

 ## Get Started

 ### Download the repository

-git clone https://github.com/IntelAI/transfer-learning.git vision_workflow
+git clone https://github.com/Intel/transfer-learning.git vision_workflow
 cd vision_workflow/workflows/disease_prediction

@@ -214,7 +214,7 @@ To implement this reference use case on a different or customized pre-training m
 For more information or to read about other relevant workflow examples, see these guides and software resources:
 - [Intel® AI Analytics Toolkit (AI Kit)](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ai-analytics-toolkit.html)
 - [Intel® Neural Compressor](https://github.com/intel/neural-compressor)
-- [Intel® Transfer Learning Tool](https://github.com/IntelAI/transfer-learning/tree/v0.7.0)
+- [Intel® Transfer Learning Tool](https://github.com/Intel/transfer-learning/tree/v0.7.0)

 ## Support
 If you have any questions with this workflow, want help with troubleshooting, want to report a bug or submit enhancement requests, please submit a GitHub issue.
diff --git a/workflows/vision_anomaly_detection/README.md b/workflows/vision_anomaly_detection/README.md
index 466c67c..14dbdbc 100644
--- a/workflows/vision_anomaly_detection/README.md
+++ b/workflows/vision_anomaly_detection/README.md
@@ -16,7 +16,7 @@ This workflow is a fine-tuning module under the [Visual Quality Inspection refer
 - [Support](#support)

 ## Technical Overview
-This repository provides a layer within the higher level Visual Quality Inspection reference kit and supports the following using [Intel® Transfer Learning Tool](https://github.com/IntelAI/transfer-learning):
+This repository provides a layer within the higher level Visual Quality Inspection reference kit and supports the following using [Intel® Transfer Learning Tool](https://github.com/Intel/transfer-learning):
 - Fine-tuning and inference on custom dataset
 - Implementation for different feature extractors based on:
   - Pre-trained model (without fine-tuning)

@@ -87,7 +87,7 @@ It contains the workflow code:
 ```
 export $WORKSPACE=/
 cd $WORKSPACE
-git clone https://github.com/IntelAI/transfer-learning.git
+git clone https://github.com/Intel/transfer-learning.git
 cd transfer-learning/workflows/vision_anomaly_detection
 ```

@@ -124,7 +124,7 @@ Ensure you have completed steps in the [Get Started Section](#get-started).

 Build or Pull the provided docker image.

 ```bash
-git clone https://github.com/IntelAI/models -b r2.11 intel-models
+git clone https://github.com/Intel/models -b r2.11 intel-models
 cd docker
 docker compose build
 cd ..
 ```

@@ -244,7 +244,7 @@ pip install -r requirements.txt

 Download the mvtec dataset using Intel Model Zoo dataset download API

 ```
-git clone https://github.com/IntelAI/models.git $WORKSPACE/models
+git clone https://github.com/Intel/models.git $WORKSPACE/models
 cd $WORKSPACE/models/datasets/dataset_api/
 ```

@@ -280,8 +280,8 @@ python src/vision_anomaly_wrapper.py --config_file config/config.yaml

 ## Learn More
 For more information or to read about other relevant workflow examples, see these guides and software resources:
-- [Intel® Transfer Learning Tool](https://github.com/IntelAI/transfer-learning)
-- [Anomaly Detection fine-tuning workflow using SimSiam and CutPaste techniques](https://github.com/IntelAI/transfer-learning/tree/main/workflows/vision_anomaly_detection)
+- [Intel® Transfer Learning Tool](https://github.com/Intel/transfer-learning)
+- [Anomaly Detection fine-tuning workflow using SimSiam and CutPaste techniques](https://github.com/Intel/transfer-learning/tree/main/workflows/vision_anomaly_detection)
 - [Intel® AI Analytics Toolkit (AI Kit)](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ai-analytics-toolkit.html)
 - [Intel® Extension for PyTorch](https://intel.github.io/intel-extension-for-pytorch/)
 - [Intel® Extension for Scikit-learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html#gs.x609e4)
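Since this final patch only rewrites documentation links for the repository's move from the IntelAI org to the Intel org, one practical follow-up for readers (not part of the patch itself) is repointing an existing local clone. GitHub typically redirects the old URL, but the remote can be updated explicitly:

```
# Point an existing checkout at the repository's new home under the Intel org
git remote set-url origin https://github.com/Intel/transfer-learning.git
git remote -v   # verify the updated remote URL
```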