From 72fff407c59d6bc3378d8afc71dd748bcf451e28 Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 01:09:36 +0530 Subject: [PATCH 1/5] Added baseline submission --- .../check_star_for_challange_submission.yaml | 25 ++++++ .gitignore | 8 +- session_2/README.md | 3 + session_2/challenge/README.md | 83 +++++++++++++++++ session_2/challenge/requirements.txt | 3 + .../challenge/sample_inputs/sample_1_yes.txt | 1 + .../challenge/sample_inputs/sample_2_no.txt | 1 + session_2/challenge/scripts/base.py | 35 ++++++++ session_2/challenge/scripts/evaluate.py | 90 +++++++++++++++++++ session_2/challenge/scripts/model.py | 26 ++++++ session_2/challenge/scripts/registry.py | 30 +++++++ session_2/challenge/submissions/baseline.py | 34 +++++++ 12 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/check_star_for_challange_submission.yaml create mode 100644 session_2/README.md create mode 100644 session_2/challenge/README.md create mode 100644 session_2/challenge/requirements.txt create mode 100644 session_2/challenge/sample_inputs/sample_1_yes.txt create mode 100644 session_2/challenge/sample_inputs/sample_2_no.txt create mode 100644 session_2/challenge/scripts/base.py create mode 100644 session_2/challenge/scripts/evaluate.py create mode 100644 session_2/challenge/scripts/model.py create mode 100644 session_2/challenge/scripts/registry.py create mode 100644 session_2/challenge/submissions/baseline.py diff --git a/.github/workflows/check_star_for_challange_submission.yaml b/.github/workflows/check_star_for_challange_submission.yaml new file mode 100644 index 0000000..e515d4f --- /dev/null +++ b/.github/workflows/check_star_for_challange_submission.yaml @@ -0,0 +1,25 @@ +name: Check star for a prompt challenge submission + +on: + pull_request: + types: [opened, reopened, synchronize] + +jobs: + is-stargazer: + runs-on: ubuntu-latest + steps: + + - uses: dorny/paths-filter@v3.0.2 + id: changes + with: + filters: | + src: + - 'session_2/challenge/submissions/**' + + - uses: gacts/is-stargazer@v1.1.0 + id: check-star + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.check-star.outputs.is-stargazer != 'true') }} + uses: actions/github-script@v6 + with: + script: core.setFailed('⭐ Please, star this repository!') \ No newline at end of file diff --git a/.gitignore b/.gitignore index a5b3032..5b96ae4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ -site/* \ No newline at end of file +site/* +*.pyc + +# Ignore samples used for local testing and keep the default ones. +session_2/challenge/sample_inputs/*.txt +!session_2/challenge/sample_inputs/sample_1_yes.txt +!session_2/challenge/sample_inputs/sample_2_no.txt diff --git a/session_2/README.md b/session_2/README.md new file mode 100644 index 0000000..3975414 --- /dev/null +++ b/session_2/README.md @@ -0,0 +1,3 @@ +# Session 1 - Universe of Pretrained LLMs and Prompt Engineering + +
[Session 2 banner image]
\ No newline at end of file diff --git a/session_2/challenge/README.md b/session_2/challenge/README.md new file mode 100644 index 0000000..1f92445 --- /dev/null +++ b/session_2/challenge/README.md @@ -0,0 +1,83 @@ +# Prompt Engineering Challenge + +## Description + +Classify if a job is suitable for a fresher or not from the job description +using LLM using prompt engineering. + +## Public leaderboard + +TODO + +## How to participate? + +!!! tip "TLDR" + Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) + repo, add your submission in `llm_seminar_series/session_2/challenge` dir + and raise a pull request. + + +1. Fork the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) + repo and open it in github codespaces or clone locally. +2. Go to `llm_seminar_series/session_2/challenge` dir and run the evaluation + script to test the `"baseline"` prompt as shown below. + + ```bash + # Change the present working dir + cd session_2/challenge + + # Run baseline evaluation + python3 -m scripts.evaluate --prompt="baseline" + ``` + +3. To submit your own prompt, make a copy of `submissions/baseline.py` and + change the name of the prompt from `baseline` to something else which + describes your prompt. E.g, + + ```python + # file: submissions/name_of_your_prompt.py + + @registry.register("name_of_your_prompt") + class NameOfYourPrompt(base.PromptSubmission): + ... + ``` + + Also change the class name and register it with a new name (can be same as the + filename.) + +4. Update the `build_prompt` and `parse_response` method. + + - The `build_prompt` method must take job description as input and create a + prompt for the llm. + + - The `parse_response` method must post process the output + generated by the llm and return a boolean value. + + - `True`: If the job description is for a fresher level job. + - `False`: If the job description is for an expert level job. + + +6. Run the evaluation locally using your new prompt and check the results. + + ```bash + python3 -m scripts.evaluate --prompt="name_of_your_prompt" + ``` + +7. Push your changes to the forked repo and create a pull request. + + - Add your changes: ```git add submissions/name_of_your_prompt.py``` + - Commit your changes: ```git commit -m "write a commit message"``` + - Push your changes to your forked repo: ```git push``` + - Star the [original repo](https://github.com/infocusp/llm_seminar_series) + (mandatory for submission) and raise a pull request from github to submit + your prompt. + +8. Congratulations 🎉, once a repo maintainer approves your submission and merges + your PR, your rank based on a private test set will be published on the + public leader board. + +!!! note + You can test your prompt on your own samples by adding new files under + `sample_inputs` dir. The file name must ends with `"yes.txt"` if the JD is + for a fresher, otherwise it should end with `"no.txt"`. Do not commit + these files. 
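For reference, a complete submission that follows the steps above could look like the sketch below. The file name `strict_keyword.py`, the registered name, the class name, and the prompt wording are illustrative and are not part of this patch; only the `base.PromptSubmission` interface and the `registry.register` call mirror the code added here. It can be evaluated locally with `python3 -m scripts.evaluate --prompt="strict_keyword"`.

```python
# file: submissions/strict_keyword.py  (illustrative example, not part of this patch)
"""Example submission: constrains the model to a one-word YES/NO verdict."""

from scripts import base, registry


@registry.register("strict_keyword")
class StrictKeyword(base.PromptSubmission):
    """Asks for a single-word verdict and parses it defensively."""

    def build_prompt(self, job_description: str) -> str:
        """Builds a prompt that asks for a YES/NO answer only."""
        prompt = f"""
        You are screening job descriptions for entry-level candidates.
        Answer with a single word: "YES" if a fresher could apply for the
        role described below, or "NO" if it requires an experienced
        professional.

        Job description:
        {job_description}

        Answer (YES or NO):
        """
        return prompt.strip()

    def parse_response(self, model_response: str) -> bool:
        """Maps the model's reply to a boolean verdict."""
        words = model_response.strip().lower().split()
        # Use the first word so that an explanation appended after the
        # verdict ("yes, because ...") does not change the result.
        return bool(words) and words[0].startswith("yes")
```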
\ No newline at end of file diff --git a/session_2/challenge/requirements.txt b/session_2/challenge/requirements.txt new file mode 100644 index 0000000..ec415f7 --- /dev/null +++ b/session_2/challenge/requirements.txt @@ -0,0 +1,3 @@ +g4f>=0.2.9.9 +tqdm>=4.66.2 +absl-py>=2.1.0 \ No newline at end of file diff --git a/session_2/challenge/sample_inputs/sample_1_yes.txt b/session_2/challenge/sample_inputs/sample_1_yes.txt new file mode 100644 index 0000000..b7e0bae --- /dev/null +++ b/session_2/challenge/sample_inputs/sample_1_yes.txt @@ -0,0 +1 @@ +We need a beginner level python developer. \ No newline at end of file diff --git a/session_2/challenge/sample_inputs/sample_2_no.txt b/session_2/challenge/sample_inputs/sample_2_no.txt new file mode 100644 index 0000000..d0016ec --- /dev/null +++ b/session_2/challenge/sample_inputs/sample_2_no.txt @@ -0,0 +1 @@ +We need an python expert with 7+ years of experience. \ No newline at end of file diff --git a/session_2/challenge/scripts/base.py b/session_2/challenge/scripts/base.py new file mode 100644 index 0000000..af6ae02 --- /dev/null +++ b/session_2/challenge/scripts/base.py @@ -0,0 +1,35 @@ +"""Base class for prompt submission.""" + +import abc + + +class PromptSubmission(abc.ABC): + """Base class for prompt submission.""" + + def __init__(self): + """Initializes a prompt submission class.""" + pass + + @abc.abstractmethod + def build_prompt(self, job_description: str) -> str: + """Builds a prompt for classification of job description. + + Args: + job_description: Input for classification. + + Returns: + Input for the LLM. + """ + raise NotImplementedError + + @abc.abstractmethod + def parse_response(self, model_response: str) -> bool: + """Parses a response from the LLM to decide the final answer. + + Args: + model_response: Output of the llm for the given prompt. + + Returns: + True is the job_description is for a fresher otherwise False. + """ + raise NotImplementedError diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py new file mode 100644 index 0000000..2d0ba77 --- /dev/null +++ b/session_2/challenge/scripts/evaluate.py @@ -0,0 +1,90 @@ +"""Evaluates the submitted prompts. + +You can copy session_2/challenge/submissions/baseline.py to modify your own +prompt and evaluate it locally using this script. + +You need to pass the name used for registering a submission. + +For example, + +``` +@registry.register("baseline") +class Baseline(base.PromptSubmission): + + def build_prompt(self, job_description: str) -> str: + ... +``` + +In the above code, a Baseline class is registered with the name of `baseline`, +so you can run the below sample command to evaluate it. + +python3 -m scripts.evaluate --prompt=baseline +""" + +import glob +import logging +import os +from collections.abc import Sequence + +import tqdm +from absl import app, flags +from scripts import model, registry +from submissions import baseline # noqa: F401 + +_PROMPT = flags.DEFINE_string( + "prompt", None, "Name of the prompt to evaluate." 
+) + +_SAMPLES_DIR = "sample_inputs" + + +def load_sample_test_set() -> list[tuple[str, bool]]: + """Loads sample job descriptions and answers for local testing.""" + sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt")) + sample_inputs = [] + for filepath in sample_files: + content = open(filepath, "r").read() + filename = os.path.basename(filepath).lower() + if filename.endswith("_yes.txt"): + target = True + elif filename.endswith("_no.txt"): + target = False + else: + raise ValueError( + "File %s must end with yes.txt or no.txt" % filepath + ) + target = True if "yes" in filename.lower() else False + sample_inputs.append((content, target)) + return sample_inputs + + +def evaluate(prompt_name: str): + """Evaluates the prompt submission.""" + # Loads a free gpt4 model. + llm = model.G4fModel() + + # Loads a prompt submission. + prompt_handler = registry.get(name=prompt_name) + + # Generate results for the dataset. + dataset = load_sample_test_set() + correct_pred = 0 + for job_description, target in tqdm.tqdm(dataset): + prompt = prompt_handler.build_prompt(job_description=job_description) + response = llm.generate(prompt=prompt) + output = prompt_handler.parse_response(model_response=response) + if output == target: + correct_pred += 1 + + logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100)) + + +def main(argv: Sequence[str]) -> None: + """Entrypoint.""" + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + evaluate(prompt_name=_PROMPT.value) + + +if __name__ == "__main__": + app.run(main) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py new file mode 100644 index 0000000..3a91a96 --- /dev/null +++ b/session_2/challenge/scripts/model.py @@ -0,0 +1,26 @@ +"""Model inference.""" + +import g4f + + +class Model: + """Base class for LLM.""" + + def generate(self, prompt: str) -> str: + """Returns a generation for prompt.""" + return "" + + +class G4fModel(Model): + """A free gpt4 model. 
+ + Reference: https://github.com/xtekky/gpt4free + """ + + def generate(self, prompt: str) -> str: + """Completes a prompt using gpt-4 for free model.""" + response = g4f.ChatCompletion.create( + model=g4f.models.gpt_4, + messages=[{"role": "user", "content": prompt}], + ) + return response diff --git a/session_2/challenge/scripts/registry.py b/session_2/challenge/scripts/registry.py new file mode 100644 index 0000000..3659dec --- /dev/null +++ b/session_2/challenge/scripts/registry.py @@ -0,0 +1,30 @@ +"""Registry of all the submitted prompts.""" + +from typing import Type + +from scripts import base + +_SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {} + + +def register(name: str): + """Returns a decorator that registers a submission with the given name.""" + + def _register(klass: Type[base.PromptSubmission]): + _SUBMISSIONS_REGISTRY[name] = klass + return klass + + return _register + + +def get(name: str) -> base.PromptSubmission: + """Returns the submission registered with the given name.""" + if name not in _SUBMISSIONS_REGISTRY: + raise NotImplementedError(f"Submission with name {name} not found.") + klass = _SUBMISSIONS_REGISTRY[name] + return klass() + + +def get_all() -> list[Type[base.PromptSubmission]]: + """Returns all the submissions.""" + return list(_SUBMISSIONS_REGISTRY.values()) diff --git a/session_2/challenge/submissions/baseline.py b/session_2/challenge/submissions/baseline.py new file mode 100644 index 0000000..1f76d67 --- /dev/null +++ b/session_2/challenge/submissions/baseline.py @@ -0,0 +1,34 @@ +"""Baseline submission for the job description classification challenge.""" + +from scripts import base, registry + + +@registry.register("baseline") +class Baseline(base.PromptSubmission): + """Baseline submission.""" + + def build_prompt(self, job_description: str) -> str: + """Builds a prompt for classification of job description.""" + prompt = f""" + + Say "YES" if the given job description is suitable for + a freshers other wise say "NO". + + {job_description}. + + """ + return prompt.strip() + + def parse_response(self, model_response: str) -> bool: + """Parses a response from the LLM to decide the final answer. + + Args: + model_response: Output of the llm for the given prompt. + + Returns: + True is the job_description is for a fresher otherwise False. 
+ """ + model_response = model_response.lower() + if "yes" in model_response: + return True + return False From 0cac2b0287493dfdb329d14c26a99ae9d57091fd Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 04:17:32 +0530 Subject: [PATCH 2/5] Added leaderboard md --- mkdocs.yaml | 1 + session_2/README.md | 2 +- session_2/challenge/.pages | 3 + .../{README.md => how_to_participate.md} | 13 +--- session_2/challenge/leaderboard.md | 65 +++++++++++++++++++ session_2/challenge/scripts/evaluate.py | 16 ++++- session_2/challenge/scripts/model.py | 9 +-- stylesheets/extra.css | 2 +- 8 files changed, 91 insertions(+), 20 deletions(-) create mode 100644 session_2/challenge/.pages rename session_2/challenge/{README.md => how_to_participate.md} (92%) create mode 100644 session_2/challenge/leaderboard.md diff --git a/mkdocs.yaml b/mkdocs.yaml index b010c86..749973c 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -38,6 +38,7 @@ markdown_extensions: format: !!python/name:pymdownx.superfences.fence_code_format extra_css: - stylesheets/extra.css + - stylesheets/leaderboard.css extra: generator: false social: diff --git a/session_2/README.md b/session_2/README.md index 3975414..8c344b3 100644 --- a/session_2/README.md +++ b/session_2/README.md @@ -1,3 +1,3 @@ -# Session 1 - Universe of Pretrained LLMs and Prompt Engineering +# Session 2 - Universe of Pretrained LLMs and Prompt Engineering
[Session 2 banner image]
\ No newline at end of file diff --git a/session_2/challenge/.pages b/session_2/challenge/.pages new file mode 100644 index 0000000..967e585 --- /dev/null +++ b/session_2/challenge/.pages @@ -0,0 +1,3 @@ +nav: + - Leaderboard: leaderboard.md + - How to participate ?: how_to_participate.md \ No newline at end of file diff --git a/session_2/challenge/README.md b/session_2/challenge/how_to_participate.md similarity index 92% rename from session_2/challenge/README.md rename to session_2/challenge/how_to_participate.md index 1f92445..740cbf4 100644 --- a/session_2/challenge/README.md +++ b/session_2/challenge/how_to_participate.md @@ -1,15 +1,4 @@ -# Prompt Engineering Challenge - -## Description - -Classify if a job is suitable for a fresher or not from the job description -using LLM using prompt engineering. - -## Public leaderboard - -TODO - -## How to participate? +# How to participate? !!! tip "TLDR" Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md new file mode 100644 index 0000000..7a573bf --- /dev/null +++ b/session_2/challenge/leaderboard.md @@ -0,0 +1,65 @@ +# Leaderboard + +!!! tip "Description" + Test your prompt engineering skills to classify if a job description is suitable + for a fresher or not. Check [participation guide](how_to_participate.md). + +
+| Rank | Profile Image | GitHub Username | Solution | Accuracy % |
+|------|---------------|-----------------|----------|------------|
+| 1 | Profile Image | Username 1 | Baseline | 100 |
+| 2 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
\ No newline at end of file diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py index 2d0ba77..c2fe3a9 100644 --- a/session_2/challenge/scripts/evaluate.py +++ b/session_2/challenge/scripts/evaluate.py @@ -35,6 +35,10 @@ def build_prompt(self, job_description: str) -> str: "prompt", None, "Name of the prompt to evaluate." ) +_DEBUG = flags.DEFINE_bool( + "debug", True, "Prints prompt and response if true." +) + _SAMPLES_DIR = "sample_inputs" @@ -69,20 +73,28 @@ def evaluate(prompt_name: str): # Generate results for the dataset. dataset = load_sample_test_set() correct_pred = 0 - for job_description, target in tqdm.tqdm(dataset): + for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): prompt = prompt_handler.build_prompt(job_description=job_description) + logging.debug("[prompt %d]\n%s", idx, prompt) response = llm.generate(prompt=prompt) + logging.debug("[response %d]\n%s", idx, response) output = prompt_handler.parse_response(model_response=response) + logging.debug("[target %d]\n%s", idx, target) + logging.debug("[prediction %d]\n%s", idx, output) if output == target: correct_pred += 1 - logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100)) + print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201 def main(argv: Sequence[str]) -> None: """Entrypoint.""" if len(argv) > 1: raise app.UsageError("Too many command-line arguments.") + if _DEBUG.value: + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.getLogger().setLevel(logging.INFO) evaluate(prompt_name=_PROMPT.value) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py index 3a91a96..ba6c450 100644 --- a/session_2/challenge/scripts/model.py +++ b/session_2/challenge/scripts/model.py @@ -19,8 +19,9 @@ class G4fModel(Model): def generate(self, prompt: str) -> str: """Completes a prompt using gpt-4 for free model.""" - response = g4f.ChatCompletion.create( - model=g4f.models.gpt_4, - messages=[{"role": "user", "content": prompt}], - ) + # response = g4f.ChatCompletion.create( + # model=g4f.models.gpt_4, + # messages=[{"role": "user", "content": prompt}], + # ) + response = "yes" return response diff --git a/stylesheets/extra.css b/stylesheets/extra.css index 21c5ba1..26a0b0b 100644 --- a/stylesheets/extra.css +++ b/stylesheets/extra.css @@ -4,4 +4,4 @@ .md-header { margin-top: 10px; -} \ No newline at end of file +} From 002f879321cc52bcf631bdf15ad1db31eb57425a Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 11:35:37 +0530 Subject: [PATCH 3/5] Added script for leaderboard sorting --- session_2/challenge/leaderboard.md | 79 ++++++---------------- session_2/challenge/requirements.txt | 4 +- session_2/challenge/scripts/leaderboard.py | 76 +++++++++++++++++++++ stylesheets/extra.css | 9 ++- 4 files changed, 105 insertions(+), 63 deletions(-) create mode 100644 session_2/challenge/scripts/leaderboard.py diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md index 7a573bf..e9937bd 100644 --- a/session_2/challenge/leaderboard.md +++ b/session_2/challenge/leaderboard.md @@ -1,65 +1,24 @@ # Leaderboard -!!! tip "Description" - Test your prompt engineering skills to classify if a job description is suitable - for a fresher or not. Check [participation guide](how_to_participate.md). +## Problem statement + +Test your prompt engineering skills to classify if a job description is suitable +for a fresher or not. + +Check [participation guide](how_to_participate.md). 
+ +## Rankings
-| Rank | Profile Image | GitHub Username | Solution | Accuracy % |
-|------|---------------|-----------------|----------|------------|
-| 1 | Profile Image | Username 1 | Baseline | 100 |
-| 2 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
+ + +| Rank | Profile Image | GitHub Username | Solution | Accuracy % | +|-------:|:------------------------------------------------------------------------------------------------|:-------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:| +| 1 | | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 | +| 2 | | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 3 | | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 4 | | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 | +| 5 | | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 | + +
\ No newline at end of file diff --git a/session_2/challenge/requirements.txt b/session_2/challenge/requirements.txt index ec415f7..56cbd84 100644 --- a/session_2/challenge/requirements.txt +++ b/session_2/challenge/requirements.txt @@ -1,3 +1,5 @@ g4f>=0.2.9.9 tqdm>=4.66.2 -absl-py>=2.1.0 \ No newline at end of file +absl-py>=2.1.0 +pandas>=2.2.2 +tabulate>=0.9.0 \ No newline at end of file diff --git a/session_2/challenge/scripts/leaderboard.py b/session_2/challenge/scripts/leaderboard.py new file mode 100644 index 0000000..08e8631 --- /dev/null +++ b/session_2/challenge/scripts/leaderboard.py @@ -0,0 +1,76 @@ +"""Generates leaderboard.""" + +import re + +import pandas as pd + +# Read the markdown table into a DataFrame +with open("session_2/challenge/leaderboard.md", "r") as file: + content = file.read() + +start_marker = "\n" +start_index = content.find(start_marker) +end_index = content.find("\n") +table_content = content[start_index:end_index] + + +# Extract rows using regex +rows = re.findall( + r"\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|", table_content +)[2:] + +# Create a DataFrame from the extracted rows +df = pd.DataFrame( + rows, + columns=[ + "Rank", + "Profile Image", + "GitHub Username", + "Solution", + "Accuracy %", + ], +) + +# Strip extra spaces before and after text in each cell +df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) + +# Convert "Rank" column to integer and "Accuracy %" column to float +df["Rank"] = df["Rank"].astype(int) +df["Accuracy %"] = df["Accuracy %"].astype(float) + +# Add a new entry to the DataFrame +new_entry = { + "Rank": len(df) + 1, + "Profile Image": '', + "GitHub Username": "[New User](https://github.com/new_user)", + "Solution": "[New Solution](https://github.com/new_solution)", + "Accuracy %": 99.5, +} # Example accuracy value + +df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True) + +# Keep only the highest submission for each user +highest_indices = df.groupby("GitHub Username")["Accuracy %"].idxmax() +df_highest = df.loc[highest_indices] + +# Sort the DataFrame by "Accuracy %" column in descending order +df_sorted = df_highest.sort_values( + by="Accuracy %", ascending=False +).reset_index(drop=True) + +# Update the "Rank" column after sorting +df_sorted["Rank"] = df_sorted.index + 1 + +# Convert the DataFrame back to markdown format +markdown_table = df_sorted.to_markdown(index=False) + +# Replace the existing table in the markdown file with the sorted table +new_content = ( + content[: start_index + len(start_marker)] + + markdown_table + + content[end_index:] +) + +# Write the updated content back to the markdown file +with open("session_2/challenge/leaderboard.md", "w") as file: + file.write(new_content) diff --git a/stylesheets/extra.css b/stylesheets/extra.css index 26a0b0b..ce1f7d4 100644 --- a/stylesheets/extra.css +++ b/stylesheets/extra.css @@ -1,7 +1,12 @@ .md-grid { - max-width: 1520px; - } + max-width: 1520px; +} .md-header { margin-top: 10px; } + +.profile-image { + border-radius: 50%; + box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15); +} \ No newline at end of file From 2461e6a46d4219500f4c5b49ee9c8f37bcc61852 Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 15:17:11 +0530 Subject: [PATCH 4/5] Added github action to evaluate on private dataset --- ... 
check_star_for_challenge_submission.yaml} | 2 +- .github/workflows/github_pages.yaml | 2 +- .github/workflows/update_leaderboard.yaml | 82 ++++++++ session_2/challenge/how_to_participate.md | 16 +- session_2/challenge/leaderboard.md | 15 +- session_2/challenge/scripts/dataset.py | 24 +++ session_2/challenge/scripts/evaluate.py | 61 +----- session_2/challenge/scripts/evaluate_lib.py | 36 ++++ session_2/challenge/scripts/leaderboard.py | 192 ++++++++++++------ session_2/challenge/scripts/model.py | 2 +- session_2/challenge/scripts/registry.py | 5 +- session_2/challenge/submissions/baseline.py | 2 +- 12 files changed, 295 insertions(+), 144 deletions(-) rename .github/workflows/{check_star_for_challange_submission.yaml => check_star_for_challenge_submission.yaml} (93%) create mode 100644 .github/workflows/update_leaderboard.yaml create mode 100644 session_2/challenge/scripts/dataset.py create mode 100644 session_2/challenge/scripts/evaluate_lib.py diff --git a/.github/workflows/check_star_for_challange_submission.yaml b/.github/workflows/check_star_for_challenge_submission.yaml similarity index 93% rename from .github/workflows/check_star_for_challange_submission.yaml rename to .github/workflows/check_star_for_challenge_submission.yaml index e515d4f..d58f8c5 100644 --- a/.github/workflows/check_star_for_challange_submission.yaml +++ b/.github/workflows/check_star_for_challenge_submission.yaml @@ -20,6 +20,6 @@ jobs: id: check-star - if: ${{ (steps.changes.outputs.src == 'true') && (steps.check-star.outputs.is-stargazer != 'true') }} - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: script: core.setFailed('⭐ Please, star this repository!') \ No newline at end of file diff --git a/.github/workflows/github_pages.yaml b/.github/workflows/github_pages.yaml index e2da726..f65b651 100644 --- a/.github/workflows/github_pages.yaml +++ b/.github/workflows/github_pages.yaml @@ -1,4 +1,4 @@ -name: ci +name: Deploy to github pages on: push: branches: diff --git a/.github/workflows/update_leaderboard.yaml b/.github/workflows/update_leaderboard.yaml new file mode 100644 index 0000000..c1fde3f --- /dev/null +++ b/.github/workflows/update_leaderboard.yaml @@ -0,0 +1,82 @@ +name: Update leaderboard. 
+ +on: + pull_request: + types: [opened, reopened, synchronize] + +jobs: + leaderboard_evaluation: + runs-on: ubuntu-latest + steps: + - name: Check if there are any changes in submissions dir + uses: dorny/paths-filter@v3.0.2 + id: changes + with: + filters: | + src: + - 'session_2/challenge/submissions/**' + list-files: "shell" + + - name: Print changed files + run: | + echo '${{ toJSON(steps.changes.outputs) }}' + + - if: ${{ (steps.changes.outputs.src_count > 1) }} + uses: actions/github-script@v7 + with: + script: core.setFailed('More than one submissions are not allowed at once.') + + # Update leaderboard only if single file is changed in submission dir + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Checkout code + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r session_2/challenge/requirements.txt + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Run leaderboard update script + id: leaderboard-update + run: | + cd session_2/challenge + filename=$(basename "${{ steps.changes.outputs.src_files }}") + filename_without_extension="${filename%.*}" # Remove extension + python -m scripts.leaderboard --github_user="${{ github.actor }}" --prompt="$filename_without_extension" + + - name: Commit changes + uses: EndBug/add-and-commit@v9 + with: + author_name: GitHub Actions + author_email: actions@github.com + message: 'Updated leader board' + add: 'session_2/challenge/leaderboard.md' + + # # Commit the updated leaderboard + # - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + # name: Commit updated leaderboard + # id: commit-leaderboard + # run: | + # git config --global user.name "GitHub Actions" + # git config --global user.email "actions@github.com" + # git add session_2/challenge/leaderboard.md + # git commit -m "Update leaderboard" + # git push -f origin HEAD:${{ github.ref }} + + + # # Print the commit SHA for reference + # - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + # name: Print Commit SHA + # run: | + # echo "Commit SHA: ${{ steps.commit-leaderboard.outputs.commit_sha }}" \ No newline at end of file diff --git a/session_2/challenge/how_to_participate.md b/session_2/challenge/how_to_participate.md index 740cbf4..a0c0c94 100644 --- a/session_2/challenge/how_to_participate.md +++ b/session_2/challenge/how_to_participate.md @@ -20,19 +20,18 @@ ``` 3. To submit your own prompt, make a copy of `submissions/baseline.py` and - change the name of the prompt from `baseline` to something else which + change the name of the file from `baseline` to something else which describes your prompt. E.g, ```python # file: submissions/name_of_your_prompt.py - @registry.register("name_of_your_prompt") + @registry.register() class NameOfYourPrompt(base.PromptSubmission): ... ``` - Also change the class name and register it with a new name (can be same as the - filename.) + Also change the class name. 4. 
Update the `build_prompt` and `parse_response` method. @@ -62,11 +61,4 @@ your prompt. 8. Congratulations 🎉, once a repo maintainer approves your submission and merges - your PR, your rank based on a private test set will be published on the - public leader board. - -!!! note - You can test your prompt on your own samples by adding new files under - `sample_inputs` dir. The file name must ends with `"yes.txt"` if the JD is - for a fresher, otherwise it should end with `"no.txt"`. Do not commit - these files. \ No newline at end of file + your PR, your rank will be published on the public leader board. diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md index e9937bd..b17a344 100644 --- a/session_2/challenge/leaderboard.md +++ b/session_2/challenge/leaderboard.md @@ -12,13 +12,14 @@ Check [participation guide](how_to_participate.md).
-| Rank | Profile Image | GitHub Username | Solution | Accuracy % | -|-------:|:------------------------------------------------------------------------------------------------|:-------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:| -| 1 | | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 | -| 2 | | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | -| 3 | | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | -| 4 | | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 | -| 5 | | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 | +| Rank | Profile Image | GitHub Username | Solution | Accuracy % | +|-------:|:------------------------------------------------------------------------------------------------|:----------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:| +| 1 | | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 | +| 2 | | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 3 | | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 4 | | [hetul-patel](https://github.com/hetul-patel) | [baseline](https://github.com/infocusp/llm_seminar_series/blob/main/session_2/challenge/submissions/baseline.py) | 50 | +| 6 | | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 | +| 7 | | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 |
\ No newline at end of file diff --git a/session_2/challenge/scripts/dataset.py b/session_2/challenge/scripts/dataset.py new file mode 100644 index 0000000..74bd476 --- /dev/null +++ b/session_2/challenge/scripts/dataset.py @@ -0,0 +1,24 @@ +"""Utilities to load evaluation datasets.""" + +import glob +import os + + +def load_sample_test_set(samples_dir: str) -> list[tuple[str, bool]]: + """Loads sample job descriptions and answers for local testing.""" + sample_files = glob.glob(os.path.join(samples_dir, "*.txt")) + sample_inputs = [] + for filepath in sample_files: + content = open(filepath, "r").read() + filename = os.path.basename(filepath).lower() + if filename.endswith("_yes.txt"): + target = True + elif filename.endswith("_no.txt"): + target = False + else: + raise ValueError( + "File %s must end with yes.txt or no.txt" % filepath + ) + target = True if "yes" in filename.lower() else False + sample_inputs.append((content, target)) + return sample_inputs diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py index c2fe3a9..1ca6baa 100644 --- a/session_2/challenge/scripts/evaluate.py +++ b/session_2/challenge/scripts/evaluate.py @@ -21,15 +21,11 @@ def build_prompt(self, job_description: str) -> str: python3 -m scripts.evaluate --prompt=baseline """ -import glob import logging -import os from collections.abc import Sequence -import tqdm from absl import app, flags -from scripts import model, registry -from submissions import baseline # noqa: F401 +from scripts import dataset, evaluate_lib _PROMPT = flags.DEFINE_string( "prompt", None, "Name of the prompt to evaluate." @@ -39,52 +35,12 @@ def build_prompt(self, job_description: str) -> str: "debug", True, "Prints prompt and response if true." ) -_SAMPLES_DIR = "sample_inputs" - - -def load_sample_test_set() -> list[tuple[str, bool]]: - """Loads sample job descriptions and answers for local testing.""" - sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt")) - sample_inputs = [] - for filepath in sample_files: - content = open(filepath, "r").read() - filename = os.path.basename(filepath).lower() - if filename.endswith("_yes.txt"): - target = True - elif filename.endswith("_no.txt"): - target = False - else: - raise ValueError( - "File %s must end with yes.txt or no.txt" % filepath - ) - target = True if "yes" in filename.lower() else False - sample_inputs.append((content, target)) - return sample_inputs - - -def evaluate(prompt_name: str): - """Evaluates the prompt submission.""" - # Loads a free gpt4 model. - llm = model.G4fModel() - - # Loads a prompt submission. - prompt_handler = registry.get(name=prompt_name) - - # Generate results for the dataset. 
- dataset = load_sample_test_set() - correct_pred = 0 - for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): - prompt = prompt_handler.build_prompt(job_description=job_description) - logging.debug("[prompt %d]\n%s", idx, prompt) - response = llm.generate(prompt=prompt) - logging.debug("[response %d]\n%s", idx, response) - output = prompt_handler.parse_response(model_response=response) - logging.debug("[target %d]\n%s", idx, target) - logging.debug("[prediction %d]\n%s", idx, output) - if output == target: - correct_pred += 1 - - print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201 + +def evaluate_on_sample_dataset(prompt_name: str): + """Evaluates the prompt on a sample_dataset.""" + sample_inputs = dataset.load_sample_test_set(samples_dir="sample_inputs") + acc = evaluate_lib.evaluate(dataset=sample_inputs, prompt_name=prompt_name) + print("Accuracy: [%.3f] %%" % acc) # noqa: T201 def main(argv: Sequence[str]) -> None: @@ -95,8 +51,9 @@ def main(argv: Sequence[str]) -> None: logging.getLogger().setLevel(logging.DEBUG) else: logging.getLogger().setLevel(logging.INFO) - evaluate(prompt_name=_PROMPT.value) + evaluate_on_sample_dataset(prompt_name=_PROMPT.value) if __name__ == "__main__": + flags.mark_flag_as_required("prompt") app.run(main) diff --git a/session_2/challenge/scripts/evaluate_lib.py b/session_2/challenge/scripts/evaluate_lib.py new file mode 100644 index 0000000..355979e --- /dev/null +++ b/session_2/challenge/scripts/evaluate_lib.py @@ -0,0 +1,36 @@ +"""Library function for evaluating a prompt on a particular dataset.""" + +import logging + +import tqdm +from scripts import model, registry +from submissions import * # noqa: F401, F403 +from submissions import baseline # noqa: F401 + + +def evaluate(dataset: list[tuple[str, bool]], prompt_name: str): + """Evaluates the prompt submission.""" + # Loads a free gpt4 model. + llm = model.G4fModel() + + # Loads a prompt submission. + prompt_handler = registry.get(name=prompt_name) + + # Generate results for the dataset. + correct_pred = 0 + for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): + prompt = prompt_handler.build_prompt(job_description=job_description) + response = llm.generate(prompt=prompt) + prediction = prompt_handler.parse_response(model_response=response) + if prediction == target: + correct_pred += 1 + result = "[PASS]" + else: + result = "[FAIL]" + + logging.debug( + "No=%d. target=%s prediction=%s %s\n[prompt]\n%s\n[response]\n%s" + % (idx, target, prediction, result, prompt, response) + ) + acc = correct_pred / len(dataset) * 100 + return acc diff --git a/session_2/challenge/scripts/leaderboard.py b/session_2/challenge/scripts/leaderboard.py index 08e8631..c6b25e1 100644 --- a/session_2/challenge/scripts/leaderboard.py +++ b/session_2/challenge/scripts/leaderboard.py @@ -1,76 +1,134 @@ -"""Generates leaderboard.""" +"""Updates the public leaderboard after evaluating given submission. 
+Sample command: +python -m scripts.leaderboard \ + --github_user=your_github_user \ + --prompt_file=baseline +""" + +import logging import re +from collections.abc import Sequence import pandas as pd +from absl import app, flags +from scripts import dataset, evaluate_lib -# Read the markdown table into a DataFrame -with open("session_2/challenge/leaderboard.md", "r") as file: - content = file.read() - -start_marker = "\n" -start_index = content.find(start_marker) -end_index = content.find("\n") -table_content = content[start_index:end_index] - - -# Extract rows using regex -rows = re.findall( - r"\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|", table_content -)[2:] - -# Create a DataFrame from the extracted rows -df = pd.DataFrame( - rows, - columns=[ - "Rank", - "Profile Image", - "GitHub Username", - "Solution", - "Accuracy %", - ], +_PROMPT = flags.DEFINE_string( + "prompt", None, "Name of the submitted prompt to evaluate." ) -# Strip extra spaces before and after text in each cell -df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) - -# Convert "Rank" column to integer and "Accuracy %" column to float -df["Rank"] = df["Rank"].astype(int) -df["Accuracy %"] = df["Accuracy %"].astype(float) - -# Add a new entry to the DataFrame -new_entry = { - "Rank": len(df) + 1, - "Profile Image": '', - "GitHub Username": "[New User](https://github.com/new_user)", - "Solution": "[New Solution](https://github.com/new_solution)", - "Accuracy %": 99.5, -} # Example accuracy value - -df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True) - -# Keep only the highest submission for each user -highest_indices = df.groupby("GitHub Username")["Accuracy %"].idxmax() -df_highest = df.loc[highest_indices] - -# Sort the DataFrame by "Accuracy %" column in descending order -df_sorted = df_highest.sort_values( - by="Accuracy %", ascending=False -).reset_index(drop=True) - -# Update the "Rank" column after sorting -df_sorted["Rank"] = df_sorted.index + 1 - -# Convert the DataFrame back to markdown format -markdown_table = df_sorted.to_markdown(index=False) - -# Replace the existing table in the markdown file with the sorted table -new_content = ( - content[: start_index + len(start_marker)] - + markdown_table - + content[end_index:] +_GITHUB_USER = flags.DEFINE_string( + "github_user", None, "Github username to add an entry in leaderboard." 
) -# Write the updated content back to the markdown file -with open("session_2/challenge/leaderboard.md", "w") as file: - file.write(new_content) + +_LEADERBORAD = "leaderboard.md" # current leaderboard + + +def generate_leaderboard(prompt_name: str, accuracy: float, github_user: str): + """Generates leaderboard.""" + # Read the markdown table into a DataFrame + with open(_LEADERBORAD, "r") as file: + content = file.read() + + start_marker = "\n" + start_index = content.find(start_marker) + end_index = content.find("\n") + table_content = content[start_index:end_index] + + # Extract rows using regex + rows = re.findall( + r"\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|", table_content + )[2:] + + # Create a DataFrame from the extracted rows + df = pd.DataFrame( + rows, + columns=[ + "Rank", + "Profile Image", + "GitHub Username", + "Solution", + "Accuracy %", + ], + ) + + # Strip extra spaces before and after text in each cell + df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) + + # Convert "Rank" column to integer and "Accuracy %" column to float + df["Rank"] = df["Rank"].astype(int) + df["Accuracy %"] = df["Accuracy %"].astype(float) + + # Add a new entry to the DataFrame + repo_url = "https://github.com/infocusp/llm_seminar_series/blob/main/session_2/challenge/submissions" + new_entry = { + "Rank": len(df) + 1, + "Profile Image": f'', + "GitHub Username": f"[{github_user}](https://github.com/{github_user})", + "Solution": f"[{prompt_name}]({repo_url}/{prompt_name}.py)", + "Accuracy %": accuracy, + } + + df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True) + + # Keep only the highest submission for each user + highest_indices = df.groupby("GitHub Username")["Accuracy %"].idxmax() + df_highest = df.loc[highest_indices] + + # Sort the DataFrame by "Accuracy %" column in descending order + df_sorted = df_highest.sort_values( + by="Accuracy %", ascending=False + ).reset_index(drop=True) + + # Update the "Rank" column after sorting + df_sorted["Rank"] = df_sorted.index + 1 + + # Convert the DataFrame back to markdown format + markdown_table = df_sorted.to_markdown(index=False) + + # Replace the existing table in the markdown file with the sorted table + new_content = ( + content[: start_index + len(start_marker)] + + markdown_table + + content[end_index:] + ) + + # Write the updated content back to the markdown file + with open(_LEADERBORAD, "w") as file: + file.write(new_content) + + logging.info( + "Submission by %s with prompt %s updated in the leaderboard.", + github_user, + prompt_name, + ) + + +def update_leaderboard(prompt_name: str, github_user: str): + """Generates a public leaderboard by evaluating given submission.""" + sample_dataset = dataset.load_sample_test_set(samples_dir="sample_inputs") + acc = evaluate_lib.evaluate( + dataset=sample_dataset, prompt_name=prompt_name + ) + generate_leaderboard( + prompt_name=prompt_name, accuracy=acc, github_user=github_user + ) + + +def main(argv: Sequence[str]) -> None: + """Entrypoint.""" + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + logging.getLogger().setLevel(logging.INFO) + update_leaderboard( + prompt_name=_PROMPT.value, github_user=_GITHUB_USER.value + ) + + +if __name__ == "__main__": + flags.mark_flag_as_required("prompt") + flags.mark_flag_as_required("github_user") + app.run(main) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py index ba6c450..67a554e 100644 --- a/session_2/challenge/scripts/model.py +++ 
b/session_2/challenge/scripts/model.py @@ -1,6 +1,6 @@ """Model inference.""" -import g4f +import g4f # noqa: F401 class Model: diff --git a/session_2/challenge/scripts/registry.py b/session_2/challenge/scripts/registry.py index 3659dec..02d1c1c 100644 --- a/session_2/challenge/scripts/registry.py +++ b/session_2/challenge/scripts/registry.py @@ -7,10 +7,11 @@ _SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {} -def register(name: str): - """Returns a decorator that registers a submission with the given name.""" +def register(): + """Returns a decorator that registers a submission with its file as key.""" def _register(klass: Type[base.PromptSubmission]): + name = klass.__module__.split(".")[-1] _SUBMISSIONS_REGISTRY[name] = klass return klass diff --git a/session_2/challenge/submissions/baseline.py b/session_2/challenge/submissions/baseline.py index 1f76d67..c8f6b3e 100644 --- a/session_2/challenge/submissions/baseline.py +++ b/session_2/challenge/submissions/baseline.py @@ -3,7 +3,7 @@ from scripts import base, registry -@registry.register("baseline") +@registry.register() class Baseline(base.PromptSubmission): """Baseline submission.""" From d05d50bf610942b50249496772fbed06da0af8ce Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 20 Apr 2024 11:53:02 +0000 Subject: [PATCH 5/5] Updated leader board --- .github/CODEOWNERS | 9 +++++++++ .github/workflows/update_leaderboard.yaml | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..9353ce9 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,9 @@ +############################################################## +# +# List of approvers/reviewers for llm_seminar_series repo +# +############################################################## +# +# +# These owners will be the default owners for everything in the repo. +* @hetulvp \ No newline at end of file diff --git a/.github/workflows/update_leaderboard.yaml b/.github/workflows/update_leaderboard.yaml index c1fde3f..844e705 100644 --- a/.github/workflows/update_leaderboard.yaml +++ b/.github/workflows/update_leaderboard.yaml @@ -2,8 +2,10 @@ name: Update leaderboard. on: pull_request: - types: [opened, reopened, synchronize] - + branches: + - development + types: + - merged jobs: leaderboard_evaluation: runs-on: ubuntu-latest
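A contributor can reproduce locally what the update_leaderboard workflow above performs for a pull request. The sketch below is illustrative: the changed-file path and the GitHub handle are placeholders, while the module and flags (`scripts.leaderboard`, `--github_user`, `--prompt`) are the ones this patch defines.

```python
"""Illustrative local driver that mimics the update_leaderboard workflow."""

import os
import subprocess

# Path of the submission a pull request would change (placeholder value).
changed_file = "submissions/my_prompt.py"

# Like the workflow, derive the registry key from the file name alone.
prompt_name = os.path.splitext(os.path.basename(changed_file))[0]

# Run the same command the CI job runs, from the challenge directory.
subprocess.run(
    [
        "python",
        "-m",
        "scripts.leaderboard",
        "--github_user=your_github_user",  # replace with your GitHub handle
        f"--prompt={prompt_name}",
    ],
    cwd="session_2/challenge",
    check=True,
)
```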