From cc22d2e5feeb15f3eaf380eeaf2c5219db00b198 Mon Sep 17 00:00:00 2001
From: Hetul Patel <hetul@infocusp.com>
Date: Sat, 20 Apr 2024 01:09:36 +0530
Subject: [PATCH] Added baseline submission

---
 .../check_star_for_challange_submission.yaml  | 36 ++++++++
 .gitignore                                    |  3 +-
 session_2/README.md                           |  3 +
 session_2/challenge/README.md                 | 76 ++++++++++++++++
 session_2/challenge/requirements.txt          |  3 +
 .../challenge/sample_inputs/sample_1_yes.txt  |  1 +
 .../challenge/sample_inputs/sample_2_no.txt   |  1 +
 session_2/challenge/scripts/base.py           | 35 ++++++++
 session_2/challenge/scripts/evaluate.py       | 90 +++++++++++++++++++
 session_2/challenge/scripts/model.py          | 26 ++++++
 session_2/challenge/scripts/registry.py       | 30 +++++++
 session_2/challenge/submissions/baseline.py   | 34 +++++++
 12 files changed, 337 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/check_star_for_challange_submission.yaml
 create mode 100644 session_2/README.md
 create mode 100644 session_2/challenge/README.md
 create mode 100644 session_2/challenge/requirements.txt
 create mode 100644 session_2/challenge/sample_inputs/sample_1_yes.txt
 create mode 100644 session_2/challenge/sample_inputs/sample_2_no.txt
 create mode 100644 session_2/challenge/scripts/base.py
 create mode 100644 session_2/challenge/scripts/evaluate.py
 create mode 100644 session_2/challenge/scripts/model.py
 create mode 100644 session_2/challenge/scripts/registry.py
 create mode 100644 session_2/challenge/submissions/baseline.py

diff --git a/.github/workflows/check_star_for_challange_submission.yaml b/.github/workflows/check_star_for_challange_submission.yaml
new file mode 100644
index 0000000..09e1cc7
--- /dev/null
+++ b/.github/workflows/check_star_for_challange_submission.yaml
@@ -0,0 +1,36 @@
name: Check star for a prompt challenge submission

on:
  pull_request:
    types: [opened, reopened, synchronize]

jobs:
  check_star_for_submission:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history so the base ref is available for `git diff` below.
          fetch-depth: 0

      - name: Check for changes in submissions directory
        id: check_submissions_changes
        run: |
          # `::set-output` is deprecated; write to $GITHUB_OUTPUT instead.
          if git diff --name-only "origin/${{ github.base_ref }}" HEAD | grep '^session_2/challenge/submissions/'; then
            echo "changes_made=true" >> "$GITHUB_OUTPUT"
          else
            echo "changes_made=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Check if user has starred the repo
        id: check_stars
        if: steps.check_submissions_changes.outputs.changes_made == 'true'
        run: |
          user=$(jq -r .sender.login "$GITHUB_EVENT_PATH")
          repo=$(jq -r .repository.full_name "$GITHUB_EVENT_PATH")
          # /user/starred/... inspects the *token's* user (the Actions bot),
          # not the PR author, and "$user" inside a single-quoted jq program
          # was a literal string, never the shell value. List the author's
          # stars instead and match on the repo's full name.
          # NOTE(review): only the first 100 stars are checked; paginate if
          # submitters star more repos than that.
          if ! curl -s "https://api.github.com/users/$user/starred?per_page=100" \
              | jq -e --arg repo "$repo" 'any(.[]; .full_name == $repo)' > /dev/null; then
            echo "::error::User $user must star the repository to submit a pull request."
            exit 1
          fi

      - name: Check if all checks passed
        run: echo "All checks passed."
diff --git a/.gitignore b/.gitignore
index a5b3032..13bd4cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-site/*
\ No newline at end of file
+site/*
+*.pyc
\ No newline at end of file
diff --git a/session_2/README.md b/session_2/README.md
new file mode 100644
index 0000000..3975414
--- /dev/null
+++ b/session_2/README.md
@@ -0,0 +1,3 @@
+# Session 2 - Universe of Pretrained LLMs and Prompt Engineering
+
+<p align="center"><img src="../images/home_page/Session%202.png" alt="Session 2" style="width:70%;"/></p>
\ No newline at end of file
diff --git a/session_2/challenge/README.md b/session_2/challenge/README.md
new file mode 100644
index 0000000..4057025
--- /dev/null
+++ b/session_2/challenge/README.md
@@ -0,0 +1,76 @@
+# Prompt Engineering Challenge
+
+## Description
+
+Classify whether a job is suitable for a fresher or not from the job
+description using an LLM and prompt engineering.
+
+## Public leaderboard
+
+TODO
+
+## How to participate?
+
+1. Fork the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) 
+   repo and open it in github codespaces or clone locally.
+2. Go to `llm_seminar_series/session_2/challenge` dir and run the evaluation
+   script to test the `"baseline"` prompt as shown below.
+
+    ```bash
+    # Change the present working dir
+    cd session_2/challenge
+
+    # Run baseline evaluation
+    python3 -m scripts.evaluate --prompt="baseline"
+    ```
+
+3. To submit your own prompt, make a copy of `submissions/baseline.py`  and 
+   change the name of the prompt from `baseline` to something else which
+   describes your prompt. E.g,
+   
+    ```python
+    # file: submissions/name_of_your_prompt.py
+    
+    @registry.register("name_of_your_prompt")
+    class NameOfYourPrompt(base.PromptSubmission):
+        ...
+    ```
+
+    Also change the class name and register it with a new name (can be same as the
+    filename.)
+
+4. Update the `build_prompt` and `parse_response` method.
+
+      - The `build_prompt` method must take job description as input and create a
+      prompt for the llm.
+
+      - The `parse_response` method must post process the output
+      generated by the llm and return a boolean value.
+
+         - `True`: If the job description is for a fresher level job.
+         - `False`: If the job description is for an expert level job.
+
+
+5. Run the evaluation locally using your new prompt and check the results.
+
+      ```bash
+      python3 -m scripts.evaluate --prompt="name_of_your_prompt"
+      ```
+
+6. Push your changes to the forked repo and create a pull request.
+
+    - Add your changes: ```git add submissions/name_of_your_prompt.py```
+    - Commit your changes: ```git commit -m "write a commit message"```
+    - Push your changes to your forked repo: ```git push```
+    - Star the [original repo](https://github.com/infocusp/llm_seminar_series)
+      (mandatory for submission) and raise a pull request from github to submit
+      your prompt.
+
+7. Congratulations 🎉, once a repo maintainer approves your submission and merges
+   your PR, your rank based on a private test set will be published on the
+   public leader board.
+
+!!! note
+     You can test your prompt on your own samples by adding new files under
+     the `sample_inputs` dir. The file name must end with `"_yes.txt"` if it
+     contains a JD for a fresher; otherwise it must end with `"_no.txt"`.
\ No newline at end of file
diff --git a/session_2/challenge/requirements.txt b/session_2/challenge/requirements.txt
new file mode 100644
index 0000000..ec415f7
--- /dev/null
+++ b/session_2/challenge/requirements.txt
@@ -0,0 +1,3 @@
+g4f>=0.2.9.9
+tqdm>=4.66.2
+absl-py>=2.1.0
\ No newline at end of file
diff --git a/session_2/challenge/sample_inputs/sample_1_yes.txt b/session_2/challenge/sample_inputs/sample_1_yes.txt
new file mode 100644
index 0000000..b7e0bae
--- /dev/null
+++ b/session_2/challenge/sample_inputs/sample_1_yes.txt
@@ -0,0 +1 @@
+We need a beginner level python developer.
\ No newline at end of file
diff --git a/session_2/challenge/sample_inputs/sample_2_no.txt b/session_2/challenge/sample_inputs/sample_2_no.txt
new file mode 100644
index 0000000..d0016ec
--- /dev/null
+++ b/session_2/challenge/sample_inputs/sample_2_no.txt
@@ -0,0 +1 @@
+We need an python expert with 7+ years of experience.
\ No newline at end of file
diff --git a/session_2/challenge/scripts/base.py b/session_2/challenge/scripts/base.py
new file mode 100644
index 0000000..af6ae02
--- /dev/null
+++ b/session_2/challenge/scripts/base.py
@@ -0,0 +1,35 @@
+"""Base class for prompt submission."""
+
+import abc
+
+
class PromptSubmission(abc.ABC):
    """Contract that every prompt submission must implement.

    A submission turns a raw job description into an LLM prompt, and turns
    the LLM's raw answer into a final boolean verdict.
    """

    def __init__(self):
        """Initializes a prompt submission class."""
        pass

    @abc.abstractmethod
    def build_prompt(self, job_description: str) -> str:
        """Builds the LLM prompt for one job description.

        Args:
            job_description: Raw job description text to classify.

        Returns:
            The prompt string to send to the LLM.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def parse_response(self, model_response: str) -> bool:
        """Converts the raw LLM output into the final decision.

        Args:
            model_response: Raw text produced by the LLM for the prompt.

        Returns:
            True if the job description is for a fresher, otherwise False.
        """
        raise NotImplementedError
diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py
new file mode 100644
index 0000000..2d0ba77
--- /dev/null
+++ b/session_2/challenge/scripts/evaluate.py
@@ -0,0 +1,90 @@
+"""Evaluates the submitted prompts.
+
+You can copy session_2/challenge/submissions/baseline.py to modify your own
+prompt and evaluate it locally using this script.
+
+You need to pass the name used for registering a submission.
+
+For example,
+
+```
+@registry.register("baseline")
+class Baseline(base.PromptSubmission):
+
+    def build_prompt(self, job_description: str) -> str:
+        ...
+```
+
+In the above code, a Baseline class is registered with the name of `baseline`,
+so you can run the below sample command to evaluate it.
+
+python3 -m scripts.evaluate --prompt=baseline
+"""
+
+import glob
+import logging
+import os
+from collections.abc import Sequence
+
+import tqdm
+from absl import app, flags
+from scripts import model, registry
+from submissions import baseline  # noqa: F401
+
+_PROMPT = flags.DEFINE_string(
+    "prompt", None, "Name of the prompt to evaluate."
+)
+
+_SAMPLES_DIR = "sample_inputs"
+
+
+def load_sample_test_set() -> list[tuple[str, bool]]:
+    """Loads sample job descriptions and answers for local testing."""
+    sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt"))
+    sample_inputs = []
+    for filepath in sample_files:
+        content = open(filepath, "r").read()
+        filename = os.path.basename(filepath).lower()
+        if filename.endswith("_yes.txt"):
+            target = True
+        elif filename.endswith("_no.txt"):
+            target = False
+        else:
+            raise ValueError(
+                "File %s must end with yes.txt or no.txt" % filepath
+            )
+        target = True if "yes" in filename.lower() else False
+        sample_inputs.append((content, target))
+    return sample_inputs
+
+
def evaluate(prompt_name: str) -> None:
    """Evaluates the prompt submission registered under `prompt_name`.

    Runs the submission over every sample input and logs the prediction
    accuracy as a percentage.

    Args:
        prompt_name: Name the submission was registered with.
    """
    # Loads a free gpt4 model.
    llm = model.G4fModel()

    # Loads a prompt submission.
    prompt_handler = registry.get(name=prompt_name)

    # Generate results for the dataset.
    dataset = load_sample_test_set()
    if not dataset:
        # Guard against ZeroDivisionError when the samples dir is empty.
        logging.warning("No sample inputs found; nothing to evaluate.")
        return
    correct_pred = 0
    for job_description, target in tqdm.tqdm(dataset):
        prompt = prompt_handler.build_prompt(job_description=job_description)
        response = llm.generate(prompt=prompt)
        output = prompt_handler.parse_response(model_response=response)
        if output == target:
            correct_pred += 1

    # Lazy %-args: the message is only formatted if the record is emitted.
    logging.info("Acc : %.3f", correct_pred / len(dataset) * 100)
+
+
def main(argv: Sequence[str]) -> None:
    """Entrypoint: validates CLI arguments and runs the evaluation."""
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    if not _PROMPT.value:
        # Fail fast with a clear message instead of surfacing a confusing
        # registry lookup error for a missing --prompt flag.
        raise app.UsageError("--prompt is required, e.g. --prompt=baseline")
    evaluate(prompt_name=_PROMPT.value)


if __name__ == "__main__":
    app.run(main)
diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py
new file mode 100644
index 0000000..3a91a96
--- /dev/null
+++ b/session_2/challenge/scripts/model.py
@@ -0,0 +1,26 @@
+"""Model inference."""
+
+import g4f
+
+
class Model:
    """Interface for text-generation backends.

    Subclasses override `generate`; this base implementation is a stub
    that always returns an empty string.
    """

    def generate(self, prompt: str) -> str:
        """Returns a generation for the given prompt."""
        del prompt  # Unused by the stub implementation.
        return ""
+
+
class G4fModel(Model):
    """LLM backed by the gpt4free project.

    Reference: https://github.com/xtekky/gpt4free
    """

    def generate(self, prompt: str) -> str:
        """Sends `prompt` to the free gpt-4 backend and returns its reply."""
        chat_history = [{"role": "user", "content": prompt}]
        return g4f.ChatCompletion.create(
            model=g4f.models.gpt_4,
            messages=chat_history,
        )
diff --git a/session_2/challenge/scripts/registry.py b/session_2/challenge/scripts/registry.py
new file mode 100644
index 0000000..3659dec
--- /dev/null
+++ b/session_2/challenge/scripts/registry.py
@@ -0,0 +1,30 @@
+"""Registry of all the submitted prompts."""
+
+from typing import Type
+
+from scripts import base
+
+_SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {}
+
+
def register(name: str):
    """Returns a class decorator that registers a submission under `name`."""

    def _register(klass: Type[base.PromptSubmission]):
        # Last registration wins if the same name is registered twice.
        _SUBMISSIONS_REGISTRY[name] = klass
        return klass

    return _register
+
+
def get(name: str) -> base.PromptSubmission:
    """Returns an instance of the submission registered under `name`.

    Args:
        name: Name used when registering the submission.

    Returns:
        A fresh instance of the registered submission class.

    Raises:
        NotImplementedError: If no submission is registered under `name`.
    """
    if name not in _SUBMISSIONS_REGISTRY:
        # Same exception type as before (callers may catch it), but list the
        # known registrations so a typo in --prompt is easy to spot.
        available = ", ".join(sorted(_SUBMISSIONS_REGISTRY)) or "<none>"
        raise NotImplementedError(
            f"Submission with name {name} not found. Available: {available}"
        )
    klass = _SUBMISSIONS_REGISTRY[name]
    return klass()
+
+
def get_all() -> list[Type[base.PromptSubmission]]:
    """Returns every registered submission class."""
    return [*_SUBMISSIONS_REGISTRY.values()]
diff --git a/session_2/challenge/submissions/baseline.py b/session_2/challenge/submissions/baseline.py
new file mode 100644
index 0000000..f4e6e7f
--- /dev/null
+++ b/session_2/challenge/submissions/baseline.py
@@ -0,0 +1,34 @@
+"""Baseline submission for the job description classification challenge."""
+
+from scripts import base, registry
+
+
@registry.register("baseline")
class Baseline(base.PromptSubmission):
    """Baseline submission: ask the LLM directly for a yes/no verdict."""

    def build_prompt(self, job_description: str) -> str:
        """Builds a prompt for classification of the job description.

        Args:
            job_description: Raw job description text.

        Returns:
            A prompt asking the LLM to answer "yes" (fresher) or "no".
        """
        # BUG FIX: the instruction read "for a freshers other wise say",
        # which is broken grammar handed to the LLM; corrected wording.
        prompt = f"""
        Say "yes" if the given job description is suitable for
        a fresher, otherwise say "no".

        {job_description}.
        """
        return prompt.strip()

    def parse_response(self, model_response: str) -> bool:
        """Parses a response from the LLM to decide the final answer.

        Args:
            model_response: Output of the llm for the given prompt.

        Returns:
            True if the job_description is for a fresher, otherwise False.
        """
        # Lenient substring match: any case-insensitive occurrence of "yes"
        # counts as a fresher verdict; everything else is False.
        return "yes" in model_response.lower()