Added baseline submission
hetulvp committed Apr 19, 2024
1 parent 99baa0e commit 8509ff5
Showing 12 changed files with 326 additions and 1 deletion.
25 changes: 25 additions & 0 deletions .github/workflows/check_star_for_challange_submission.yaml
@@ -0,0 +1,25 @@
name: Check star for a prompt challenge submission

on:
  pull_request:
    types: [opened, reopened, synchronize]

jobs:
  is-stargazer:
    runs-on: ubuntu-latest
    steps:
      - uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: |
            src:
              - 'session_2/challenge/submissions/**'
      - uses: gacts/is-stargazer@v1
        id: check-star
      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.check-star.outputs.is-stargazer != 'true') }}
        uses: actions/github-script@v6
        with:
          script: core.setFailed('⭐ Please, star this repository!')
3 changes: 2 additions & 1 deletion .gitignore
@@ -1 +1,2 @@
site/*
site/*
*.pyc
3 changes: 3 additions & 0 deletions session_2/README.md
@@ -0,0 +1,3 @@
# Session 2 - Universe of Pretrained LLMs and Prompt Engineering

<p align="center"><img src="../images/home_page/Session%202.png" alt="Session 2" style="width:70%;"/></p>
76 changes: 76 additions & 0 deletions session_2/challenge/README.md
@@ -0,0 +1,76 @@
# Prompt Engineering Challenge

## Description

Use an LLM with prompt engineering to classify whether a job description is
suitable for a fresher or not.

## Public leaderboard

TODO

## How to participate?

1. Fork the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
   repo and open it in GitHub Codespaces or clone it locally.
2. Go to the `llm_seminar_series/session_2/challenge` dir and run the evaluation
   script to test the `"baseline"` prompt as shown below.

```bash
# Change the present working dir
cd session_2/challenge

# Run baseline evaluation
python3 -m scripts.evaluate --prompt="baseline"
```

3. To submit your own prompt, make a copy of `submissions/baseline.py` and
   change the name of the prompt from `baseline` to something else that
   describes your prompt, e.g.,

```python
# file: submissions/name_of_your_prompt.py
@registry.register("name_of_your_prompt")
class NameOfYourPrompt(base.PromptSubmission):
    ...
```

Also change the class name and register it with a new name (it can be the
same as the filename).

4. Update the `build_prompt` and `parse_response` methods. A minimal sketch
   is shown after this list.

   - The `build_prompt` method must take a job description as input and
     create a prompt for the LLM.

   - The `parse_response` method must post-process the output generated by
     the LLM and return a boolean value:

     - `True`: if the job description is for a fresher-level job.
     - `False`: if the job description is for an expert-level job.

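A minimal sketch of the two methods (illustrative only; the file, class, and
prompt names are placeholders, adapt them to your own prompt):

```python
# file: submissions/name_of_your_prompt.py
from scripts import base, registry


@registry.register("name_of_your_prompt")
class NameOfYourPrompt(base.PromptSubmission):
    """Your prompt submission."""

    def build_prompt(self, job_description: str) -> str:
        # Ask the LLM for an explicit YES/NO verdict on the job description.
        return (
            'Say "YES" if the following job description is suitable for '
            'a fresher, otherwise say "NO".\n\n' + job_description
        )

    def parse_response(self, model_response: str) -> bool:
        # Map the LLM's free-form answer to a boolean label.
        return "yes" in model_response.lower()
```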

5. Run the evaluation locally using your new prompt and check the results.

```bash
python3 -m scripts.evaluate --prompt="name_of_your_prompt"
```

6. Push your changes to the forked repo and create a pull request.

   - Add your changes: `git add submissions/name_of_your_prompt.py`
   - Commit your changes: `git commit -m "write a commit message"`
   - Push your changes to your forked repo: `git push`
   - Star the [original repo](https://github.com/infocusp/llm_seminar_series)
     (mandatory for submission) and raise a pull request from GitHub to
     submit your prompt.

7. Congratulations 🎉! Once a repo maintainer approves your submission and
   merges your PR, your rank based on a private test set will be published
   on the public leaderboard.

!!! note
    You can test your prompt on your own samples by adding new files under
    the `sample_inputs` dir. The file name must end with `_yes.txt` if the
    file contains a JD for a fresher, otherwise it must end with `_no.txt`.
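    For example, a new fresher-level sample (hypothetical file name and
    content) could be added as:

    ```bash
    echo "Looking for an entry level data analyst." \
        > sample_inputs/sample_3_yes.txt
    ```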
3 changes: 3 additions & 0 deletions session_2/challenge/requirements.txt
@@ -0,0 +1,3 @@
g4f>=0.2.9.9
tqdm>=4.66.2
absl-py>=2.1.0
1 change: 1 addition & 0 deletions session_2/challenge/sample_inputs/sample_1_yes.txt
@@ -0,0 +1 @@
We need a beginner level python developer.
1 change: 1 addition & 0 deletions session_2/challenge/sample_inputs/sample_2_no.txt
@@ -0,0 +1 @@
We need a Python expert with 7+ years of experience.
35 changes: 35 additions & 0 deletions session_2/challenge/scripts/base.py
@@ -0,0 +1,35 @@
"""Base class for prompt submission."""

import abc


class PromptSubmission(abc.ABC):
"""Base class for prompt submission."""

def __init__(self):
"""Initializes a prompt submission class."""
pass

@abc.abstractmethod
def build_prompt(self, job_description: str) -> str:
"""Builds a prompt for classification of job description.
Args:
job_description: Input for classification.
Returns:
Input for the LLM.
"""
raise NotImplementedError

@abc.abstractmethod
def parse_response(self, model_response: str) -> bool:
"""Parses a response from the LLM to decide the final answer.
Args:
model_response: Output of the llm for the given prompt.
Returns:
True is the job_description is for a fresher otherwise False.
"""
raise NotImplementedError
90 changes: 90 additions & 0 deletions session_2/challenge/scripts/evaluate.py
@@ -0,0 +1,90 @@
"""Evaluates the submitted prompts.
You can copy session_2/challenge/submissions/baseline.py to modify your own
prompt and evaluate it locally using this script.
You need to pass the name used for registering a submission.
For example,
```
@registry.register("baseline")
class Baseline(base.PromptSubmission):
def build_prompt(self, job_description: str) -> str:
...
```
In the above code, a Baseline class is registered with the name of `baseline`,
so you can run the below sample command to evaluate it.
python3 -m scripts.evaluate --prompt=baseline
"""

import glob
import logging
import os
from collections.abc import Sequence

import tqdm
from absl import app, flags
from scripts import model, registry
from submissions import baseline # noqa: F401

_PROMPT = flags.DEFINE_string(
    "prompt", None, "Name of the prompt to evaluate."
)

_SAMPLES_DIR = "sample_inputs"


def load_sample_test_set() -> list[tuple[str, bool]]:
    """Loads sample job descriptions and answers for local testing."""
    sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt"))
    sample_inputs = []
    for filepath in sample_files:
        with open(filepath, "r") as f:
            content = f.read()
        filename = os.path.basename(filepath).lower()
        if filename.endswith("_yes.txt"):
            target = True
        elif filename.endswith("_no.txt"):
            target = False
        else:
            raise ValueError(
                "File %s must end with _yes.txt or _no.txt" % filepath
            )
        sample_inputs.append((content, target))
    return sample_inputs


def evaluate(prompt_name: str):
    """Evaluates the prompt submission."""
    # Loads a free gpt4 model.
    llm = model.G4fModel()

    # Loads a prompt submission.
    prompt_handler = registry.get(name=prompt_name)

    # Generates results for the sample dataset.
    dataset = load_sample_test_set()
    correct_pred = 0
    for job_description, target in tqdm.tqdm(dataset):
        prompt = prompt_handler.build_prompt(job_description=job_description)
        response = llm.generate(prompt=prompt)
        output = prompt_handler.parse_response(model_response=response)
        if output == target:
            correct_pred += 1

    logging.info("Accuracy: %.2f%%", correct_pred / len(dataset) * 100)


def main(argv: Sequence[str]) -> None:
    """Entrypoint."""
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    evaluate(prompt_name=_PROMPT.value)


if __name__ == "__main__":
    app.run(main)
26 changes: 26 additions & 0 deletions session_2/challenge/scripts/model.py
@@ -0,0 +1,26 @@
"""Model inference."""

import g4f


class Model:
    """Base class for LLM."""

    def generate(self, prompt: str) -> str:
        """Returns a generation for the prompt."""
        return ""


class G4fModel(Model):
    """A free gpt-4 model.

    Reference: https://github.com/xtekky/gpt4free
    """

    def generate(self, prompt: str) -> str:
        """Completes a prompt using a free gpt-4 model."""
        response = g4f.ChatCompletion.create(
            model=g4f.models.gpt_4,
            messages=[{"role": "user", "content": prompt}],
        )
        return response
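# Example usage (a sketch; g4f responses depend on the providers available
# at runtime and may fail or change over time):
#
#   llm = G4fModel()
#   print(llm.generate(prompt="Reply with the word YES."))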
30 changes: 30 additions & 0 deletions session_2/challenge/scripts/registry.py
@@ -0,0 +1,30 @@
"""Registry of all the submitted prompts."""

from typing import Type

from scripts import base

_SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {}


def register(name: str):
    """Returns a decorator that registers a submission with the given name."""

    def _register(klass: Type[base.PromptSubmission]):
        _SUBMISSIONS_REGISTRY[name] = klass
        return klass

    return _register


def get(name: str) -> base.PromptSubmission:
    """Returns the submission registered with the given name."""
    if name not in _SUBMISSIONS_REGISTRY:
        raise NotImplementedError(f"Submission with name {name} not found.")
    klass = _SUBMISSIONS_REGISTRY[name]
    return klass()


def get_all() -> list[Type[base.PromptSubmission]]:
    """Returns all the submissions."""
    return list(_SUBMISSIONS_REGISTRY.values())
34 changes: 34 additions & 0 deletions session_2/challenge/submissions/baseline.py
@@ -0,0 +1,34 @@
"""Baseline submission for the job description classification challenge."""

from scripts import base, registry


@registry.register("baseline")
class Baseline(base.PromptSubmission):
    """Baseline submission."""

    def build_prompt(self, job_description: str) -> str:
        """Builds a prompt for classification of a job description."""
        prompt = f"""
        Say "YES" if the given job description is suitable for
        a fresher, otherwise say "NO".

        {job_description}.
        """
        return prompt.strip()

    def parse_response(self, model_response: str) -> bool:
        """Parses a response from the LLM to decide the final answer.

        Args:
            model_response: Output of the LLM for the given prompt.

        Returns:
            True if the job_description is for a fresher, otherwise False.
        """
        return "yes" in model_response.lower()
