diff --git a/.github/workflows/check_star_for_challenge_submission.yaml b/.github/workflows/check_star_for_challenge_submission.yaml
new file mode 100644
index 0000000..09e1cc7
--- /dev/null
+++ b/.github/workflows/check_star_for_challenge_submission.yaml
@@ -0,0 +1,38 @@
+name: Check star for a prompt challenge submission
+
+on:
+  pull_request:
+    types: [opened, reopened, synchronize]
+
+jobs:
+  check_star_for_submission:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Check for changes in submissions directory
+        id: check_submissions_changes
+        run: |
+          if git diff --name-only "origin/${{ github.base_ref }}...HEAD" | grep '^session_2/challenge/submissions/'; then
+            echo "changes_made=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "changes_made=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Check if user has starred the repo
+        id: check_stars
+        if: steps.check_submissions_changes.outputs.changes_made == 'true'
+        run: |
+          user=$(jq -r .pull_request.user.login "$GITHUB_EVENT_PATH")
+          repo=$(jq -r .repository.full_name "$GITHUB_EVENT_PATH")
+          if ! curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" "https://api.github.com/repos/$repo/stargazers?per_page=100" | jq -e --arg user "$user" '.[] | select(.login == $user)' > /dev/null; then
+            echo "::error::User $user must star the repository to submit a pull request."
+            exit 1
+          fi
+
+      - name: Check if all checks passed
+        run: echo "All checks passed."
diff --git a/.gitignore b/.gitignore
index a5b3032..13bd4cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-site/*
\ No newline at end of file
+site/*
+*.pyc
\ No newline at end of file
diff --git a/session_2/README.md b/session_2/README.md
new file mode 100644
index 0000000..3975414
--- /dev/null
+++ b/session_2/README.md
@@ -0,0 +1,3 @@
+# Session 2 - Universe of Pretrained LLMs and Prompt Engineering
+
+Session 2
\ No newline at end of file
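Note: the star check in the workflow above pages through GitHub's stargazers API. A minimal way to sanity-check that query locally (a sketch only, not part of this PR; the `has_starred` helper is hypothetical and assumes the `requests` package is installed):

```python
# Local sanity check for the stargazers query used in the workflow above.
# Hypothetical helper, not part of this PR; requires `pip install requests`.
import requests


def has_starred(user: str, repo: str) -> bool:
    """Pages through a repo's stargazers and looks for the given user."""
    page = 1
    while True:
        resp = requests.get(
            f"https://api.github.com/repos/{repo}/stargazers",
            params={"per_page": 100, "page": page},
        )
        resp.raise_for_status()
        stargazers = resp.json()
        if not stargazers:
            return False  # Ran out of pages without finding the user.
        if any(u["login"] == user for u in stargazers):
            return True
        page += 1


if __name__ == "__main__":
    # Example: check whether "octocat" starred the seminar repo.
    print(has_starred("octocat", "infocusp/llm_seminar_series"))
```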
diff --git a/session_2/challenge/README.md b/session_2/challenge/README.md
new file mode 100644
index 0000000..4057025
--- /dev/null
+++ b/session_2/challenge/README.md
@@ -0,0 +1,75 @@
+# Prompt Engineering Challenge
+
+## Description
+
+Classify whether a job is suitable for a fresher or not from the job
+description, using an LLM and prompt engineering.
+
+## Public leaderboard
+
+TODO
+
+## How to participate?
+
+1. Fork the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
+   repo and open it in github codespaces or clone locally.
+2. Go to the `llm_seminar_series/session_2/challenge` dir and run the evaluation
+   script to test the `"baseline"` prompt as shown below.
+
+    ```bash
+    # Change the present working dir
+    cd session_2/challenge
+
+    # Run baseline evaluation
+    python3 -m scripts.evaluate --prompt="baseline"
+    ```
+
+3. To submit your own prompt, make a copy of `submissions/baseline.py` and
+   change the name of the prompt from `baseline` to something else which
+   describes your prompt, e.g.,
+
+    ```python
+    # file: submissions/name_of_your_prompt.py
+
+    @registry.register("name_of_your_prompt")
+    class NameOfYourPrompt(base.PromptSubmission):
+        ...
+    ```
+
+   Also change the class name and register it with a new name (it can be the
+   same as the filename).
+
+4. Update the `build_prompt` and `parse_response` methods.
+
+   - The `build_prompt` method must take a job description as input and
+     create a prompt for the LLM.
+
+   - The `parse_response` method must post-process the output generated by
+     the LLM and return a boolean value:
+
+     - `True`: if the job description is for a fresher-level job.
+     - `False`: if the job description is for an expert-level job.
+
+5. Run the evaluation locally using your new prompt and check the results.
+
+    ```bash
+    python3 -m scripts.evaluate --prompt="name_of_your_prompt"
+    ```
+
+6. Push your changes to the forked repo and create a pull request.
+
+   - Add your changes: ```git add submissions/name_of_your_prompt.py```
+   - Commit your changes: ```git commit -m "write a commit message"```
+   - Push your changes to your forked repo: ```git push```
+   - Star the [original repo](https://github.com/infocusp/llm_seminar_series)
+     (mandatory for submission) and raise a pull request from github to
+     submit your prompt.
+
+7. Congratulations 🎉, once a repo maintainer approves your submission and
+   merges your PR, your rank based on a private test set will be published
+   on the public leaderboard.
+
+!!! note
+    You can test your prompt on your own samples by adding new files under
+    the `sample_inputs` dir. The file name must end with `_yes.txt` if the
+    file contains a JD for a fresher, otherwise it must end with `_no.txt`.
\ No newline at end of file
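For reference, a complete submission following the steps above might look like the sketch below. The `keyword_prompt` name and prompt wording are made up for illustration; `scripts.base` and `scripts.registry` are the interfaces added later in this diff.

```python
# file: submissions/keyword_prompt.py (hypothetical example, not part of this PR)
from scripts import base, registry


@registry.register("keyword_prompt")
class KeywordPrompt(base.PromptSubmission):
    """Asks the LLM for a one-word YES/NO answer."""

    def build_prompt(self, job_description: str) -> str:
        """Wraps the job description in a YES/NO classification instruction."""
        return (
            "Answer with a single word, YES or NO. "
            "Is the following job description suitable for a fresher?\n\n"
            f"{job_description}"
        )

    def parse_response(self, model_response: str) -> bool:
        """Maps the model's raw answer to a boolean label."""
        return "yes" in model_response.lower()
```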
diff --git a/session_2/challenge/requirements.txt b/session_2/challenge/requirements.txt
new file mode 100644
index 0000000..ec415f7
--- /dev/null
+++ b/session_2/challenge/requirements.txt
@@ -0,0 +1,3 @@
+g4f>=0.2.9.9
+tqdm>=4.66.2
+absl-py>=2.1.0
\ No newline at end of file
diff --git a/session_2/challenge/sample_inputs/sample_1_yes.txt b/session_2/challenge/sample_inputs/sample_1_yes.txt
new file mode 100644
index 0000000..b7e0bae
--- /dev/null
+++ b/session_2/challenge/sample_inputs/sample_1_yes.txt
@@ -0,0 +1 @@
+We need a beginner level python developer.
\ No newline at end of file
diff --git a/session_2/challenge/sample_inputs/sample_2_no.txt b/session_2/challenge/sample_inputs/sample_2_no.txt
new file mode 100644
index 0000000..d0016ec
--- /dev/null
+++ b/session_2/challenge/sample_inputs/sample_2_no.txt
@@ -0,0 +1 @@
+We need a python expert with 7+ years of experience.
\ No newline at end of file
diff --git a/session_2/challenge/scripts/base.py b/session_2/challenge/scripts/base.py
new file mode 100644
index 0000000..af6ae02
--- /dev/null
+++ b/session_2/challenge/scripts/base.py
@@ -0,0 +1,35 @@
+"""Base class for prompt submission."""
+
+import abc
+
+
+class PromptSubmission(abc.ABC):
+    """Base class for prompt submission."""
+
+    def __init__(self):
+        """Initializes a prompt submission class."""
+        pass
+
+    @abc.abstractmethod
+    def build_prompt(self, job_description: str) -> str:
+        """Builds a prompt for classification of job description.
+
+        Args:
+            job_description: Input for classification.
+
+        Returns:
+            Input for the LLM.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def parse_response(self, model_response: str) -> bool:
+        """Parses a response from the LLM to decide the final answer.
+
+        Args:
+            model_response: Output of the LLM for the given prompt.
+
+        Returns:
+            True if the job description is for a fresher, otherwise False.
+        """
+        raise NotImplementedError
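Because `parse_response` is pure post-processing, a submission can be smoke-tested against this interface without any LLM call. A minimal sketch (hypothetical snippet, not part of this PR; assumes it runs from `session_2/challenge` with the `baseline` submission shown later in this diff):

```python
# Smoke-tests a registered submission's prompt/parse round trip without an LLM.
from scripts import registry
from submissions import baseline  # noqa: F401  (import registers "baseline")

handler = registry.get(name="baseline")

# build_prompt should embed the job description in the prompt text.
prompt = handler.build_prompt(job_description="Hiring a junior python dev.")
assert "junior python dev" in prompt

# parse_response should map raw LLM text to a boolean label.
assert handler.parse_response("YES, this is a fresher role.") is True
assert handler.parse_response("NO, seniors only.") is False

print("Smoke test passed.")
```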
diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py
new file mode 100644
index 0000000..6504ff3
--- /dev/null
+++ b/session_2/challenge/scripts/evaluate.py
@@ -0,0 +1,89 @@
+"""Evaluates the submitted prompts.
+
+You can copy session_2/challenge/submissions/baseline.py to create your own
+prompt and evaluate it locally using this script.
+
+You need to pass the name used for registering a submission.
+
+For example,
+
+```
+@registry.register("baseline")
+class Baseline(base.PromptSubmission):
+
+    def build_prompt(self, job_description: str) -> str:
+        ...
+```
+
+In the above code, a Baseline class is registered with the name of `baseline`,
+so you can run the below sample command to evaluate it.
+
+python3 -m scripts.evaluate --prompt=baseline
+"""
+
+import glob
+import os
+from collections.abc import Sequence
+
+import tqdm
+from absl import app, flags
+from scripts import model, registry
+from submissions import baseline  # noqa: F401
+
+_PROMPT = flags.DEFINE_string(
+    "prompt", None, "Name of the prompt to evaluate."
+)
+
+_SAMPLES_DIR = "sample_inputs"
+
+
+def load_sample_test_set() -> list[tuple[str, bool]]:
+    """Loads sample job descriptions and answers for local testing."""
+    sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt"))
+    sample_inputs = []
+    for filepath in sample_files:
+        with open(filepath, "r") as f:
+            content = f.read()
+        filename = os.path.basename(filepath).lower()
+        if filename.endswith("_yes.txt"):
+            target = True
+        elif filename.endswith("_no.txt"):
+            target = False
+        else:
+            raise ValueError(
+                "File %s must end with _yes.txt or _no.txt" % filepath
+            )
+        sample_inputs.append((content, target))
+    return sample_inputs
+
+
+def evaluate(prompt_name: str):
+    """Evaluates the prompt submission."""
+    # Loads a free gpt4 model.
+    llm = model.G4fModel()
+
+    # Loads a prompt submission.
+    prompt_handler = registry.get(name=prompt_name)
+
+    # Generate results for the dataset.
+    dataset = load_sample_test_set()
+    correct_pred = 0
+    for job_description, target in tqdm.tqdm(dataset):
+        prompt = prompt_handler.build_prompt(job_description=job_description)
+        response = llm.generate(prompt=prompt)
+        output = prompt_handler.parse_response(model_response=response)
+        if output == target:
+            correct_pred += 1
+
+    print("Acc : %.3f%%" % (correct_pred / len(dataset) * 100))
+
+
+def main(argv: Sequence[str]) -> None:
+    """Entrypoint."""
+    if len(argv) > 1:
+        raise app.UsageError("Too many command-line arguments.")
+    evaluate(prompt_name=_PROMPT.value)
+
+
+if __name__ == "__main__":
+    app.run(main)
diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py
new file mode 100644
index 0000000..ba6c450
--- /dev/null
+++ b/session_2/challenge/scripts/model.py
@@ -0,0 +1,26 @@
+"""Model inference."""
+
+import g4f
+
+
+class Model:
+    """Base class for LLM."""
+
+    def generate(self, prompt: str) -> str:
+        """Returns a generation for prompt."""
+        return ""
+
+
+class G4fModel(Model):
+    """A free gpt4 model.
+
+    Reference: https://github.com/xtekky/gpt4free
+    """
+
+    def generate(self, prompt: str) -> str:
+        """Completes a prompt using a free gpt-4 model."""
+        response = g4f.ChatCompletion.create(
+            model=g4f.models.gpt_4,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        return response
diff --git a/session_2/challenge/scripts/registry.py b/session_2/challenge/scripts/registry.py
new file mode 100644
index 0000000..3659dec
--- /dev/null
+++ b/session_2/challenge/scripts/registry.py
@@ -0,0 +1,30 @@
+"""Registry of all the submitted prompts."""
+
+from typing import Type
+
+from scripts import base
+
+_SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {}
+
+
+def register(name: str):
+    """Returns a decorator that registers a submission with the given name."""
+
+    def _register(klass: Type[base.PromptSubmission]):
+        _SUBMISSIONS_REGISTRY[name] = klass
+        return klass
+
+    return _register
+
+
+def get(name: str) -> base.PromptSubmission:
+    """Returns an instance of the submission registered with the given name."""
+    if name not in _SUBMISSIONS_REGISTRY:
+        raise KeyError(f"Submission with name {name} not found.")
+    klass = _SUBMISSIONS_REGISTRY[name]
+    return klass()
+
+
+def get_all() -> list[Type[base.PromptSubmission]]:
+    """Returns all the registered submission classes."""
+    return list(_SUBMISSIONS_REGISTRY.values())
diff --git a/session_2/challenge/submissions/baseline.py b/session_2/challenge/submissions/baseline.py
new file mode 100644
index 0000000..1f76d67
--- /dev/null
+++ b/session_2/challenge/submissions/baseline.py
@@ -0,0 +1,34 @@
+"""Baseline submission for the job description classification challenge."""
+
+from scripts import base, registry
+
+
+@registry.register("baseline")
+class Baseline(base.PromptSubmission):
+    """Baseline submission."""
+
+    def build_prompt(self, job_description: str) -> str:
+        """Builds a prompt for classification of job description."""
+        prompt = f"""
+
+        Say "YES" if the given job description is suitable for
+        a fresher, otherwise say "NO".
+
+        {job_description}.
+
+        """
+        return prompt.strip()
+
+    def parse_response(self, model_response: str) -> bool:
+        """Parses a response from the LLM to decide the final answer.
+
+        Args:
+            model_response: Output of the LLM for the given prompt.
+
+        Returns:
+            True if the job description is for a fresher, otherwise False.
+        """
+        model_response = model_response.lower()
+        if "yes" in model_response:
+            return True
+        return False
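One caveat on the baseline's `parse_response`: it treats any response containing the substring "yes" as a positive label, so a reply beginning with a word like "Analyses" would be misread. A stricter variant could anchor on the model's first word instead; a minimal sketch (the `parse_response_strict` helper is hypothetical, not part of this PR):

```python
# Hypothetical stricter parser: inspects only the first alphabetic token,
# so responses like "Analyses show..." are not misread as "yes".
import re


def parse_response_strict(model_response: str) -> bool:
    """Returns True only when the model's first word is 'yes'."""
    match = re.search(r"[a-zA-Z]+", model_response)
    return bool(match) and match.group(0).lower() == "yes"


assert parse_response_strict("YES, suitable for freshers.") is True
assert parse_response_strict('"Yes" - it is an entry level role.') is True
assert parse_response_strict("Analyses show 7+ years required.") is False
```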