Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added baseline submission #7

Merged
merged 5 commits into from
Apr 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##############################################################
#
# List of approvers/reviewers for llm_seminar_series repo
#
##############################################################
#
#
# These owners will be the default owners for everything in the repo.
* @hetulvp
25 changes: 25 additions & 0 deletions .github/workflows/check_star_for_challenge_submission.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: Check star for a prompt challenge submission

on:
  pull_request:
    types: [opened, reopened, synchronize]

jobs:
  is-stargazer:
    runs-on: ubuntu-latest
    steps:

      # Detect whether this PR touches the challenge submissions directory.
      - uses: dorny/[email protected]
        id: changes
        with:
          filters: |
            src:
              - 'session_2/challenge/submissions/**'

      # Check whether the PR author has starred this repository.
      - uses: gacts/[email protected]
        id: check-star

      # Fail the check only for submission PRs whose author has not starred.
      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.check-star.outputs.is-stargazer != 'true') }}
        uses: actions/github-script@v7
        with:
          script: core.setFailed('⭐ Please, star this repository!')
2 changes: 1 addition & 1 deletion .github/workflows/github_pages.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: ci
name: Deploy to github pages
on:
push:
branches:
Expand Down
84 changes: 84 additions & 0 deletions .github/workflows/update_leaderboard.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: Update leaderboard.

on:
  pull_request:
    branches:
      - development
    # NOTE: "merged" is not a valid pull_request activity type, so the
    # original trigger never fired. Trigger on "closed" and gate the job
    # on the merged flag instead.
    types:
      - closed

jobs:
  leaderboard_evaluation:
    # Run only when the PR was actually merged, not just closed.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - name: Check if there are any changes in submissions dir
        uses: dorny/[email protected]
        id: changes
        with:
          filters: |
            src:
              - 'session_2/challenge/submissions/**'
          list-files: "shell"

      - name: Print changed files
        run: |
          echo '${{ toJSON(steps.changes.outputs) }}'

      # Reject PRs that touch more than one submission at once.
      - if: ${{ (steps.changes.outputs.src_count > 1) }}
        uses: actions/github-script@v7
        with:
          script: core.setFailed('More than one submissions are not allowed at once.')

      # Update leaderboard only if a single file changed in the submissions dir.
      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }}
        name: Checkout code
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.event.pull_request.head.ref }}

      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }}
        name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }}
        name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r session_2/challenge/requirements.txt

      # The changed file's basename (without extension) is the registered
      # prompt name expected by scripts.leaderboard.
      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }}
        name: Run leaderboard update script
        id: leaderboard-update
        run: |
          cd session_2/challenge
          filename=$(basename "${{ steps.changes.outputs.src_files }}")
          filename_without_extension="${filename%.*}" # Remove extension
          python -m scripts.leaderboard --github_user="${{ github.actor }}" --prompt="$filename_without_extension"

      # Commit the regenerated leaderboard. Guarded by the same condition so
      # it does not run for PRs that did not change a submission.
      - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }}
        name: Commit changes
        uses: EndBug/add-and-commit@v9
        with:
          author_name: GitHub Actions
          author_email: [email protected]
          message: 'Updated leader board'
          add: 'session_2/challenge/leaderboard.md'
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
site/*
site/*
*.pyc

# Ignore samples used for local testing and keep the default ones.
session_2/challenge/sample_inputs/*.txt
!session_2/challenge/sample_inputs/sample_1_yes.txt
!session_2/challenge/sample_inputs/sample_2_no.txt
1 change: 1 addition & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ markdown_extensions:
format: !!python/name:pymdownx.superfences.fence_code_format
extra_css:
- stylesheets/extra.css
- stylesheets/leaderboard.css
extra:
generator: false
social:
Expand Down
3 changes: 3 additions & 0 deletions session_2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Session 2 - Universe of Pretrained LLMs and Prompt Engineering

<p align="center"><img src="../images/home_page/Session%202.png" alt="Session 2" style="width:70%;"/></p>
3 changes: 3 additions & 0 deletions session_2/challenge/.pages
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nav:
- Leaderboard: leaderboard.md
- How to participate ?: how_to_participate.md
64 changes: 64 additions & 0 deletions session_2/challenge/how_to_participate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# How to participate?

!!! tip "TLDR"
Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
repo, add your submission in `llm_seminar_series/session_2/challenge` dir
and raise a pull request.


1. Fork the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
repo and open it in github codespaces or clone locally.
2. Go to `llm_seminar_series/session_2/challenge` dir and run the evaluation
script to test the `"baseline"` prompt as shown below.

```bash
# Change the present working dir
cd session_2/challenge

# Run baseline evaluation
python3 -m scripts.evaluate --prompt="baseline"
```

3. To submit your own prompt, make a copy of `submissions/baseline.py` and
change the name of the file from `baseline` to something else which
describes your prompt. E.g.,

```python
# file: submissions/name_of_your_prompt.py

@registry.register()
class NameOfYourPrompt(base.PromptSubmission):
...
```

Also change the class name.

4. Update the `build_prompt` and `parse_response` method.

- The `build_prompt` method must take job description as input and create a
prompt for the llm.

- The `parse_response` method must post process the output
generated by the llm and return a boolean value.

- `True`: If the job description is for a fresher level job.
- `False`: If the job description is for an expert level job.


5. Run the evaluation locally using your new prompt and check the results.

```bash
python3 -m scripts.evaluate --prompt="name_of_your_prompt"
```

6. Push your changes to the forked repo and create a pull request.

- Add your changes: ```git add submissions/name_of_your_prompt.py```
- Commit your changes: ```git commit -m "write a commit message"```
- Push your changes to your forked repo: ```git push```
- Star the [original repo](https://github.com/infocusp/llm_seminar_series)
(mandatory for submission) and raise a pull request from github to submit
your prompt.

7. Congratulations 🎉, once a repo maintainer approves your submission and merges
your PR, your rank will be published on the public leader board.
25 changes: 25 additions & 0 deletions session_2/challenge/leaderboard.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Leaderboard

## Problem statement

Test your prompt engineering skills to classify if a job description is suitable
for a fresher or not.

Check [participation guide](how_to_participate.md).

## Rankings

<center>

<!-- leader-board-begins -->
| Rank | Profile Image | GitHub Username | Solution | Accuracy % |
|-------:|:------------------------------------------------------------------------------------------------|:----------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:|
| 1 | <img src="https://github.com/hetul-patel.png" width="50px" height="50px" class="profile-image"> | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 |
| 2 | <img src="https://github.com/hetul-patel.png" width="50px" height="50px" class="profile-image"> | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 |
| 3 | <img src="https://github.com/hetul-patel.png" width="50px" height="50px" class="profile-image"> | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 |
| 4 | <img src="https://github.com/hetul-patel.png" width="50px" height="50px" class="profile-image"> | [hetul-patel](https://github.com/hetul-patel) | [baseline](https://github.com/infocusp/llm_seminar_series/blob/main/session_2/challenge/submissions/baseline.py) | 50 |
| 6 | <img src="https://github.com/hetul-patel.png" width="50px" height="50px" class="profile-image"> | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 |
| 7 | <img src="https://github.com/hetul-patel.png" width="50px" height="50px" class="profile-image"> | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 |
<!-- leader-board-ends -->

</center>
5 changes: 5 additions & 0 deletions session_2/challenge/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
g4f>=0.2.9.9
tqdm>=4.66.2
absl-py>=2.1.0
pandas>=2.2.2
tabulate>=0.9.0
1 change: 1 addition & 0 deletions session_2/challenge/sample_inputs/sample_1_yes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
We need a beginner level python developer.
1 change: 1 addition & 0 deletions session_2/challenge/sample_inputs/sample_2_no.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
We need an python expert with 7+ years of experience.
35 changes: 35 additions & 0 deletions session_2/challenge/scripts/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Base class for prompt submission."""

import abc


class PromptSubmission(abc.ABC):
    """Base class for prompt submission.

    Concrete submissions implement a prompt-building strategy plus the
    matching response parser for the fresher-vs-expert classification task.
    """

    def __init__(self):
        """Initializes a prompt submission class."""
        pass

    @abc.abstractmethod
    def build_prompt(self, job_description: str) -> str:
        """Builds a prompt for classification of job description.

        Args:
            job_description: Input for classification.

        Returns:
            Input for the LLM.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def parse_response(self, model_response: str) -> bool:
        """Parses a response from the LLM to decide the final answer.

        Args:
            model_response: Output of the llm for the given prompt.

        Returns:
            True if the job_description is for a fresher, otherwise False.
        """
        raise NotImplementedError
24 changes: 24 additions & 0 deletions session_2/challenge/scripts/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Utilities to load evaluation datasets."""

import glob
import os


def load_sample_test_set(samples_dir: str) -> list[tuple[str, bool]]:
    """Loads sample job descriptions and answers for local testing.

    Each ``*.txt`` file in ``samples_dir`` holds one job description; the
    filename suffix encodes the expected label: ``*_yes.txt`` means the job
    is suitable for a fresher (True), ``*_no.txt`` means it is not (False).

    Args:
        samples_dir: Directory containing the sample ``.txt`` files.

    Returns:
        A list of (job_description, is_fresher) tuples.

    Raises:
        ValueError: If a ``.txt`` filename ends with neither ``_yes.txt``
            nor ``_no.txt``.
    """
    sample_inputs = []
    for filepath in glob.glob(os.path.join(samples_dir, "*.txt")):
        # Context manager guarantees the file handle is closed.
        with open(filepath, "r") as f:
            content = f.read()
        filename = os.path.basename(filepath).lower()
        # The suffix check is authoritative; the original code also had a
        # trailing `target = True if "yes" in filename` line that silently
        # overrode this result (e.g. "says_yes_no.txt" -> True) — removed.
        if filename.endswith("_yes.txt"):
            target = True
        elif filename.endswith("_no.txt"):
            target = False
        else:
            raise ValueError(
                "File %s must end with _yes.txt or _no.txt" % filepath
            )
        sample_inputs.append((content, target))
    return sample_inputs
59 changes: 59 additions & 0 deletions session_2/challenge/scripts/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Evaluates the submitted prompts.

You can copy session_2/challenge/submissions/baseline.py to modify your own
prompt and evaluate it locally using this script.

You need to pass the name used for registering a submission.

For example,

```
@registry.register("baseline")
class Baseline(base.PromptSubmission):

def build_prompt(self, job_description: str) -> str:
...
```

In the above code, a Baseline class is registered with the name of `baseline`,
so you can run the below sample command to evaluate it.

python3 -m scripts.evaluate --prompt=baseline
"""

import logging
from collections.abc import Sequence

from absl import app, flags
from scripts import dataset, evaluate_lib

# Registered name of the prompt submission to evaluate (required; see
# flags.mark_flag_as_required in the entry guard).
_PROMPT = flags.DEFINE_string(
    "prompt", None, "Name of the prompt to evaluate."
)

# When true, the root logger is set to DEBUG so prompts/responses are logged.
_DEBUG = flags.DEFINE_bool(
    "debug", True, "Prints prompt and response if true."
)


def evaluate_on_sample_dataset(prompt_name: str):
    """Evaluates the named prompt on the local sample dataset.

    Loads the sample job descriptions from the ``sample_inputs`` directory,
    scores the registered prompt on them and prints the resulting accuracy.

    Args:
        prompt_name: Registered name of the prompt submission to evaluate.
    """
    samples = dataset.load_sample_test_set(samples_dir="sample_inputs")
    accuracy = evaluate_lib.evaluate(dataset=samples, prompt_name=prompt_name)
    print(f"Accuracy: [{accuracy:.3f}] %")  # noqa: T201


def main(argv: Sequence[str]) -> None:
    """Entrypoint: validates argv, configures logging, runs the evaluation."""
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    # Flag-controlled verbosity on the root logger.
    log_level = logging.DEBUG if _DEBUG.value else logging.INFO
    logging.getLogger().setLevel(log_level)
    evaluate_on_sample_dataset(prompt_name=_PROMPT.value)


# Script entrypoint: make --prompt mandatory before absl parses the flags.
if __name__ == "__main__":
    flags.mark_flag_as_required("prompt")
    app.run(main)
Loading
Loading