Skip to content

Commit

Permalink
Added leaderboard md
Browse files Browse the repository at this point in the history
  • Loading branch information
hetulvp committed Apr 20, 2024
1 parent 72fff40 commit 0cac2b0
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 20 deletions.
1 change: 1 addition & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ markdown_extensions:
format: !!python/name:pymdownx.superfences.fence_code_format
extra_css:
- stylesheets/extra.css
- stylesheets/leaderboard.css
extra:
generator: false
social:
Expand Down
2 changes: 1 addition & 1 deletion session_2/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Session 1 - Universe of Pretrained LLMs and Prompt Engineering
# Session 2 - Universe of Pretrained LLMs and Prompt Engineering

<p align="center"><img src="../images/home_page/Session%202.png" alt="Session 2" style="width:70%;"/></p>
3 changes: 3 additions & 0 deletions session_2/challenge/.pages
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nav:
- Leaderboard: leaderboard.md
- How to participate?: how_to_participate.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,4 @@
# Prompt Engineering Challenge

## Description

Classify if a job is suitable for a fresher or not from the job description
using LLM using prompt engineering.

## Public leaderboard

TODO

## How to participate?
# How to participate?

!!! tip "TLDR"
Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
Expand Down
65 changes: 65 additions & 0 deletions session_2/challenge/leaderboard.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Leaderboard

!!! tip "Description"
Test your prompt engineering skills to classify if a job description is suitable
for a fresher or not. Check [participation guide](how_to_participate.md).

<center>
<table>
<thead style="background: #fafafa;">
<tr>
<th>Rank</th>
<th>Profile Image</th>
<th>GitHub Username</th>
<th>Solution</th>
<th>Accuracy %</th>
</tr>
</thead>
<tbody>
<!-- Sample data, replace with actual leaderboard data -->
<tr>
<td>1</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username1">Username 1</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>100</td>
</tr>
<!-- Add more rows as needed -->
<tr>
<td>2</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<!-- Add more rows up to 10 -->
</tbody>
</table>
</center>
16 changes: 14 additions & 2 deletions session_2/challenge/scripts/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def build_prompt(self, job_description: str) -> str:
"prompt", None, "Name of the prompt to evaluate."
)

# Command-line flag controlling verbose per-sample logging: when true, the
# evaluation loop logs each prompt, raw model response, target and prediction.
# NOTE(review): this defaults to True, so debug output is on unless the caller
# passes --nodebug — confirm that is intended for normal evaluation runs.
_DEBUG = flags.DEFINE_bool(
    "debug", True, "Prints prompt and response if true."
)

# Directory (relative to the challenge scripts) holding the sample test inputs.
_SAMPLES_DIR = "sample_inputs"


Expand Down Expand Up @@ -69,20 +73,28 @@ def evaluate(prompt_name: str):
# Generate results for the dataset.
dataset = load_sample_test_set()
correct_pred = 0
for job_description, target in tqdm.tqdm(dataset):
for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)):
prompt = prompt_handler.build_prompt(job_description=job_description)
logging.debug("[prompt %d]\n%s", idx, prompt)
response = llm.generate(prompt=prompt)
logging.debug("[response %d]\n%s", idx, response)
output = prompt_handler.parse_response(model_response=response)
logging.debug("[target %d]\n%s", idx, target)
logging.debug("[prediction %d]\n%s", idx, output)
if output == target:
correct_pred += 1

logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100))
print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201


def main(argv: Sequence[str]) -> None:
    """Entrypoint: validates CLI arguments, sets log level, runs evaluation.

    Raises:
        app.UsageError: if any positional command-line argument is supplied.
    """
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    # Root-logger level follows the --debug flag so the per-sample
    # prompt/response logging in evaluate() becomes visible when requested.
    log_level = logging.DEBUG if _DEBUG.value else logging.INFO
    logging.getLogger().setLevel(log_level)
    evaluate(prompt_name=_PROMPT.value)


Expand Down
9 changes: 5 additions & 4 deletions session_2/challenge/scripts/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ class G4fModel(Model):

def generate(self, prompt: str) -> str:
    """Completes a prompt using gpt-4 for free model.

    Args:
        prompt: The full prompt text to send to the model.

    Returns:
        The model's raw text response.

    NOTE(review): the actual g4f call below is commented out and the method
    is stubbed to always return "yes" — presumably a temporary measure so the
    evaluation pipeline can run offline; confirm before relying on results.
    """
    # response = g4f.ChatCompletion.create(
    #     model=g4f.models.gpt_4,
    #     messages=[{"role": "user", "content": prompt}],
    # )
    response = "yes"
    return response
2 changes: 1 addition & 1 deletion stylesheets/extra.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

.md-header {
margin-top: 10px;
}
}

0 comments on commit 0cac2b0

Please sign in to comment.