diff --git a/mkdocs.yaml b/mkdocs.yaml
index b010c86..749973c 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -38,6 +38,7 @@ markdown_extensions:
format: !!python/name:pymdownx.superfences.fence_code_format
extra_css:
- stylesheets/extra.css
+ - stylesheets/leaderboard.css
extra:
generator: false
social:
diff --git a/session_2/README.md b/session_2/README.md
index 3975414..8c344b3 100644
--- a/session_2/README.md
+++ b/session_2/README.md
@@ -1,3 +1,3 @@
-# Session 1 - Universe of Pretrained LLMs and Prompt Engineering
+# Session 2 - Universe of Pretrained LLMs and Prompt Engineering
![Session 2](../images/home_page/Session%202.png)
\ No newline at end of file
diff --git a/session_2/challenge/.pages b/session_2/challenge/.pages
new file mode 100644
index 0000000..967e585
--- /dev/null
+++ b/session_2/challenge/.pages
@@ -0,0 +1,3 @@
+nav:
+ - Leaderboard: leaderboard.md
+  - How to participate?: how_to_participate.md
\ No newline at end of file
diff --git a/session_2/challenge/README.md b/session_2/challenge/how_to_participate.md
similarity index 92%
rename from session_2/challenge/README.md
rename to session_2/challenge/how_to_participate.md
index 1f92445..740cbf4 100644
--- a/session_2/challenge/README.md
+++ b/session_2/challenge/how_to_participate.md
@@ -1,15 +1,4 @@
-# Prompt Engineering Challenge
-
-## Description
-
-Classify if a job is suitable for a fresher or not from the job description
-using LLM using prompt engineering.
-
-## Public leaderboard
-
-TODO
-
-## How to participate?
+# How to participate?
!!! tip "TLDR"
Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md
new file mode 100644
index 0000000..7a573bf
--- /dev/null
+++ b/session_2/challenge/leaderboard.md
@@ -0,0 +1,65 @@
+# Leaderboard
+
+!!! tip "Description"
+ Test your prompt engineering skills to classify if a job description is suitable
+ for a fresher or not. Check [participation guide](how_to_participate.md).
+
+
+
+
\ No newline at end of file
diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py
index 2d0ba77..c2fe3a9 100644
--- a/session_2/challenge/scripts/evaluate.py
+++ b/session_2/challenge/scripts/evaluate.py
@@ -35,6 +35,10 @@ def build_prompt(self, job_description: str) -> str:
"prompt", None, "Name of the prompt to evaluate."
)
+_DEBUG = flags.DEFINE_bool(
+ "debug", True, "Prints prompt and response if true."
+)
+
_SAMPLES_DIR = "sample_inputs"
@@ -69,20 +73,28 @@ def evaluate(prompt_name: str):
# Generate results for the dataset.
dataset = load_sample_test_set()
correct_pred = 0
- for job_description, target in tqdm.tqdm(dataset):
+ for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)):
prompt = prompt_handler.build_prompt(job_description=job_description)
+ logging.debug("[prompt %d]\n%s", idx, prompt)
response = llm.generate(prompt=prompt)
+ logging.debug("[response %d]\n%s", idx, response)
output = prompt_handler.parse_response(model_response=response)
+ logging.debug("[target %d]\n%s", idx, target)
+ logging.debug("[prediction %d]\n%s", idx, output)
if output == target:
correct_pred += 1
- logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100))
+ print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201
def main(argv: Sequence[str]) -> None:
"""Entrypoint."""
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
+ if _DEBUG.value:
+ logging.getLogger().setLevel(logging.DEBUG)
+ else:
+ logging.getLogger().setLevel(logging.INFO)
evaluate(prompt_name=_PROMPT.value)
diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py
index 3a91a96..ba6c450 100644
--- a/session_2/challenge/scripts/model.py
+++ b/session_2/challenge/scripts/model.py
@@ -19,8 +19,9 @@ class G4fModel(Model):
def generate(self, prompt: str) -> str:
"""Completes a prompt using gpt-4 for free model."""
- response = g4f.ChatCompletion.create(
- model=g4f.models.gpt_4,
- messages=[{"role": "user", "content": prompt}],
- )
+ # response = g4f.ChatCompletion.create(
+ # model=g4f.models.gpt_4,
+ # messages=[{"role": "user", "content": prompt}],
+ # )
+ response = "yes"
return response
diff --git a/stylesheets/extra.css b/stylesheets/extra.css
index 21c5ba1..26a0b0b 100644
--- a/stylesheets/extra.css
+++ b/stylesheets/extra.css
@@ -4,4 +4,4 @@
.md-header {
margin-top: 10px;
-}
\ No newline at end of file
+}