Skip to content

Commit

Permalink
Added leaderboard md
Browse files Browse the repository at this point in the history
  • Loading branch information
hetulvp committed Apr 20, 2024
1 parent 72fff40 commit 0cac2b0
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 20 deletions.
1 change: 1 addition & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ markdown_extensions:
format: !!python/name:pymdownx.superfences.fence_code_format
extra_css:
- stylesheets/extra.css
- stylesheets/leaderboard.css
extra:
generator: false
social:
Expand Down
2 changes: 1 addition & 1 deletion session_2/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Session 1 - Universe of Pretrained LLMs and Prompt Engineering
# Session 2 - Universe of Pretrained LLMs and Prompt Engineering

<p align="center"><img src="../images/home_page/Session%202.png" alt="Session 2" style="width:70%;"/></p>
3 changes: 3 additions & 0 deletions session_2/challenge/.pages
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nav:
- Leaderboard: leaderboard.md
- How to participate?: how_to_participate.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,4 @@
# Prompt Engineering Challenge

## Description

Classify if a job is suitable for a fresher or not from the job description
using LLM using prompt engineering.

## Public leaderboard

TODO

## How to participate?
# How to participate?

!!! tip "TLDR"
Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series)
Expand Down
65 changes: 65 additions & 0 deletions session_2/challenge/leaderboard.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Leaderboard

!!! tip "Description"
Test your prompt engineering skills to classify if a job description is suitable
for a fresher or not. Check [participation guide](how_to_participate.md).

<center>
<table>
<thead style="background: #fafafa;">
<tr>
<th>Rank</th>
<th>Profile Image</th>
<th>GitHub Username</th>
<th>Solution</th>
<th>Accuracy %</th>
</tr>
</thead>
<tbody>
<!-- Sample data, replace with actual leaderboard data -->
<tr>
<td>1</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username1">Username 1</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>100</td>
</tr>
<!-- Add more rows as needed -->
<tr>
<td>2</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<tr>
<td>3</td>
<td><img src="https://github.com/hetul-patel.png?" style="width: 50px; height: 50px; border-radius: 50%; box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15);" alt="Profile Image"></td>
<td><a href="https://github.com/username2">Username 2</a></td>
<td><a href="https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py">Baseline</a></td>
<td>95</td>
</tr>
<!-- Add more rows up to 10 -->
</tbody>
</table>
</center>
16 changes: 14 additions & 2 deletions session_2/challenge/scripts/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def build_prompt(self, job_description: str) -> str:
"prompt", None, "Name of the prompt to evaluate."
)

# Command-line flag controlling verbose per-sample logging: when true, the
# evaluation loop logs each prompt, raw model response, target and prediction.
# NOTE(review): this defaults to True, so debug output is on unless the caller
# passes --nodebug — confirm that is intended for normal evaluation runs.
_DEBUG = flags.DEFINE_bool(
    "debug", True, "Prints prompt and response if true."
)

# Directory (relative to the challenge scripts) holding the sample test inputs.
_SAMPLES_DIR = "sample_inputs"


Expand Down Expand Up @@ -69,20 +73,28 @@ def evaluate(prompt_name: str):
# Generate results for the dataset.
dataset = load_sample_test_set()
correct_pred = 0
for job_description, target in tqdm.tqdm(dataset):
for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)):
prompt = prompt_handler.build_prompt(job_description=job_description)
logging.debug("[prompt %d]\n%s", idx, prompt)
response = llm.generate(prompt=prompt)
logging.debug("[response %d]\n%s", idx, response)
output = prompt_handler.parse_response(model_response=response)
logging.debug("[target %d]\n%s", idx, target)
logging.debug("[prediction %d]\n%s", idx, output)
if output == target:
correct_pred += 1

logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100))
print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201


def main(argv: Sequence[str]) -> None:
    """Entrypoint: validates CLI arguments, sets log level, runs evaluation.

    Raises:
        app.UsageError: if any positional command-line argument is supplied.
    """
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    # Root-logger level follows the --debug flag so the per-sample
    # prompt/response logging in evaluate() becomes visible when requested.
    log_level = logging.DEBUG if _DEBUG.value else logging.INFO
    logging.getLogger().setLevel(log_level)
    evaluate(prompt_name=_PROMPT.value)


Expand Down
9 changes: 5 additions & 4 deletions session_2/challenge/scripts/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ class G4fModel(Model):

def generate(self, prompt: str) -> str:
    """Completes a prompt using gpt-4 for free model.

    Args:
        prompt: The full prompt text to send to the model.

    Returns:
        The model's raw text response.

    NOTE(review): the actual g4f call below is commented out and the method
    is stubbed to always return "yes" — presumably a temporary measure so the
    evaluation pipeline can run offline; confirm before relying on results.
    """
    # response = g4f.ChatCompletion.create(
    #     model=g4f.models.gpt_4,
    #     messages=[{"role": "user", "content": prompt}],
    # )
    response = "yes"
    return response
2 changes: 1 addition & 1 deletion stylesheets/extra.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

.md-header {
margin-top: 10px;
}
}

0 comments on commit 0cac2b0

Please sign in to comment.