From 5b275f7b89a5414ab45be1b31ff5189aa64402a4 Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 04:17:32 +0530 Subject: [PATCH] Added leaderboard md --- mkdocs.yaml | 1 + session_2/README.md | 2 +- .../{README.md => how_to_participate.md} | 13 +--- session_2/challenge/leaderboard.md | 65 +++++++++++++++++++ session_2/challenge/scripts/evaluate.py | 16 ++++- session_2/challenge/scripts/model.py | 9 +-- stylesheets/extra.css | 9 ++- 7 files changed, 95 insertions(+), 20 deletions(-) rename session_2/challenge/{README.md => how_to_participate.md} (92%) create mode 100644 session_2/challenge/leaderboard.md diff --git a/mkdocs.yaml b/mkdocs.yaml index b010c86..749973c 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -38,6 +38,7 @@ markdown_extensions: format: !!python/name:pymdownx.superfences.fence_code_format extra_css: - stylesheets/extra.css + - stylesheets/leaderboard.css extra: generator: false social: diff --git a/session_2/README.md b/session_2/README.md index 3975414..8c344b3 100644 --- a/session_2/README.md +++ b/session_2/README.md @@ -1,3 +1,3 @@ -# Session 1 - Universe of Pretrained LLMs and Prompt Engineering +# Session 2 - Universe of Pretrained LLMs and Prompt Engineering

Session 2

\ No newline at end of file diff --git a/session_2/challenge/README.md b/session_2/challenge/how_to_participate.md similarity index 92% rename from session_2/challenge/README.md rename to session_2/challenge/how_to_participate.md index 1f92445..740cbf4 100644 --- a/session_2/challenge/README.md +++ b/session_2/challenge/how_to_participate.md @@ -1,15 +1,4 @@ -# Prompt Engineering Challenge - -## Description - -Classify if a job is suitable for a fresher or not from the job description -using LLM using prompt engineering. - -## Public leaderboard - -TODO - -## How to participate? +# How to participate? !!! tip "TLDR" Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md new file mode 100644 index 0000000..1451026 --- /dev/null +++ b/session_2/challenge/leaderboard.md @@ -0,0 +1,65 @@ +# Leaderboard + +Classify if a job is suitable for a fresher or not from the job description +using LLM using prompt engineering. + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
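| Rank | Profile Image | GitHub Username | Solution | Accuracy % |
| --- | --- | --- | --- | --- |
| 1 | Profile Image | Username 1 | Baseline | 100 |
| 2 | Profile Image | Username 2 | Baseline | 95 |
| 3 | Profile Image | Username 2 | Baseline | 95 |
| 3 | Profile Image | Username 2 | Baseline | 95 |
| 3 | Profile Image | Username 2 | Baseline | 95 |
| 3 | Profile Image | Username 2 | Baseline | 95 |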
+
\ No newline at end of file diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py index 2d0ba77..c2fe3a9 100644 --- a/session_2/challenge/scripts/evaluate.py +++ b/session_2/challenge/scripts/evaluate.py @@ -35,6 +35,10 @@ def build_prompt(self, job_description: str) -> str: "prompt", None, "Name of the prompt to evaluate." ) +_DEBUG = flags.DEFINE_bool( + "debug", True, "Prints prompt and response if true." +) + _SAMPLES_DIR = "sample_inputs" @@ -69,20 +73,28 @@ def evaluate(prompt_name: str): # Generate results for the dataset. dataset = load_sample_test_set() correct_pred = 0 - for job_description, target in tqdm.tqdm(dataset): + for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): prompt = prompt_handler.build_prompt(job_description=job_description) + logging.debug("[prompt %d]\n%s", idx, prompt) response = llm.generate(prompt=prompt) + logging.debug("[response %d]\n%s", idx, response) output = prompt_handler.parse_response(model_response=response) + logging.debug("[target %d]\n%s", idx, target) + logging.debug("[prediction %d]\n%s", idx, output) if output == target: correct_pred += 1 - logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100)) + print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201 def main(argv: Sequence[str]) -> None: """Entrypoint.""" if len(argv) > 1: raise app.UsageError("Too many command-line arguments.") + if _DEBUG.value: + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.getLogger().setLevel(logging.INFO) evaluate(prompt_name=_PROMPT.value) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py index 3a91a96..ba6c450 100644 --- a/session_2/challenge/scripts/model.py +++ b/session_2/challenge/scripts/model.py @@ -19,8 +19,9 @@ class G4fModel(Model): def generate(self, prompt: str) -> str: """Completes a prompt using gpt-4 for free model.""" - response = g4f.ChatCompletion.create( - 
model=g4f.models.gpt_4, - messages=[{"role": "user", "content": prompt}], - ) + # response = g4f.ChatCompletion.create( + # model=g4f.models.gpt_4, + # messages=[{"role": "user", "content": prompt}], + # ) + response = "yes" return response diff --git a/stylesheets/extra.css b/stylesheets/extra.css index 21c5ba1..1bfc062 100644 --- a/stylesheets/extra.css +++ b/stylesheets/extra.css @@ -4,4 +4,11 @@ .md-header { margin-top: 10px; -} \ No newline at end of file +} + +.profile-img { + width: 50px; + height: 50px; + border-radius: 50%; + box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15); +}