From 72fff407c59d6bc3378d8afc71dd748bcf451e28 Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 01:09:36 +0530 Subject: [PATCH 1/5] Added baseline submission --- .../check_star_for_challange_submission.yaml | 25 ++++++ .gitignore | 8 +- session_2/README.md | 3 + session_2/challenge/README.md | 83 +++++++++++++++++ session_2/challenge/requirements.txt | 3 + .../challenge/sample_inputs/sample_1_yes.txt | 1 + .../challenge/sample_inputs/sample_2_no.txt | 1 + session_2/challenge/scripts/base.py | 35 ++++++++ session_2/challenge/scripts/evaluate.py | 90 +++++++++++++++++++ session_2/challenge/scripts/model.py | 26 ++++++ session_2/challenge/scripts/registry.py | 30 +++++++ session_2/challenge/submissions/baseline.py | 34 +++++++ 12 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/check_star_for_challange_submission.yaml create mode 100644 session_2/README.md create mode 100644 session_2/challenge/README.md create mode 100644 session_2/challenge/requirements.txt create mode 100644 session_2/challenge/sample_inputs/sample_1_yes.txt create mode 100644 session_2/challenge/sample_inputs/sample_2_no.txt create mode 100644 session_2/challenge/scripts/base.py create mode 100644 session_2/challenge/scripts/evaluate.py create mode 100644 session_2/challenge/scripts/model.py create mode 100644 session_2/challenge/scripts/registry.py create mode 100644 session_2/challenge/submissions/baseline.py diff --git a/.github/workflows/check_star_for_challange_submission.yaml b/.github/workflows/check_star_for_challange_submission.yaml new file mode 100644 index 0000000..e515d4f --- /dev/null +++ b/.github/workflows/check_star_for_challange_submission.yaml @@ -0,0 +1,25 @@ +name: Check star for a prompt challenge submission + +on: + pull_request: + types: [opened, reopened, synchronize] + +jobs: + is-stargazer: + runs-on: ubuntu-latest + steps: + + - uses: dorny/paths-filter@v3.0.2 + id: changes + with: + filters: | + src: + - 'session_2/challenge/submissions/**' + + - uses: gacts/is-stargazer@v1.1.0 + id: check-star + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.check-star.outputs.is-stargazer != 'true') }} + uses: actions/github-script@v6 + with: + script: core.setFailed('⭐ Please, star this repository!') \ No newline at end of file diff --git a/.gitignore b/.gitignore index a5b3032..5b96ae4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ -site/* \ No newline at end of file +site/* +*.pyc + +# Ignore samples used for local testing and keep the default ones. +session_2/challenge/sample_inputs/*.txt +!session_2/challenge/sample_inputs/sample_1_yes.txt +!session_2/challenge/sample_inputs/sample_2_no.txt diff --git a/session_2/README.md b/session_2/README.md new file mode 100644 index 0000000..3975414 --- /dev/null +++ b/session_2/README.md @@ -0,0 +1,3 @@ +# Session 1 - Universe of Pretrained LLMs and Prompt Engineering + +
[Session 2 banner image]
\ No newline at end of file diff --git a/session_2/challenge/README.md b/session_2/challenge/README.md new file mode 100644 index 0000000..1f92445 --- /dev/null +++ b/session_2/challenge/README.md @@ -0,0 +1,83 @@ +# Prompt Engineering Challenge + +## Description + +Classify if a job is suitable for a fresher or not from the job description +using LLM using prompt engineering. + +## Public leaderboard + +TODO + +## How to participate? + +!!! tip "TLDR" + Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) + repo, add your submission in `llm_seminar_series/session_2/challenge` dir + and raise a pull request. + + +1. Fork the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) + repo and open it in github codespaces or clone locally. +2. Go to `llm_seminar_series/session_2/challenge` dir and run the evaluation + script to test the `"baseline"` prompt as shown below. + + ```bash + # Change the present working dir + cd session_2/challenge + + # Run baseline evaluation + python3 -m scripts.evaluate --prompt="baseline" + ``` + +3. To submit your own prompt, make a copy of `submissions/baseline.py` and + change the name of the prompt from `baseline` to something else which + describes your prompt. E.g, + + ```python + # file: submissions/name_of_your_prompt.py + + @registry.register("name_of_your_prompt") + class NameOfYourPrompt(base.PromptSubmission): + ... + ``` + + Also change the class name and register it with a new name (can be same as the + filename.) + +4. Update the `build_prompt` and `parse_response` method. + + - The `build_prompt` method must take job description as input and create a + prompt for the llm. + + - The `parse_response` method must post process the output + generated by the llm and return a boolean value. + + - `True`: If the job description is for a fresher level job. + - `False`: If the job description is for an expert level job. + + +6. Run the evaluation locally using your new prompt and check the results. + + ```bash + python3 -m scripts.evaluate --prompt="name_of_your_prompt" + ``` + +7. Push your changes to the forked repo and create a pull request. + + - Add your changes: ```git add submissions/name_of_your_prompt.py``` + - Commit your changes: ```git commit -m "write a commit message"``` + - Push your changes to your forked repo: ```git push``` + - Star the [original repo](https://github.com/infocusp/llm_seminar_series) + (mandatory for submission) and raise a pull request from github to submit + your prompt. + +8. Congratulations 🎉, once a repo maintainer approves your submission and merges + your PR, your rank based on a private test set will be published on the + public leader board. + +!!! note + You can test your prompt on your own samples by adding new files under + `sample_inputs` dir. The file name must ends with `"yes.txt"` if the JD is + for a fresher, otherwise it should end with `"no.txt"`. Do not commit + these files. 
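For reference, a complete submission that follows the steps above could look like the sketch below. The file name `strict_keyword.py`, the registered name, the class name, and the prompt wording are illustrative and are not part of this patch; only the `base.PromptSubmission` interface and the `registry.register` call mirror the code added here. It can be evaluated locally with `python3 -m scripts.evaluate --prompt="strict_keyword"`.

```python
# file: submissions/strict_keyword.py  (illustrative example, not part of this patch)
"""Example submission: constrains the model to a one-word YES/NO verdict."""

from scripts import base, registry


@registry.register("strict_keyword")
class StrictKeyword(base.PromptSubmission):
    """Asks for a single-word verdict and parses it defensively."""

    def build_prompt(self, job_description: str) -> str:
        """Builds a prompt that asks for a YES/NO answer only."""
        prompt = f"""
        You are screening job descriptions for entry-level candidates.
        Answer with a single word: "YES" if a fresher could apply for the
        role described below, or "NO" if it requires an experienced
        professional.

        Job description:
        {job_description}

        Answer (YES or NO):
        """
        return prompt.strip()

    def parse_response(self, model_response: str) -> bool:
        """Maps the model's reply to a boolean verdict."""
        words = model_response.strip().lower().split()
        # Use the first word so that an explanation appended after the
        # verdict ("yes, because ...") does not change the result.
        return bool(words) and words[0].startswith("yes")
```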
\ No newline at end of file diff --git a/session_2/challenge/requirements.txt b/session_2/challenge/requirements.txt new file mode 100644 index 0000000..ec415f7 --- /dev/null +++ b/session_2/challenge/requirements.txt @@ -0,0 +1,3 @@ +g4f>=0.2.9.9 +tqdm>=4.66.2 +absl-py>=2.1.0 \ No newline at end of file diff --git a/session_2/challenge/sample_inputs/sample_1_yes.txt b/session_2/challenge/sample_inputs/sample_1_yes.txt new file mode 100644 index 0000000..b7e0bae --- /dev/null +++ b/session_2/challenge/sample_inputs/sample_1_yes.txt @@ -0,0 +1 @@ +We need a beginner level python developer. \ No newline at end of file diff --git a/session_2/challenge/sample_inputs/sample_2_no.txt b/session_2/challenge/sample_inputs/sample_2_no.txt new file mode 100644 index 0000000..d0016ec --- /dev/null +++ b/session_2/challenge/sample_inputs/sample_2_no.txt @@ -0,0 +1 @@ +We need an python expert with 7+ years of experience. \ No newline at end of file diff --git a/session_2/challenge/scripts/base.py b/session_2/challenge/scripts/base.py new file mode 100644 index 0000000..af6ae02 --- /dev/null +++ b/session_2/challenge/scripts/base.py @@ -0,0 +1,35 @@ +"""Base class for prompt submission.""" + +import abc + + +class PromptSubmission(abc.ABC): + """Base class for prompt submission.""" + + def __init__(self): + """Initializes a prompt submission class.""" + pass + + @abc.abstractmethod + def build_prompt(self, job_description: str) -> str: + """Builds a prompt for classification of job description. + + Args: + job_description: Input for classification. + + Returns: + Input for the LLM. + """ + raise NotImplementedError + + @abc.abstractmethod + def parse_response(self, model_response: str) -> bool: + """Parses a response from the LLM to decide the final answer. + + Args: + model_response: Output of the llm for the given prompt. + + Returns: + True is the job_description is for a fresher otherwise False. + """ + raise NotImplementedError diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py new file mode 100644 index 0000000..2d0ba77 --- /dev/null +++ b/session_2/challenge/scripts/evaluate.py @@ -0,0 +1,90 @@ +"""Evaluates the submitted prompts. + +You can copy session_2/challenge/submissions/baseline.py to modify your own +prompt and evaluate it locally using this script. + +You need to pass the name used for registering a submission. + +For example, + +``` +@registry.register("baseline") +class Baseline(base.PromptSubmission): + + def build_prompt(self, job_description: str) -> str: + ... +``` + +In the above code, a Baseline class is registered with the name of `baseline`, +so you can run the below sample command to evaluate it. + +python3 -m scripts.evaluate --prompt=baseline +""" + +import glob +import logging +import os +from collections.abc import Sequence + +import tqdm +from absl import app, flags +from scripts import model, registry +from submissions import baseline # noqa: F401 + +_PROMPT = flags.DEFINE_string( + "prompt", None, "Name of the prompt to evaluate." 
+) + +_SAMPLES_DIR = "sample_inputs" + + +def load_sample_test_set() -> list[tuple[str, bool]]: + """Loads sample job descriptions and answers for local testing.""" + sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt")) + sample_inputs = [] + for filepath in sample_files: + content = open(filepath, "r").read() + filename = os.path.basename(filepath).lower() + if filename.endswith("_yes.txt"): + target = True + elif filename.endswith("_no.txt"): + target = False + else: + raise ValueError( + "File %s must end with yes.txt or no.txt" % filepath + ) + target = True if "yes" in filename.lower() else False + sample_inputs.append((content, target)) + return sample_inputs + + +def evaluate(prompt_name: str): + """Evaluates the prompt submission.""" + # Loads a free gpt4 model. + llm = model.G4fModel() + + # Loads a prompt submission. + prompt_handler = registry.get(name=prompt_name) + + # Generate results for the dataset. + dataset = load_sample_test_set() + correct_pred = 0 + for job_description, target in tqdm.tqdm(dataset): + prompt = prompt_handler.build_prompt(job_description=job_description) + response = llm.generate(prompt=prompt) + output = prompt_handler.parse_response(model_response=response) + if output == target: + correct_pred += 1 + + logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100)) + + +def main(argv: Sequence[str]) -> None: + """Entrypoint.""" + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + evaluate(prompt_name=_PROMPT.value) + + +if __name__ == "__main__": + app.run(main) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py new file mode 100644 index 0000000..3a91a96 --- /dev/null +++ b/session_2/challenge/scripts/model.py @@ -0,0 +1,26 @@ +"""Model inference.""" + +import g4f + + +class Model: + """Base class for LLM.""" + + def generate(self, prompt: str) -> str: + """Returns a generation for prompt.""" + return "" + + +class G4fModel(Model): + """A free gpt4 model. 
+ + Reference: https://github.com/xtekky/gpt4free + """ + + def generate(self, prompt: str) -> str: + """Completes a prompt using gpt-4 for free model.""" + response = g4f.ChatCompletion.create( + model=g4f.models.gpt_4, + messages=[{"role": "user", "content": prompt}], + ) + return response diff --git a/session_2/challenge/scripts/registry.py b/session_2/challenge/scripts/registry.py new file mode 100644 index 0000000..3659dec --- /dev/null +++ b/session_2/challenge/scripts/registry.py @@ -0,0 +1,30 @@ +"""Registry of all the submitted prompts.""" + +from typing import Type + +from scripts import base + +_SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {} + + +def register(name: str): + """Returns a decorator that registers a submission with the given name.""" + + def _register(klass: Type[base.PromptSubmission]): + _SUBMISSIONS_REGISTRY[name] = klass + return klass + + return _register + + +def get(name: str) -> base.PromptSubmission: + """Returns the submission registered with the given name.""" + if name not in _SUBMISSIONS_REGISTRY: + raise NotImplementedError(f"Submission with name {name} not found.") + klass = _SUBMISSIONS_REGISTRY[name] + return klass() + + +def get_all() -> list[Type[base.PromptSubmission]]: + """Returns all the submissions.""" + return list(_SUBMISSIONS_REGISTRY.values()) diff --git a/session_2/challenge/submissions/baseline.py b/session_2/challenge/submissions/baseline.py new file mode 100644 index 0000000..1f76d67 --- /dev/null +++ b/session_2/challenge/submissions/baseline.py @@ -0,0 +1,34 @@ +"""Baseline submission for the job description classification challenge.""" + +from scripts import base, registry + + +@registry.register("baseline") +class Baseline(base.PromptSubmission): + """Baseline submission.""" + + def build_prompt(self, job_description: str) -> str: + """Builds a prompt for classification of job description.""" + prompt = f""" + + Say "YES" if the given job description is suitable for + a freshers other wise say "NO". + + {job_description}. + + """ + return prompt.strip() + + def parse_response(self, model_response: str) -> bool: + """Parses a response from the LLM to decide the final answer. + + Args: + model_response: Output of the llm for the given prompt. + + Returns: + True is the job_description is for a fresher otherwise False. 
+ """ + model_response = model_response.lower() + if "yes" in model_response: + return True + return False From 0cac2b0287493dfdb329d14c26a99ae9d57091fd Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 04:17:32 +0530 Subject: [PATCH 2/5] Added leaderboard md --- mkdocs.yaml | 1 + session_2/README.md | 2 +- session_2/challenge/.pages | 3 + .../{README.md => how_to_participate.md} | 13 +--- session_2/challenge/leaderboard.md | 65 +++++++++++++++++++ session_2/challenge/scripts/evaluate.py | 16 ++++- session_2/challenge/scripts/model.py | 9 +-- stylesheets/extra.css | 2 +- 8 files changed, 91 insertions(+), 20 deletions(-) create mode 100644 session_2/challenge/.pages rename session_2/challenge/{README.md => how_to_participate.md} (92%) create mode 100644 session_2/challenge/leaderboard.md diff --git a/mkdocs.yaml b/mkdocs.yaml index b010c86..749973c 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -38,6 +38,7 @@ markdown_extensions: format: !!python/name:pymdownx.superfences.fence_code_format extra_css: - stylesheets/extra.css + - stylesheets/leaderboard.css extra: generator: false social: diff --git a/session_2/README.md b/session_2/README.md index 3975414..8c344b3 100644 --- a/session_2/README.md +++ b/session_2/README.md @@ -1,3 +1,3 @@ -# Session 1 - Universe of Pretrained LLMs and Prompt Engineering +# Session 2 - Universe of Pretrained LLMs and Prompt Engineering
[Session 2 banner image]
\ No newline at end of file diff --git a/session_2/challenge/.pages b/session_2/challenge/.pages new file mode 100644 index 0000000..967e585 --- /dev/null +++ b/session_2/challenge/.pages @@ -0,0 +1,3 @@ +nav: + - Leaderboard: leaderboard.md + - How to participate ?: how_to_participate.md \ No newline at end of file diff --git a/session_2/challenge/README.md b/session_2/challenge/how_to_participate.md similarity index 92% rename from session_2/challenge/README.md rename to session_2/challenge/how_to_participate.md index 1f92445..740cbf4 100644 --- a/session_2/challenge/README.md +++ b/session_2/challenge/how_to_participate.md @@ -1,15 +1,4 @@ -# Prompt Engineering Challenge - -## Description - -Classify if a job is suitable for a fresher or not from the job description -using LLM using prompt engineering. - -## Public leaderboard - -TODO - -## How to participate? +# How to participate? !!! tip "TLDR" Fork and star the [llm_seminar_series](https://github.com/infocusp/llm_seminar_series) diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md new file mode 100644 index 0000000..7a573bf --- /dev/null +++ b/session_2/challenge/leaderboard.md @@ -0,0 +1,65 @@ +# Leaderboard + +!!! tip "Description" + Test your prompt engineering skills to classify if a job description is suitable + for a fresher or not. Check [participation guide](how_to_participate.md). + +
+| Rank | Profile Image | GitHub Username | Solution | Accuracy % |
+|------|---------------|-----------------|----------|------------|
+| 1 | Profile Image | Username 1 | Baseline | 100 |
+| 2 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
+| 3 | Profile Image | Username 2 | Baseline | 95 |
\ No newline at end of file diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py index 2d0ba77..c2fe3a9 100644 --- a/session_2/challenge/scripts/evaluate.py +++ b/session_2/challenge/scripts/evaluate.py @@ -35,6 +35,10 @@ def build_prompt(self, job_description: str) -> str: "prompt", None, "Name of the prompt to evaluate." ) +_DEBUG = flags.DEFINE_bool( + "debug", True, "Prints prompt and response if true." +) + _SAMPLES_DIR = "sample_inputs" @@ -69,20 +73,28 @@ def evaluate(prompt_name: str): # Generate results for the dataset. dataset = load_sample_test_set() correct_pred = 0 - for job_description, target in tqdm.tqdm(dataset): + for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): prompt = prompt_handler.build_prompt(job_description=job_description) + logging.debug("[prompt %d]\n%s", idx, prompt) response = llm.generate(prompt=prompt) + logging.debug("[response %d]\n%s", idx, response) output = prompt_handler.parse_response(model_response=response) + logging.debug("[target %d]\n%s", idx, target) + logging.debug("[prediction %d]\n%s", idx, output) if output == target: correct_pred += 1 - logging.info("Acc : %.3f" % (correct_pred / len(dataset) * 100)) + print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201 def main(argv: Sequence[str]) -> None: """Entrypoint.""" if len(argv) > 1: raise app.UsageError("Too many command-line arguments.") + if _DEBUG.value: + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.getLogger().setLevel(logging.INFO) evaluate(prompt_name=_PROMPT.value) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py index 3a91a96..ba6c450 100644 --- a/session_2/challenge/scripts/model.py +++ b/session_2/challenge/scripts/model.py @@ -19,8 +19,9 @@ class G4fModel(Model): def generate(self, prompt: str) -> str: """Completes a prompt using gpt-4 for free model.""" - response = g4f.ChatCompletion.create( - model=g4f.models.gpt_4, - messages=[{"role": "user", "content": prompt}], - ) + # response = g4f.ChatCompletion.create( + # model=g4f.models.gpt_4, + # messages=[{"role": "user", "content": prompt}], + # ) + response = "yes" return response diff --git a/stylesheets/extra.css b/stylesheets/extra.css index 21c5ba1..26a0b0b 100644 --- a/stylesheets/extra.css +++ b/stylesheets/extra.css @@ -4,4 +4,4 @@ .md-header { margin-top: 10px; -} \ No newline at end of file +} From 002f879321cc52bcf631bdf15ad1db31eb57425a Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 11:35:37 +0530 Subject: [PATCH 3/5] Added script for leaderboard sorting --- session_2/challenge/leaderboard.md | 79 ++++++---------------- session_2/challenge/requirements.txt | 4 +- session_2/challenge/scripts/leaderboard.py | 76 +++++++++++++++++++++ stylesheets/extra.css | 9 ++- 4 files changed, 105 insertions(+), 63 deletions(-) create mode 100644 session_2/challenge/scripts/leaderboard.py diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md index 7a573bf..e9937bd 100644 --- a/session_2/challenge/leaderboard.md +++ b/session_2/challenge/leaderboard.md @@ -1,65 +1,24 @@ # Leaderboard -!!! tip "Description" - Test your prompt engineering skills to classify if a job description is suitable - for a fresher or not. Check [participation guide](how_to_participate.md). +## Problem statement + +Test your prompt engineering skills to classify if a job description is suitable +for a fresher or not. + +Check [participation guide](how_to_participate.md). 
+ +## Rankings
-| Rank | Profile Image | GitHub Username | Solution | Accuracy % |
-|------|---------------|-----------------|----------|------------|
-| 1 | Profile Image | Username 1 | Baseline | 100 |
-| 2 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
-| 3 | Profile Image | Username 2 | Baseline | 95 |
+ + +| Rank | Profile Image | GitHub Username | Solution | Accuracy % | +|-------:|:------------------------------------------------------------------------------------------------|:-------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:| +| 1 | | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 | +| 2 | | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 3 | | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 4 | | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 | +| 5 | | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 | + +
\ No newline at end of file diff --git a/session_2/challenge/requirements.txt b/session_2/challenge/requirements.txt index ec415f7..56cbd84 100644 --- a/session_2/challenge/requirements.txt +++ b/session_2/challenge/requirements.txt @@ -1,3 +1,5 @@ g4f>=0.2.9.9 tqdm>=4.66.2 -absl-py>=2.1.0 \ No newline at end of file +absl-py>=2.1.0 +pandas>=2.2.2 +tabulate>=0.9.0 \ No newline at end of file diff --git a/session_2/challenge/scripts/leaderboard.py b/session_2/challenge/scripts/leaderboard.py new file mode 100644 index 0000000..08e8631 --- /dev/null +++ b/session_2/challenge/scripts/leaderboard.py @@ -0,0 +1,76 @@ +"""Generates leaderboard.""" + +import re + +import pandas as pd + +# Read the markdown table into a DataFrame +with open("session_2/challenge/leaderboard.md", "r") as file: + content = file.read() + +start_marker = "\n" +start_index = content.find(start_marker) +end_index = content.find("\n") +table_content = content[start_index:end_index] + + +# Extract rows using regex +rows = re.findall( + r"\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|", table_content +)[2:] + +# Create a DataFrame from the extracted rows +df = pd.DataFrame( + rows, + columns=[ + "Rank", + "Profile Image", + "GitHub Username", + "Solution", + "Accuracy %", + ], +) + +# Strip extra spaces before and after text in each cell +df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) + +# Convert "Rank" column to integer and "Accuracy %" column to float +df["Rank"] = df["Rank"].astype(int) +df["Accuracy %"] = df["Accuracy %"].astype(float) + +# Add a new entry to the DataFrame +new_entry = { + "Rank": len(df) + 1, + "Profile Image": '', + "GitHub Username": "[New User](https://github.com/new_user)", + "Solution": "[New Solution](https://github.com/new_solution)", + "Accuracy %": 99.5, +} # Example accuracy value + +df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True) + +# Keep only the highest submission for each user +highest_indices = df.groupby("GitHub Username")["Accuracy %"].idxmax() +df_highest = df.loc[highest_indices] + +# Sort the DataFrame by "Accuracy %" column in descending order +df_sorted = df_highest.sort_values( + by="Accuracy %", ascending=False +).reset_index(drop=True) + +# Update the "Rank" column after sorting +df_sorted["Rank"] = df_sorted.index + 1 + +# Convert the DataFrame back to markdown format +markdown_table = df_sorted.to_markdown(index=False) + +# Replace the existing table in the markdown file with the sorted table +new_content = ( + content[: start_index + len(start_marker)] + + markdown_table + + content[end_index:] +) + +# Write the updated content back to the markdown file +with open("session_2/challenge/leaderboard.md", "w") as file: + file.write(new_content) diff --git a/stylesheets/extra.css b/stylesheets/extra.css index 26a0b0b..ce1f7d4 100644 --- a/stylesheets/extra.css +++ b/stylesheets/extra.css @@ -1,7 +1,12 @@ .md-grid { - max-width: 1520px; - } + max-width: 1520px; +} .md-header { margin-top: 10px; } + +.profile-image { + border-radius: 50%; + box-shadow: 0px 8px 10px rgba(0, 0, 0, 0.15); +} \ No newline at end of file From 2461e6a46d4219500f4c5b49ee9c8f37bcc61852 Mon Sep 17 00:00:00 2001 From: Hetul Patel Date: Sat, 20 Apr 2024 15:17:11 +0530 Subject: [PATCH 4/5] Added github action to evaluate on private dataset --- ... 
check_star_for_challenge_submission.yaml} | 2 +- .github/workflows/github_pages.yaml | 2 +- .github/workflows/update_leaderboard.yaml | 82 ++++++++ session_2/challenge/how_to_participate.md | 16 +- session_2/challenge/leaderboard.md | 15 +- session_2/challenge/scripts/dataset.py | 24 +++ session_2/challenge/scripts/evaluate.py | 61 +----- session_2/challenge/scripts/evaluate_lib.py | 36 ++++ session_2/challenge/scripts/leaderboard.py | 192 ++++++++++++------ session_2/challenge/scripts/model.py | 2 +- session_2/challenge/scripts/registry.py | 5 +- session_2/challenge/submissions/baseline.py | 2 +- 12 files changed, 295 insertions(+), 144 deletions(-) rename .github/workflows/{check_star_for_challange_submission.yaml => check_star_for_challenge_submission.yaml} (93%) create mode 100644 .github/workflows/update_leaderboard.yaml create mode 100644 session_2/challenge/scripts/dataset.py create mode 100644 session_2/challenge/scripts/evaluate_lib.py diff --git a/.github/workflows/check_star_for_challange_submission.yaml b/.github/workflows/check_star_for_challenge_submission.yaml similarity index 93% rename from .github/workflows/check_star_for_challange_submission.yaml rename to .github/workflows/check_star_for_challenge_submission.yaml index e515d4f..d58f8c5 100644 --- a/.github/workflows/check_star_for_challange_submission.yaml +++ b/.github/workflows/check_star_for_challenge_submission.yaml @@ -20,6 +20,6 @@ jobs: id: check-star - if: ${{ (steps.changes.outputs.src == 'true') && (steps.check-star.outputs.is-stargazer != 'true') }} - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: script: core.setFailed('⭐ Please, star this repository!') \ No newline at end of file diff --git a/.github/workflows/github_pages.yaml b/.github/workflows/github_pages.yaml index e2da726..f65b651 100644 --- a/.github/workflows/github_pages.yaml +++ b/.github/workflows/github_pages.yaml @@ -1,4 +1,4 @@ -name: ci +name: Deploy to github pages on: push: branches: diff --git a/.github/workflows/update_leaderboard.yaml b/.github/workflows/update_leaderboard.yaml new file mode 100644 index 0000000..c1fde3f --- /dev/null +++ b/.github/workflows/update_leaderboard.yaml @@ -0,0 +1,82 @@ +name: Update leaderboard. 
+ +on: + pull_request: + types: [opened, reopened, synchronize] + +jobs: + leaderboard_evaluation: + runs-on: ubuntu-latest + steps: + - name: Check if there are any changes in submissions dir + uses: dorny/paths-filter@v3.0.2 + id: changes + with: + filters: | + src: + - 'session_2/challenge/submissions/**' + list-files: "shell" + + - name: Print changed files + run: | + echo '${{ toJSON(steps.changes.outputs) }}' + + - if: ${{ (steps.changes.outputs.src_count > 1) }} + uses: actions/github-script@v7 + with: + script: core.setFailed('More than one submissions are not allowed at once.') + + # Update leaderboard only if single file is changed in submission dir + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Checkout code + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r session_2/challenge/requirements.txt + + - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + name: Run leaderboard update script + id: leaderboard-update + run: | + cd session_2/challenge + filename=$(basename "${{ steps.changes.outputs.src_files }}") + filename_without_extension="${filename%.*}" # Remove extension + python -m scripts.leaderboard --github_user="${{ github.actor }}" --prompt="$filename_without_extension" + + - name: Commit changes + uses: EndBug/add-and-commit@v9 + with: + author_name: GitHub Actions + author_email: actions@github.com + message: 'Updated leader board' + add: 'session_2/challenge/leaderboard.md' + + # # Commit the updated leaderboard + # - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + # name: Commit updated leaderboard + # id: commit-leaderboard + # run: | + # git config --global user.name "GitHub Actions" + # git config --global user.email "actions@github.com" + # git add session_2/challenge/leaderboard.md + # git commit -m "Update leaderboard" + # git push -f origin HEAD:${{ github.ref }} + + + # # Print the commit SHA for reference + # - if: ${{ (steps.changes.outputs.src == 'true') && (steps.changes.outputs.src_count == 1) }} + # name: Print Commit SHA + # run: | + # echo "Commit SHA: ${{ steps.commit-leaderboard.outputs.commit_sha }}" \ No newline at end of file diff --git a/session_2/challenge/how_to_participate.md b/session_2/challenge/how_to_participate.md index 740cbf4..a0c0c94 100644 --- a/session_2/challenge/how_to_participate.md +++ b/session_2/challenge/how_to_participate.md @@ -20,19 +20,18 @@ ``` 3. To submit your own prompt, make a copy of `submissions/baseline.py` and - change the name of the prompt from `baseline` to something else which + change the name of the file from `baseline` to something else which describes your prompt. E.g, ```python # file: submissions/name_of_your_prompt.py - @registry.register("name_of_your_prompt") + @registry.register() class NameOfYourPrompt(base.PromptSubmission): ... ``` - Also change the class name and register it with a new name (can be same as the - filename.) + Also change the class name. 4. 
Update the `build_prompt` and `parse_response` method. @@ -62,11 +61,4 @@ your prompt. 8. Congratulations 🎉, once a repo maintainer approves your submission and merges - your PR, your rank based on a private test set will be published on the - public leader board. - -!!! note - You can test your prompt on your own samples by adding new files under - `sample_inputs` dir. The file name must ends with `"yes.txt"` if the JD is - for a fresher, otherwise it should end with `"no.txt"`. Do not commit - these files. \ No newline at end of file + your PR, your rank will be published on the public leader board. diff --git a/session_2/challenge/leaderboard.md b/session_2/challenge/leaderboard.md index e9937bd..b17a344 100644 --- a/session_2/challenge/leaderboard.md +++ b/session_2/challenge/leaderboard.md @@ -12,13 +12,14 @@ Check [participation guide](how_to_participate.md).
-| Rank | Profile Image | GitHub Username | Solution | Accuracy % | -|-------:|:------------------------------------------------------------------------------------------------|:-------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:| -| 1 | | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 | -| 2 | | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | -| 3 | | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | -| 4 | | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 | -| 5 | | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 | +| Rank | Profile Image | GitHub Username | Solution | Accuracy % | +|-------:|:------------------------------------------------------------------------------------------------|:----------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------|-------------:| +| 1 | | [New User](https://github.com/new_user) | [New Solution](https://github.com/new_solution) | 99.5 | +| 2 | | [Username 2](https://github.com/username2) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 3 | | [Username 4](https://github.com/username4) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 95 | +| 4 | | [hetul-patel](https://github.com/hetul-patel) | [baseline](https://github.com/infocusp/llm_seminar_series/blob/main/session_2/challenge/submissions/baseline.py) | 50 | +| 6 | | [Username 3](https://github.com/username3) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 10 | +| 7 | | [Username 1](https://github.com/username1) | [Baseline](https://github.com/infocusp/llm_seminar_series/blob/hetul/prompting-leader-board/session_2/challenge/submissions/baseline.py) | 0 |
\ No newline at end of file diff --git a/session_2/challenge/scripts/dataset.py b/session_2/challenge/scripts/dataset.py new file mode 100644 index 0000000..74bd476 --- /dev/null +++ b/session_2/challenge/scripts/dataset.py @@ -0,0 +1,24 @@ +"""Utilities to load evaluation datasets.""" + +import glob +import os + + +def load_sample_test_set(samples_dir: str) -> list[tuple[str, bool]]: + """Loads sample job descriptions and answers for local testing.""" + sample_files = glob.glob(os.path.join(samples_dir, "*.txt")) + sample_inputs = [] + for filepath in sample_files: + content = open(filepath, "r").read() + filename = os.path.basename(filepath).lower() + if filename.endswith("_yes.txt"): + target = True + elif filename.endswith("_no.txt"): + target = False + else: + raise ValueError( + "File %s must end with yes.txt or no.txt" % filepath + ) + target = True if "yes" in filename.lower() else False + sample_inputs.append((content, target)) + return sample_inputs diff --git a/session_2/challenge/scripts/evaluate.py b/session_2/challenge/scripts/evaluate.py index c2fe3a9..1ca6baa 100644 --- a/session_2/challenge/scripts/evaluate.py +++ b/session_2/challenge/scripts/evaluate.py @@ -21,15 +21,11 @@ def build_prompt(self, job_description: str) -> str: python3 -m scripts.evaluate --prompt=baseline """ -import glob import logging -import os from collections.abc import Sequence -import tqdm from absl import app, flags -from scripts import model, registry -from submissions import baseline # noqa: F401 +from scripts import dataset, evaluate_lib _PROMPT = flags.DEFINE_string( "prompt", None, "Name of the prompt to evaluate." @@ -39,52 +35,12 @@ def build_prompt(self, job_description: str) -> str: "debug", True, "Prints prompt and response if true." ) -_SAMPLES_DIR = "sample_inputs" - - -def load_sample_test_set() -> list[tuple[str, bool]]: - """Loads sample job descriptions and answers for local testing.""" - sample_files = glob.glob(os.path.join(_SAMPLES_DIR, "*.txt")) - sample_inputs = [] - for filepath in sample_files: - content = open(filepath, "r").read() - filename = os.path.basename(filepath).lower() - if filename.endswith("_yes.txt"): - target = True - elif filename.endswith("_no.txt"): - target = False - else: - raise ValueError( - "File %s must end with yes.txt or no.txt" % filepath - ) - target = True if "yes" in filename.lower() else False - sample_inputs.append((content, target)) - return sample_inputs - - -def evaluate(prompt_name: str): - """Evaluates the prompt submission.""" - # Loads a free gpt4 model. - llm = model.G4fModel() - - # Loads a prompt submission. - prompt_handler = registry.get(name=prompt_name) - - # Generate results for the dataset. 
- dataset = load_sample_test_set() - correct_pred = 0 - for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): - prompt = prompt_handler.build_prompt(job_description=job_description) - logging.debug("[prompt %d]\n%s", idx, prompt) - response = llm.generate(prompt=prompt) - logging.debug("[response %d]\n%s", idx, response) - output = prompt_handler.parse_response(model_response=response) - logging.debug("[target %d]\n%s", idx, target) - logging.debug("[prediction %d]\n%s", idx, output) - if output == target: - correct_pred += 1 - - print("Accuracy: [%.3f] %%" % (correct_pred / len(dataset) * 100)) # noqa: T201 + +def evaluate_on_sample_dataset(prompt_name: str): + """Evaluates the prompt on a sample_dataset.""" + sample_inputs = dataset.load_sample_test_set(samples_dir="sample_inputs") + acc = evaluate_lib.evaluate(dataset=sample_inputs, prompt_name=prompt_name) + print("Accuracy: [%.3f] %%" % acc) # noqa: T201 def main(argv: Sequence[str]) -> None: @@ -95,8 +51,9 @@ def main(argv: Sequence[str]) -> None: logging.getLogger().setLevel(logging.DEBUG) else: logging.getLogger().setLevel(logging.INFO) - evaluate(prompt_name=_PROMPT.value) + evaluate_on_sample_dataset(prompt_name=_PROMPT.value) if __name__ == "__main__": + flags.mark_flag_as_required("prompt") app.run(main) diff --git a/session_2/challenge/scripts/evaluate_lib.py b/session_2/challenge/scripts/evaluate_lib.py new file mode 100644 index 0000000..355979e --- /dev/null +++ b/session_2/challenge/scripts/evaluate_lib.py @@ -0,0 +1,36 @@ +"""Library function for evaluating a prompt on a particular dataset.""" + +import logging + +import tqdm +from scripts import model, registry +from submissions import * # noqa: F401, F403 +from submissions import baseline # noqa: F401 + + +def evaluate(dataset: list[tuple[str, bool]], prompt_name: str): + """Evaluates the prompt submission.""" + # Loads a free gpt4 model. + llm = model.G4fModel() + + # Loads a prompt submission. + prompt_handler = registry.get(name=prompt_name) + + # Generate results for the dataset. + correct_pred = 0 + for idx, (job_description, target) in enumerate(tqdm.tqdm(dataset)): + prompt = prompt_handler.build_prompt(job_description=job_description) + response = llm.generate(prompt=prompt) + prediction = prompt_handler.parse_response(model_response=response) + if prediction == target: + correct_pred += 1 + result = "[PASS]" + else: + result = "[FAIL]" + + logging.debug( + "No=%d. target=%s prediction=%s %s\n[prompt]\n%s\n[response]\n%s" + % (idx, target, prediction, result, prompt, response) + ) + acc = correct_pred / len(dataset) * 100 + return acc diff --git a/session_2/challenge/scripts/leaderboard.py b/session_2/challenge/scripts/leaderboard.py index 08e8631..c6b25e1 100644 --- a/session_2/challenge/scripts/leaderboard.py +++ b/session_2/challenge/scripts/leaderboard.py @@ -1,76 +1,134 @@ -"""Generates leaderboard.""" +"""Updates the public leaderboard after evaluating given submission. 
+Sample command: +python -m scripts.leaderboard \ + --github_user=your_github_user \ + --prompt_file=baseline +""" + +import logging import re +from collections.abc import Sequence import pandas as pd +from absl import app, flags +from scripts import dataset, evaluate_lib -# Read the markdown table into a DataFrame -with open("session_2/challenge/leaderboard.md", "r") as file: - content = file.read() - -start_marker = "\n" -start_index = content.find(start_marker) -end_index = content.find("\n") -table_content = content[start_index:end_index] - - -# Extract rows using regex -rows = re.findall( - r"\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|", table_content -)[2:] - -# Create a DataFrame from the extracted rows -df = pd.DataFrame( - rows, - columns=[ - "Rank", - "Profile Image", - "GitHub Username", - "Solution", - "Accuracy %", - ], +_PROMPT = flags.DEFINE_string( + "prompt", None, "Name of the submitted prompt to evaluate." ) -# Strip extra spaces before and after text in each cell -df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) - -# Convert "Rank" column to integer and "Accuracy %" column to float -df["Rank"] = df["Rank"].astype(int) -df["Accuracy %"] = df["Accuracy %"].astype(float) - -# Add a new entry to the DataFrame -new_entry = { - "Rank": len(df) + 1, - "Profile Image": '', - "GitHub Username": "[New User](https://github.com/new_user)", - "Solution": "[New Solution](https://github.com/new_solution)", - "Accuracy %": 99.5, -} # Example accuracy value - -df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True) - -# Keep only the highest submission for each user -highest_indices = df.groupby("GitHub Username")["Accuracy %"].idxmax() -df_highest = df.loc[highest_indices] - -# Sort the DataFrame by "Accuracy %" column in descending order -df_sorted = df_highest.sort_values( - by="Accuracy %", ascending=False -).reset_index(drop=True) - -# Update the "Rank" column after sorting -df_sorted["Rank"] = df_sorted.index + 1 - -# Convert the DataFrame back to markdown format -markdown_table = df_sorted.to_markdown(index=False) - -# Replace the existing table in the markdown file with the sorted table -new_content = ( - content[: start_index + len(start_marker)] - + markdown_table - + content[end_index:] +_GITHUB_USER = flags.DEFINE_string( + "github_user", None, "Github username to add an entry in leaderboard." 
) -# Write the updated content back to the markdown file -with open("session_2/challenge/leaderboard.md", "w") as file: - file.write(new_content) + +_LEADERBORAD = "leaderboard.md" # current leaderboard + + +def generate_leaderboard(prompt_name: str, accuracy: float, github_user: str): + """Generates leaderboard.""" + # Read the markdown table into a DataFrame + with open(_LEADERBORAD, "r") as file: + content = file.read() + + start_marker = "\n" + start_index = content.find(start_marker) + end_index = content.find("\n") + table_content = content[start_index:end_index] + + # Extract rows using regex + rows = re.findall( + r"\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)\|", table_content + )[2:] + + # Create a DataFrame from the extracted rows + df = pd.DataFrame( + rows, + columns=[ + "Rank", + "Profile Image", + "GitHub Username", + "Solution", + "Accuracy %", + ], + ) + + # Strip extra spaces before and after text in each cell + df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) + + # Convert "Rank" column to integer and "Accuracy %" column to float + df["Rank"] = df["Rank"].astype(int) + df["Accuracy %"] = df["Accuracy %"].astype(float) + + # Add a new entry to the DataFrame + repo_url = "https://github.com/infocusp/llm_seminar_series/blob/main/session_2/challenge/submissions" + new_entry = { + "Rank": len(df) + 1, + "Profile Image": f'', + "GitHub Username": f"[{github_user}](https://github.com/{github_user})", + "Solution": f"[{prompt_name}]({repo_url}/{prompt_name}.py)", + "Accuracy %": accuracy, + } + + df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True) + + # Keep only the highest submission for each user + highest_indices = df.groupby("GitHub Username")["Accuracy %"].idxmax() + df_highest = df.loc[highest_indices] + + # Sort the DataFrame by "Accuracy %" column in descending order + df_sorted = df_highest.sort_values( + by="Accuracy %", ascending=False + ).reset_index(drop=True) + + # Update the "Rank" column after sorting + df_sorted["Rank"] = df_sorted.index + 1 + + # Convert the DataFrame back to markdown format + markdown_table = df_sorted.to_markdown(index=False) + + # Replace the existing table in the markdown file with the sorted table + new_content = ( + content[: start_index + len(start_marker)] + + markdown_table + + content[end_index:] + ) + + # Write the updated content back to the markdown file + with open(_LEADERBORAD, "w") as file: + file.write(new_content) + + logging.info( + "Submission by %s with prompt %s updated in the leaderboard.", + github_user, + prompt_name, + ) + + +def update_leaderboard(prompt_name: str, github_user: str): + """Generates a public leaderboard by evaluating given submission.""" + sample_dataset = dataset.load_sample_test_set(samples_dir="sample_inputs") + acc = evaluate_lib.evaluate( + dataset=sample_dataset, prompt_name=prompt_name + ) + generate_leaderboard( + prompt_name=prompt_name, accuracy=acc, github_user=github_user + ) + + +def main(argv: Sequence[str]) -> None: + """Entrypoint.""" + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + logging.getLogger().setLevel(logging.INFO) + update_leaderboard( + prompt_name=_PROMPT.value, github_user=_GITHUB_USER.value + ) + + +if __name__ == "__main__": + flags.mark_flag_as_required("prompt") + flags.mark_flag_as_required("github_user") + app.run(main) diff --git a/session_2/challenge/scripts/model.py b/session_2/challenge/scripts/model.py index ba6c450..67a554e 100644 --- a/session_2/challenge/scripts/model.py +++ 
b/session_2/challenge/scripts/model.py @@ -1,6 +1,6 @@ """Model inference.""" -import g4f +import g4f # noqa: F401 class Model: diff --git a/session_2/challenge/scripts/registry.py b/session_2/challenge/scripts/registry.py index 3659dec..02d1c1c 100644 --- a/session_2/challenge/scripts/registry.py +++ b/session_2/challenge/scripts/registry.py @@ -7,10 +7,11 @@ _SUBMISSIONS_REGISTRY: dict[str, Type[base.PromptSubmission]] = {} -def register(name: str): - """Returns a decorator that registers a submission with the given name.""" +def register(): + """Returns a decorator that registers a submission with its file as key.""" def _register(klass: Type[base.PromptSubmission]): + name = klass.__module__.split(".")[-1] _SUBMISSIONS_REGISTRY[name] = klass return klass diff --git a/session_2/challenge/submissions/baseline.py b/session_2/challenge/submissions/baseline.py index 1f76d67..c8f6b3e 100644 --- a/session_2/challenge/submissions/baseline.py +++ b/session_2/challenge/submissions/baseline.py @@ -3,7 +3,7 @@ from scripts import base, registry -@registry.register("baseline") +@registry.register() class Baseline(base.PromptSubmission): """Baseline submission.""" From d05d50bf610942b50249496772fbed06da0af8ce Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 20 Apr 2024 11:53:02 +0000 Subject: [PATCH 5/5] Updated leader board --- .github/CODEOWNERS | 9 +++++++++ .github/workflows/update_leaderboard.yaml | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..9353ce9 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,9 @@ +############################################################## +# +# List of approvers/reviewers for llm_seminar_series repo +# +############################################################## +# +# +# These owners will be the default owners for everything in the repo. +* @hetulvp \ No newline at end of file diff --git a/.github/workflows/update_leaderboard.yaml b/.github/workflows/update_leaderboard.yaml index c1fde3f..844e705 100644 --- a/.github/workflows/update_leaderboard.yaml +++ b/.github/workflows/update_leaderboard.yaml @@ -2,8 +2,10 @@ name: Update leaderboard. on: pull_request: - types: [opened, reopened, synchronize] - + branches: + - development + types: + - merged jobs: leaderboard_evaluation: runs-on: ubuntu-latest
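A contributor can reproduce locally what the update_leaderboard workflow above performs for a pull request. The sketch below is illustrative: the changed-file path and the GitHub handle are placeholders, while the module and flags (`scripts.leaderboard`, `--github_user`, `--prompt`) are the ones this patch defines.

```python
"""Illustrative local driver that mimics the update_leaderboard workflow."""

import os
import subprocess

# Path of the submission a pull request would change (placeholder value).
changed_file = "submissions/my_prompt.py"

# Like the workflow, derive the registry key from the file name alone.
prompt_name = os.path.splitext(os.path.basename(changed_file))[0]

# Run the same command the CI job runs, from the challenge directory.
subprocess.run(
    [
        "python",
        "-m",
        "scripts.leaderboard",
        "--github_user=your_github_user",  # replace with your GitHub handle
        f"--prompt={prompt_name}",
    ],
    cwd="session_2/challenge",
    check=True,
)
```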