# Workflow run: "leaderboard: llama3 #10" (workflow file for this run)
# Note from the page this file was captured from: the file contains
# bidirectional Unicode text that may be interpreted or compiled differently
# than it appears; open it in an editor that reveals hidden Unicode characters.
name: Leaderboard Submission

# Since we need write access to GH, we use pull_request_target here.
# The eval workflow needs to clone the PR head branch alongside the base
# branch, then copy over only the changed files so that the base branch's
# eval code is what actually runs (for security).
on:
  pull_request_target:
    # Only PRs that touch submission metadata or generation files trigger a run.
    paths:
      - 'leaderboard-submissions/metadata/*.json'
      - 'leaderboard-submissions/*generations/*.jsonl'

permissions:
  # Needed to create and edit the results comment on the PR.
  pull-requests: write
  # Needed to push the generated results files back to the PR branch.
  contents: write
jobs:
  # Runs the base branch's eval code against the files submitted in the PR,
  # reports the outcome as a PR comment, and commits the generated results
  # files back to the PR branch.
  evaluate:
    runs-on: ubuntu-latest
    env:
      # Token picked up by the `gh` CLI calls in the steps below.
      GH_TOKEN: ${{ github.token }}
    steps:
      # Clone the main repo
      - uses: actions/checkout@v4

      # Clone the PR head to _pr_submission
      - uses: actions/checkout@v4
        with:
          path: _pr_submission
          # head_ref is only a branch name; without `repository` a PR opened
          # from a fork would resolve that name against the base repository.
          # For same-repo PRs this is the base repository, as before.
          # NOTE(review): pushing back to a fork with the default token may
          # still be rejected - confirm how fork submissions should be handled.
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.head_ref }}

      # copy submission files over to main repo
      # results files go to a new dir to prevent weird commit behaviour when committing results
      - name: Copy submission files to eval workspace
        run: |
          cp -r _pr_submission/leaderboard-submissions/closedbook-generations/. leaderboard-submissions/closedbook-generations
          cp -r _pr_submission/leaderboard-submissions/openbook-generations/. leaderboard-submissions/openbook-generations
          cp -r _pr_submission/leaderboard-submissions/evidenceprovided-generations/. leaderboard-submissions/evidenceprovided-generations
          cp -r _pr_submission/leaderboard-submissions/metadata/. leaderboard-submissions/metadata
          # -rf: succeed whether pr-results is a file, a symlink, a directory, or absent
          rm -rf leaderboard-submissions/pr-results
          cp -r _pr_submission/leaderboard-submissions/results/. leaderboard-submissions/pr-results

      - name: Download test set answers
        env:
          # Passed through the environment and quoted below so that characters
          # such as `&` or `?` in the URL are not interpreted by the shell.
          TEST_ANSWERS_URL: ${{ secrets.TEST_ANSWERS_URL }}
        run: wget -q "$TEST_ANSWERS_URL" -O fanoutqa-test-answers.json

      # set up in local workdir and hydrate results
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      - name: Install library for eval
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          wget -nv https://storage.googleapis.com/bleurt-oss-21/BLEURT-20.zip
          unzip BLEURT-20.zip
          rm BLEURT-20.zip

      # Later steps read this step's `changed` and `written-results` outputs.
      - name: Run eval script
        id: eval
        env:
          LEADERBOARD_SALT: ${{ secrets.LEADERBOARD_SALT }}
          FANOUTQA_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: python leaderboard-submissions/hydrate.py

      # ensure the PR comment exists so we can edit it in future steps
      - name: Ensure PR comment
        if: steps.eval.outputs.changed > 0 || failure()
        run: |
          if [[ -z $(gh pr view ${{ github.event.number }} --json comments | jq '.comments[].author.login | select(. == "github-actions")') ]]; then
            gh pr comment ${{ github.event.number }} -b "Results incoming...";
          fi

      - name: Add PR comment (failure)
        if: failure()
        run: gh pr comment ${{ github.event.number }} --edit-last -b "It looks like this eval run failed. Please check the [workflow logs](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) to see what went wrong, then push a new commit to your PR to rerun the eval."

      - name: Add PR comment (success)
        if: steps.eval.outputs.changed > 0
        env:
          # presumably read by gh-print-new-results.py to link back to this run - verify
          RUN_LINK: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
          # Step output goes through the environment instead of being spliced
          # into the script text, so its contents cannot be executed as shell.
          WRITTEN_RESULTS: ${{ steps.eval.outputs.written-results }}
        run: |
          # Intentionally unquoted: assumes a whitespace-separated list of paths.
          python leaderboard-submissions/gh-print-new-results.py $WRITTEN_RESULTS | gh pr comment ${{ github.event.number }} --edit-last -F -

      - name: Commit results files to PR
        if: steps.eval.outputs.changed > 0
        env:
          # See the success-comment step: kept out of the script text on purpose.
          WRITTEN_RESULTS: ${{ steps.eval.outputs.written-results }}
        run: |
          # Intentionally unquoted: assumes a whitespace-separated list of paths.
          cp $WRITTEN_RESULTS _pr_submission/leaderboard-submissions/results/
          cd _pr_submission
          git config user.name github-actions
          git config user.email github-actions@github.com
          git add leaderboard-submissions/results
          git commit -m "leaderboard: add eval results"
          git push