leaderboard-submission.yml
name: Leaderboard Submission
# Since we need write access to the repo, we use pull_request_target here.
# For security, the eval workflow clones the PR head branch alongside the base branch, then copies only
# the submission files over, so that the eval code that actually runs always comes from the base branch.
on:
  pull_request_target:
    paths:
      - leaderboard-submissions/metadata/*.json
      - leaderboard-submissions/*generations/*.jsonl
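# write access is needed to comment eval results on the PR and to push result commits back to the PR branch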
permissions:
  pull-requests: write
  contents: write
jobs:
  evaluate:
    runs-on: ubuntu-latest
    env:
      GH_TOKEN: ${{ github.token }}
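    # the gh CLI used in later steps authenticates via the GH_TOKEN env var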
    steps:
      # Clone the main repo
      - uses: actions/checkout@v4
      # Clone the PR head to _pr_submission
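      # NOTE: this checkout may contain untrusted PR code; we only copy files from it, never run it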
      - uses: actions/checkout@v4
        with:
          path: _pr_submission
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.head_ref }}
      # copy submission files over to main repo
      # results files go to a new dir to prevent weird commit behaviour when committing results
      - name: Copy submission files to eval workspace
        run: |
          cp -r _pr_submission/leaderboard-submissions/closedbook-generations/. leaderboard-submissions/closedbook-generations
          cp -r _pr_submission/leaderboard-submissions/openbook-generations/. leaderboard-submissions/openbook-generations
          cp -r _pr_submission/leaderboard-submissions/evidenceprovided-generations/. leaderboard-submissions/evidenceprovided-generations
          cp -r _pr_submission/leaderboard-submissions/metadata/. leaderboard-submissions/metadata
          rm -rf leaderboard-submissions/pr-results
          cp -r _pr_submission/leaderboard-submissions/results/. leaderboard-submissions/pr-results
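      # TEST_ANSWERS_URL is a repo secret, so the hidden test answers never land in the public repo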
      - name: Download test set answers
        run: wget -q ${{ secrets.TEST_ANSWERS_URL }} -O fanoutqa-test-answers.json
      # set up Python in the local workdir and hydrate results
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          cache: 'pip'
      - name: Install library for eval
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
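          # fetch the BLEURT-20 checkpoint for the eval's answer-scoring metric, then drop the zip to save disk space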
          wget -nv https://storage.googleapis.com/bleurt-oss-21/BLEURT-20.zip
          unzip BLEURT-20.zip
          rm BLEURT-20.zip
      - name: Run eval script
        id: eval
        env:
          LEADERBOARD_SALT: ${{ secrets.LEADERBOARD_SALT }}
          FANOUTQA_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: python leaderboard-submissions/hydrate.py
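      # hydrate.py is expected to set the step outputs consumed below: `changed` (how many results
      # are new or updated) and `written-results` (the result files it wrote)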
      # ensure the PR comment exists so we can edit it in future steps
      - name: Ensure PR comment
        if: steps.eval.outputs.changed > 0 || failure()
        run: |
          if [[ -z $(gh pr view ${{ github.event.number }} --json comments | jq '.comments[].author.login | select(. == "github-actions")') ]]; then
            gh pr comment ${{ github.event.number }} -b "Results incoming...";
          fi
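      # --edit-last rewrites the bot's most recent comment and fails when none exists,
      # which is why the placeholder comment above is created first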
      - name: Add PR comment (failure)
        if: failure()
        run: gh pr comment ${{ github.event.number }} --edit-last -b "It looks like this eval run failed. Please check the [workflow logs](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) to see what went wrong, then push a new commit to your PR to rerun the eval."
      - name: Add PR comment (success)
        if: steps.eval.outputs.changed > 0
        env:
          RUN_LINK: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: python leaderboard-submissions/gh-print-new-results.py ${{ steps.eval.outputs.written-results }} | gh pr comment ${{ github.event.number }} --edit-last -F -
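      # this step runs inside the PR head checkout, so the push lands on the contributor's PR branch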
      - name: Commit results files to PR
        if: steps.eval.outputs.changed > 0
        run: |
          cp ${{ steps.eval.outputs.written-results }} _pr_submission/leaderboard-submissions/results/
          cd _pr_submission
          git config user.name github-actions
          git config user.email github-actions@github.com
          git add leaderboard-submissions/results
          git commit -m "leaderboard: add eval results"
          git push