Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add separate workflow for OpenRouter experiments #8

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
335 changes: 335 additions & 0 deletions .github/workflows/openrouter-exp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
name: Mutation Testing Experiment

on:
workflow_dispatch:
inputs:
packages:
description: "JSON file that specifies packages to generate mutants for"
default: "benchmarks.json"
template:
description: "Template to use for generating mutants"
default: "template-full"
systemPrompt:
description: "System prompt to use when obtaining completions"
default: "SystemPrompt-MutationTestingExpert"
temperature:
type: number
description: "Sampling temperature to try when obtaining completions"
default: 0.0
model:
description: "Which LLM API to use"
type: choice
options:
- "meta-llama/llama-3.3-70b-instruct"
- "mistralai/codestral-mamba"
default: "meta-llama/llama-3.3-70b-instruct"
# rateLimit:
# description: "number of milliseconds between requests sent to the LLM"
# type: number
# default: 0
benchmarkMode:
description: "use custom rate limiting for benchmarking"
type: "boolean"
default: false
maxNrPrompts:
type: number
description: "maximum number of prompts to produce"
default: 2000
maxTokensInCompletion:
type: number
description: "maximum number of tokens in a completion"
default: 250
strykerOptions:
description: "stryker options (e.g., --concurrency 1) to pass to stryker"
default: "--concurrency 1"
debug_enabled:
type: boolean
description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
default: false

jobs:
setup:
runs-on: ubuntu-latest
outputs:
packages: "${{ steps.parse_packages.outputs.packages }}"
temperatures: "${{ github.event.inputs.temperature || '0.0' }}"
maxNrPrompts: "${{ github.event.inputs.maxNrPrompts || '2000' }}"
strykerOptions: "${{ github.event.inputs.strykerOptions }}"
steps:
- uses: actions/checkout@v3

- uses: actions/setup-node@v3
with:
node-version: 20

- id: parse_packages
run: |
packages=$(node \
.github/find_benchmarks.js " \
.github/${{ github.event.inputs.packages || 'benchmarks.json' }}")
packages=$(echo $packages | tr '\n' ' ')
echo "packages=$packages" >> $GITHUB_OUTPUT

benchmark:
needs:
- setup
runs-on: ubuntu-latest

continue-on-error: true
strategy:
fail-fast: false
matrix:
package: ${{ fromJson(needs.setup.outputs.packages) }}
steps:
- name: checkout master branch of StrykerJS from https://github.com/neu-se/stryker-js into directory stryker-js
uses: actions/checkout@v3
with:
repository: neu-se/stryker-js
ref: master
path: stryker-js

- name: build StrykerJS
run: |
cd stryker-js
npm install
npm run build

- name: print package info
run: |
echo "package.host=${{ matrix.package.host }}"
echo "package.name=${{ matrix.package.name }}"
echo "package.owner=${{ matrix.package.owner }}"
echo "package.repo=${{ matrix.package.repo }}"
echo "package.sha=${{ matrix.package.sha }}"
echo "package.edits=${{ matrix.package.edits }}"
echo "package.files=${{ matrix.package.files }}"
echo "package.ignore=${{ matrix.package.ignore }}"

# print the various options
echo "temperature=${{ needs.setup.outputs.temperatures }}"
echo "template=${{ github.event.inputs.template }}"
echo "systemPrompt=${{ github.event.inputs.systemPrompt }}"
echo "model=${{ github.event.inputs.model }}"
echo "benchmarkMode=${{ github.event.inputs.benchmarkMode }}"
echo "maxNrPrompts=${{ needs.setup.outputs.maxNrPrompts }}"
echo "maxTokensInCompletion=${{ github.event.inputs.maxTokensInCompletion }}"
echo "strykerOptions=${{ needs.setup.outputs.strykerOptions }}"

- name: check out benchmark
if: ${{ matrix.package.host == 'github.com' }}
uses: actions/checkout@v3
with:
repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
ref: ${{ matrix.package.sha }}
path: ${{ matrix.package.name }}

- name: Checkout gitlab package repo
if: ${{ matrix.package.host == 'gitlab.com' }}
run: |
git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} ${{ matrix.package.name }}
cd ${{ matrix.package.name }}
git checkout ${{ matrix.package.sha }}

- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

- name: build project
run: |
cd ${{ matrix.package.name }}
# if an edit command for editing package.json is specified (e.g., to disable linting), run it
if [ -n "$MATRIX_PACKAGE_EDITS" ]; then
${{ matrix.package.edits }}
fi
npm install
# if a build script exists, run it
npm run build || echo "No build script found"
env:
MATRIX_PACKAGE_EDITS: ${{ matrix.package.edits }}

- name: Check out llm-mutation-testing
uses: actions/checkout@v3
with:
path: llm-mutation-testing

- name: Set up llm-mutation-testing
run: |
cd llm-mutation-testing
npm run build

- name: Generate mutants
env:
LLMORPHEUS_LLM_API_ENDPOINT: '${{ secrets.OPENROUTER_LLM_API_ENDPOINT }}'
LLMORPHEUS_LLM_AUTH_HEADERS: '${{ secrets.OPENROUTER_LLM_AUTH_HEADERS }}'
run: |
cd ${{ matrix.package.name }}
BENCHMARK_DIR=`pwd`
cd ../llm-mutation-testing
echo "Generating mutants for ${{ matrix.package.name }}"
LLMORPHEUS_OPTIONS="--temperature ${{ needs.setup.outputs.temperatures }} --maxNrPrompts ${{ needs.setup.outputs.maxNrPrompts }} --systemPrompt ${{ github.event.inputs.systemPrompt }}.txt --model ${{ github.event.inputs.model }} --benchmark ${{ github.event.inputs.benchmarkMode }} --maxTokens ${{ github.event.inputs.maxTokensInCompletion }}"
(time node --max-old-space-size=6144 benchmark/createMutants.js --path $BENCHMARK_DIR --mutate "${{ matrix.package.files }}" --ignore "${{ matrix.package.ignore }}" --template templates/${{ github.event.inputs.template }}.hb --caching false $LLMORPHEUS_OPTIONS) 2>&1 | tee -a ../${{ matrix.package.name }}/LLMorpheusOutput.txt

- name: extract mutant-related info
run: |
cd ${{ matrix.package.name }}
mkdir ${{ matrix.package.name }}
mkdir ${{ matrix.package.name }}/${{ matrix.package.name }}
cp -r MUTATION_TESTING/*/* ${{ matrix.package.name }}/${{ matrix.package.name }}
cp LLMorpheusOutput.txt ${{ matrix.package.name }}/${{ matrix.package.name }}/LLMorpheusOutput.txt

- name: Upload mutants info
uses: actions/upload-artifact@v4
with:
name: mutants-${{ matrix.package.name }}
path: ${{ matrix.package.name }}/${{ matrix.package.name }}

- name: install stryker-js into benchmark and run stryker (precomputed mutators)
run: |
cd ${{ matrix.package.name }}
npm install install-local # install-local is needed to install our custom version of stryker-js
npx install-local ../stryker-js/packages/{core,util,api,instrumenter,*-runner} --legacy-peer-deps
export MUTANTS_FILE=MUTATION_TESTING/${{ github.event.inputs.template }}_${{ github.event.inputs.model }}_${{ github.event.inputs.temperature }}/mutants.json
STRYKER_FILES=$(node ../llm-mutation-testing/.github/expandGlob.js $(pwd) "${{ matrix.package.files }}" "${{ matrix.package.ignore }}")
STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}"
(time npx stryker run $STRYKER_OPTIONS --usePrecomputed --mutate $STRYKER_FILES) 2>&1 | tee -a StrykerOutput.txt

- name: extract summary from StrykerOutput.txt
run: |
cd ${{ matrix.package.name }}
node ../llm-mutation-testing/.github/parseStrykerReport.js StrykerOutput.txt

- name: gather reports/mutation/mutation.*, StrykerOutput.txt, and StrykerInfo.json into results.zip
run: |
cd ${{ matrix.package.name }}
mkdir results
mkdir results/results-${{ matrix.package.name }}
cp reports/mutation/mutation.* results/results-${{ matrix.package.name }}/
cp StrykerOutput.txt results/results-${{ matrix.package.name }}/StrykerOutput.txt
cp StrykerInfo.json results/results-${{ matrix.package.name }}/StrykerInfo.json

- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: results-${{ matrix.package.name }}
path: ${{ matrix.package.name }}/results

combine_output:
name: Combine output from all benchmarks
needs:
- setup
- benchmark
runs-on: ubuntu-latest
steps:
- name: Download output zips
uses: actions/download-artifact@v4
- name: Combine outputs
run: |
mkdir results
mkdir results/results
cd results/results

# move all benchmark info into a single directory
for benchmark in ../../results-*
do
# extract benchmark name
name=$(echo $benchmark | sed 's/..\/..\/results-//')
mv $benchmark/results-$name $name
done
cd ../..

- name: Upload combined output files
uses: actions/upload-artifact@v4
with:
name: results
path: results/results

generate_report:
name: Generate report
needs:
- setup
- combine_output
runs-on: ubuntu-latest

steps:


# download all files mutants-*
- name: Download LLMorpheus results
uses: actions/download-artifact@v4
with:
pattern: "mutants-*"

# download all files results-*
- name: Download Stryker results
uses: actions/download-artifact@v4
with:
pattern: "results-*"

# move all mutants-* directories into a single directory
- name: Gather mutants into single directory
run: |
mkdir mutants
cd mutants
for mutants in ../mutants-*
do
# remove "../mutants-" prefix, leaving only the benchmark name
name=$(echo $mutants | sed 's/\.\.\/mutants-//')
echo "moving $mutants to $name"
mv $mutants/* $name
done
cd ..

# move all results-* directories into a single directory
- name: Gather Stryker results into single directory
run: |
mkdir results
cd results
for results in ../results-*
do
# extract benchmark name
name=$(echo $results | sed 's/\.\.\/results-//')
echo "moving $results to $name"
mv $results/* $name
done
cd ..

- name: Check out llm-mutation-testing
uses: actions/checkout@v3
with:
path: llm-mutation-testing

- name: Generate report
run: |
cd llm-mutation-testing
npm run build
# if stryker options are specified, include them in the title
STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}"
if [ -n "$STRYKER_OPTIONS" ]; then
title="Report (Precomputed mutators $STRYKER_OPTIONS)"
else
title="Report (Precomputed mutators)"
fi
node .github/generateReport.js "$title" ../results ../mutants > report.md
more report.md > $GITHUB_STEP_SUMMARY

- name: Upload report
uses: actions/upload-artifact@v4
with:
name: report.md
path: llm-mutation-testing/report.md

- name: generate Latex table
run: |
cd llm-mutation-testing
node .github/generateLatexTable.js "$title" ../results ../mutants > table.tex

- name: Upload table
uses: actions/upload-artifact@v4
with:
name: table.tex
path: llm-mutation-testing/table.tex





Loading