# Mutation Testing Experiment (OpenRouter) #12
# NOTE: the web view this copy was taken from reported that the file contains
# bidirectional Unicode text that may be interpreted or compiled differently
# than it appears. Review it in an editor that reveals hidden Unicode characters.
# Manually dispatched experiment: generates LLM-based mutants for a set of
# benchmark packages and then evaluates them with a custom StrykerJS build.
name: Mutation Testing Experiment (OpenRouter)

on:
  workflow_dispatch:
    inputs:
      # --- what to mutate and how to prompt -------------------------------
      packages:
        description: 'JSON file that specifies packages to generate mutants for'
        default: 'benchmarks.json'
      template:
        description: 'Template to use for generating mutants'
        default: 'template-full'
      systemPrompt:
        description: 'System prompt to use when obtaining completions'
        default: 'SystemPrompt-MutationTestingExpert'

      # --- LLM settings ---------------------------------------------------
      temperature:
        description: 'Sampling temperature to try when obtaining completions'
        type: number
        default: 0.0
      model:
        description: 'Which LLM API to use'
        type: choice
        options:
          - 'meta-llama/llama-3.3-70b-instruct'
          - 'mistralai/codestral-mamba'
        default: 'meta-llama/llama-3.3-70b-instruct'
      # rateLimit:
      #   description: "number of milliseconds between requests sent to the LLM"
      #   type: number
      #   default: 0
      benchmarkMode:
        description: 'use custom rate limiting for benchmarking'
        type: boolean
        default: false
      maxNrPrompts:
        description: 'maximum number of prompts to produce'
        type: number
        default: 2000
      maxTokensInCompletion:
        description: 'maximum number of tokens in a completion'
        type: number
        default: 250

      # --- Stryker and debugging ------------------------------------------
      strykerOptions:
        description: 'stryker options (e.g., --concurrency 1) to pass to stryker'
        default: '--concurrency 1'
      debug_enabled:
        description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
        type: boolean
        default: false
jobs:
  # Resolves the benchmark list and normalises the dispatch inputs so that the
  # downstream jobs read them from one place (with fallbacks where defined).
  setup:
    runs-on: ubuntu-latest
    outputs:
      packages: "${{ steps.parse_packages.outputs.packages }}"
      temperatures: "${{ github.event.inputs.temperature || '0.0' }}"
      maxNrPrompts: "${{ github.event.inputs.maxNrPrompts || '2000' }}"
      strykerOptions: "${{ github.event.inputs.strykerOptions }}"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
      - id: parse_packages
        run: |
          # The path is quoted as a single token with no leading whitespace.
          packages=$(node .github/find_benchmarks.js \
            ".github/${{ github.event.inputs.packages || 'benchmarks.json' }}")
          # Deliberately unquoted: collapses the output onto one line, which is
          # what the single-line name=value form of GITHUB_OUTPUT requires.
          packages=$(echo $packages | tr '\n' ' ')
          echo "packages=$packages" >> "$GITHUB_OUTPUT"

  # One matrix entry per benchmark package: generate mutants with
  # llm-mutation-testing, then run the custom StrykerJS build on them.
  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    # A failing benchmark must not fail the run or cancel its siblings.
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        package: ${{ fromJson(needs.setup.outputs.packages) }}
    steps:
      - name: 'checkout master branch of StrykerJS from https://github.com/neu-se/stryker-js into directory stryker-js'
        uses: actions/checkout@v4
        with:
          repository: neu-se/stryker-js
          ref: master
          path: stryker-js

      - name: build StrykerJS
        run: |
          cd stryker-js
          npm install
          npm run build

      - name: print package info
        run: |
          echo "package.host=${{ matrix.package.host }}"
          echo "package.name=${{ matrix.package.name }}"
          echo "package.owner=${{ matrix.package.owner }}"
          echo "package.repo=${{ matrix.package.repo }}"
          echo "package.sha=${{ matrix.package.sha }}"
          echo "package.edits=${{ matrix.package.edits }}"
          echo "package.files=${{ matrix.package.files }}"
          echo "package.ignore=${{ matrix.package.ignore }}"
          # print the various options
          echo "temperature=${{ needs.setup.outputs.temperatures }}"
          echo "template=${{ github.event.inputs.template }}"
          echo "systemPrompt=${{ github.event.inputs.systemPrompt }}"
          echo "model=${{ github.event.inputs.model }}"
          echo "benchmarkMode=${{ github.event.inputs.benchmarkMode }}"
          echo "maxNrPrompts=${{ needs.setup.outputs.maxNrPrompts }}"
          echo "maxTokensInCompletion=${{ github.event.inputs.maxTokensInCompletion }}"
          echo "strykerOptions=${{ needs.setup.outputs.strykerOptions }}"

      # Exactly one of the next two steps runs, depending on where the
      # benchmark is hosted.
      - name: check out benchmark
        if: ${{ matrix.package.host == 'github.com' }}
        uses: actions/checkout@v4
        with:
          repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
          ref: ${{ matrix.package.sha }}
          path: ${{ matrix.package.name }}

      - name: Checkout gitlab package repo
        if: ${{ matrix.package.host == 'gitlab.com' }}
        run: |
          git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} ${{ matrix.package.name }}
          cd ${{ matrix.package.name }}
          git checkout ${{ matrix.package.sha }}

      - name: build project
        env:
          MATRIX_PACKAGE_EDITS: ${{ matrix.package.edits }}
        run: |
          cd ${{ matrix.package.name }}
          # if an edit command for editing package.json is specified (e.g., to disable linting), run it
          if [ -n "$MATRIX_PACKAGE_EDITS" ]; then
            ${{ matrix.package.edits }}
          fi
          npm install
          # if a build script exists, run it
          npm run build || echo "No build script found"

      - name: Check out llm-mutation-testing
        uses: actions/checkout@v4
        with:
          path: llm-mutation-testing

      - name: Set up llm-mutation-testing
        run: |
          cd llm-mutation-testing
          npm run build

      - name: Generate mutants
        env:
          LLMORPHEUS_LLM_API_ENDPOINT: '${{ secrets.OPENROUTER_LLM_API_ENDPOINT }}'
          LLMORPHEUS_LLM_AUTH_HEADERS: '${{ secrets.OPENROUTER_LLM_AUTH_HEADERS }}'
        run: |
          cd ${{ matrix.package.name }}
          BENCHMARK_DIR=`pwd`
          cd ../llm-mutation-testing
          echo "Generating mutants for ${{ matrix.package.name }}"
          LLMORPHEUS_OPTIONS="--temperature ${{ needs.setup.outputs.temperatures }} --maxNrPrompts ${{ needs.setup.outputs.maxNrPrompts }} --systemPrompt ${{ github.event.inputs.systemPrompt }}.txt --model ${{ github.event.inputs.model }} --benchmark ${{ github.event.inputs.benchmarkMode }} --maxTokens ${{ github.event.inputs.maxTokensInCompletion }}"
          # Timing and all output are captured in the benchmark directory.
          (time node --max-old-space-size=6144 benchmark/createMutants.js --path $BENCHMARK_DIR --mutate "${{ matrix.package.files }}" --ignore "${{ matrix.package.ignore }}" --template templates/${{ github.event.inputs.template }}.hb --caching false $LLMORPHEUS_OPTIONS) 2>&1 | tee -a ../${{ matrix.package.name }}/LLMorpheusOutput.txt

      - name: extract mutant-related info
        run: |
          cd ${{ matrix.package.name }}
          # Staging layout is <name>/<name> so the uploaded artifact keeps a
          # top-level directory named after the benchmark.
          mkdir -p ${{ matrix.package.name }}/${{ matrix.package.name }}
          cp -r MUTATION_TESTING/*/* ${{ matrix.package.name }}/${{ matrix.package.name }}
          cp LLMorpheusOutput.txt ${{ matrix.package.name }}/${{ matrix.package.name }}/LLMorpheusOutput.txt

      - name: Upload mutants info
        uses: actions/upload-artifact@v4
        with:
          name: mutants-${{ matrix.package.name }}
          path: ${{ matrix.package.name }}/${{ matrix.package.name }}

      - name: Setup tmate session
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
        uses: mxschmitt/action-tmate@v3

      - name: install stryker-js into benchmark and run stryker (precomputed mutators)
        run: |
          cd ${{ matrix.package.name }}
          npm install install-local # install-local is needed to install our custom version of stryker-js
          npx install-local ../stryker-js/packages/{core,util,api,instrumenter,*-runner} --legacy-peer-deps
          # need to update modelname by replacing slash with underscore
          export MODELNAME=$(echo ${{ github.event.inputs.model }} | sed 's/\//_/g')
          echo "template is ${{ github.event.inputs.template }}"
          echo "MODELNAME is $MODELNAME"
          echo "temperature is ${{ github.event.inputs.temperature }}"
          export MUTANTS_FILE="MUTATION_TESTING/${{ github.event.inputs.template }}_${MODELNAME}_${{ github.event.inputs.temperature }}/mutants.json"
          echo "MUTANTS_FILE is $MUTANTS_FILE"
          STRYKER_FILES=$(node ../llm-mutation-testing/.github/expandGlob.js $(pwd) "${{ matrix.package.files }}" "${{ matrix.package.ignore }}")
          STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}"
          (time npx stryker run $STRYKER_OPTIONS --usePrecomputed --mutate $STRYKER_FILES) 2>&1 | tee -a StrykerOutput.txt

      - name: extract summary from StrykerOutput.txt
        run: |
          cd ${{ matrix.package.name }}
          node ../llm-mutation-testing/.github/parseStrykerReport.js StrykerOutput.txt

      - name: 'gather reports/mutation/mutation.*, StrykerOutput.txt, and StrykerInfo.json into results.zip'
        run: |
          cd ${{ matrix.package.name }}
          mkdir -p results/results-${{ matrix.package.name }}
          cp reports/mutation/mutation.* results/results-${{ matrix.package.name }}/
          cp StrykerOutput.txt results/results-${{ matrix.package.name }}/StrykerOutput.txt
          cp StrykerInfo.json results/results-${{ matrix.package.name }}/StrykerInfo.json

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.package.name }}
          path: ${{ matrix.package.name }}/results

  # Merges the per-benchmark "results-<name>" artifacts into a single
  # "results" artifact with one directory per benchmark.
  combine_output:
    name: Combine output from all benchmarks
    needs:
      - setup
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        uses: actions/download-artifact@v4

      - name: Combine outputs
        run: |
          mkdir -p results/results
          cd results/results
          # move all benchmark info into a single directory
          for benchmark in ../../results-*
          do
            # extract benchmark name by stripping the "../../results-" prefix
            name="${benchmark#../../results-}"
            mv "$benchmark/results-$name" "$name"
          done

      - name: Upload combined output files
        uses: actions/upload-artifact@v4
        with:
          name: results
          path: results/results

  # Collects the mutants-* and results-* artifacts and renders the Markdown
  # report (also shown as the step summary) and a LaTeX table.
  generate_report:
    name: Generate report
    needs:
      - setup
      - combine_output
    runs-on: ubuntu-latest
    steps:
      # download all files mutants-*
      - name: Download LLMorpheus results
        uses: actions/download-artifact@v4
        with:
          pattern: "mutants-*"

      # download all files results-*
      - name: Download Stryker results
        uses: actions/download-artifact@v4
        with:
          pattern: "results-*"

      # move all mutants-* directories into a single directory
      - name: Gather mutants into single directory
        run: |
          mkdir mutants
          cd mutants
          for mutants in ../mutants-*
          do
            # remove "../mutants-" prefix, leaving only the benchmark name
            name="${mutants#../mutants-}"
            echo "moving $mutants to $name"
            mv "$mutants"/* "$name"
          done

      # move all results-* directories into a single directory
      - name: Gather Stryker results into single directory
        run: |
          mkdir results
          cd results
          for results in ../results-*
          do
            # remove "../results-" prefix, leaving only the benchmark name
            name="${results#../results-}"
            echo "moving $results to $name"
            mv "$results"/* "$name"
          done

      - name: Check out llm-mutation-testing
        uses: actions/checkout@v4
        with:
          path: llm-mutation-testing

      - name: Generate report
        run: |
          cd llm-mutation-testing
          npm run build
          # if stryker options are specified, include them in the title
          STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}"
          if [ -n "$STRYKER_OPTIONS" ]; then
            title="Report (Precomputed mutators $STRYKER_OPTIONS)"
          else
            title="Report (Precomputed mutators)"
          fi
          # Shell variables do not survive into later steps; publish the title
          # through the job environment so the LaTeX step can reuse it.
          echo "REPORT_TITLE=$title" >> "$GITHUB_ENV"
          node .github/generateReport.js "$title" ../results ../mutants > report.md
          cat report.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload report
        uses: actions/upload-artifact@v4
        with:
          name: report.md
          path: llm-mutation-testing/report.md

      - name: generate Latex table
        run: |
          cd llm-mutation-testing
          # REPORT_TITLE is exported by the "Generate report" step.
          node .github/generateLatexTable.js "$REPORT_TITLE" ../results ../mutants > table.tex

      - name: Upload table
        uses: actions/upload-artifact@v4
        with:
          name: table.tex
          path: llm-mutation-testing/table.tex