# Mutation Testing Experiment (OpenRouter) #12

name: Mutation Testing Experiment (OpenRouter)

# Manually triggered workflow; every experiment parameter below is a
# workflow_dispatch input with a default.
on:
  workflow_dispatch:
    inputs:
      packages:
        description: 'JSON file that specifies packages to generate mutants for'
        default: 'benchmarks.json'
      template:
        description: 'Template to use for generating mutants'
        default: 'template-full'
      systemPrompt:
        description: 'System prompt to use when obtaining completions'
        default: 'SystemPrompt-MutationTestingExpert'
      temperature:
        description: 'Sampling temperature to try when obtaining completions'
        type: number
        default: 0.0
      model:
        description: 'Which LLM API to use'
        type: choice
        options:
          - 'meta-llama/llama-3.3-70b-instruct'
          - 'mistralai/codestral-mamba'
        default: 'meta-llama/llama-3.3-70b-instruct'
      # rateLimit:
      #   description: "number of milliseconds between requests sent to the LLM"
      #   type: number
      #   default: 0
      benchmarkMode:
        description: 'use custom rate limiting for benchmarking'
        type: boolean
        default: false
      maxNrPrompts:
        description: 'maximum number of prompts to produce'
        type: number
        default: 2000
      maxTokensInCompletion:
        description: 'maximum number of tokens in a completion'
        type: number
        default: 250
      strykerOptions:
        description: 'stryker options (e.g., --concurrency 1) to pass to stryker'
        default: '--concurrency 1'
      debug_enabled:
        description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
        type: boolean
        default: false
jobs:
  # Resolves the list of benchmark packages and normalizes a few inputs once,
  # so downstream jobs read identical values from `needs.setup.outputs`.
  setup:
    runs-on: ubuntu-latest
    outputs:
      # Consumed via fromJson() as the benchmark job's matrix.
      packages: "${{ steps.parse_packages.outputs.packages }}"
      temperatures: "${{ github.event.inputs.temperature || '0.0' }}"
      maxNrPrompts: "${{ github.event.inputs.maxNrPrompts || '2000' }}"
      strykerOptions: "${{ github.event.inputs.strykerOptions }}"
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 20
      - id: parse_packages
        env:
          # Build the path in one piece: the previous form opened the quote
          # before a line continuation, so the argument started with
          # whitespace instead of ".github/...".
          PACKAGES_FILE: ".github/${{ github.event.inputs.packages || 'benchmarks.json' }}"
        run: |
          packages=$(node .github/find_benchmarks.js "$PACKAGES_FILE")
          # Step outputs written as name=value must be a single line, so
          # flatten newlines. Quoted to avoid word-splitting/globbing of
          # the script's output.
          packages=$(printf '%s' "$packages" | tr '\n' ' ')
          echo "packages=$packages" >> "$GITHUB_OUTPUT"
# Job: benchmark -- runs once per package in the matrix produced by `setup`.
  # For each package it builds the neu-se StrykerJS fork, builds the
  # package, generates mutants with createMutants.js, runs Stryker with
  # --usePrecomputed, and uploads `mutants-<name>` / `results-<name>`.
  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    # A failing package must not fail the workflow or cancel its siblings.
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        package: ${{ fromJson(needs.setup.outputs.packages) }}
    steps:
      - name: checkout master branch of StrykerJS from https://github.com/neu-se/stryker-js into directory stryker-js
        uses: actions/checkout@v3
        with:
          repository: neu-se/stryker-js
          ref: master
          path: stryker-js

      - name: build StrykerJS
        run: |
          cd stryker-js
          npm install
          npm run build

      - name: print package info
        run: |
          echo "package.host=${{ matrix.package.host }}"
          echo "package.name=${{ matrix.package.name }}"
          echo "package.owner=${{ matrix.package.owner }}"
          echo "package.repo=${{ matrix.package.repo }}"
          echo "package.sha=${{ matrix.package.sha }}"
          echo "package.edits=${{ matrix.package.edits }}"
          echo "package.files=${{ matrix.package.files }}"
          echo "package.ignore=${{ matrix.package.ignore }}"
          # print the various options
          echo "temperature=${{ needs.setup.outputs.temperatures }}"
          echo "template=${{ github.event.inputs.template }}"
          echo "systemPrompt=${{ github.event.inputs.systemPrompt }}"
          echo "model=${{ github.event.inputs.model }}"
          echo "benchmarkMode=${{ github.event.inputs.benchmarkMode }}"
          echo "maxNrPrompts=${{ needs.setup.outputs.maxNrPrompts }}"
          echo "maxTokensInCompletion=${{ github.event.inputs.maxTokensInCompletion }}"
          echo "strykerOptions=${{ needs.setup.outputs.strykerOptions }}"

      - name: check out benchmark
        if: ${{ matrix.package.host == 'github.com' }}
        uses: actions/checkout@v3
        with:
          repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
          ref: ${{ matrix.package.sha }}
          path: ${{ matrix.package.name }}

      - name: Checkout gitlab package repo
        if: ${{ matrix.package.host == 'gitlab.com' }}
        run: |
          git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} ${{ matrix.package.name }}
          cd ${{ matrix.package.name }}
          git checkout ${{ matrix.package.sha }}

      - name: build project
        env:
          MATRIX_PACKAGE_EDITS: ${{ matrix.package.edits }}
        run: |
          cd ${{ matrix.package.name }}
          # if an edit command for editing package.json is specified (e.g., to disable linting), run it
          # The command is run from the environment variable rather than
          # pasted into the `then` body: an empty value would otherwise leave
          # `then` immediately followed by `fi`, which bash rejects as a
          # syntax error.
          if [ -n "$MATRIX_PACKAGE_EDITS" ]; then
            eval "$MATRIX_PACKAGE_EDITS"
          fi
          npm install
          # if a build script exists, run it
          npm run build || echo "No build script found"

      - name: Check out llm-mutation-testing
        uses: actions/checkout@v3
        with:
          path: llm-mutation-testing

      - name: Set up llm-mutation-testing
        run: |
          cd llm-mutation-testing
          npm run build

      - name: Generate mutants
        env:
          LLMORPHEUS_LLM_API_ENDPOINT: '${{ secrets.OPENROUTER_LLM_API_ENDPOINT }}'
          LLMORPHEUS_LLM_AUTH_HEADERS: '${{ secrets.OPENROUTER_LLM_AUTH_HEADERS }}'
        run: |
          cd ${{ matrix.package.name }}
          BENCHMARK_DIR=$(pwd)
          cd ../llm-mutation-testing
          echo "Generating mutants for ${{ matrix.package.name }}"
          # Left unquoted on use below so it word-splits into separate CLI flags.
          LLMORPHEUS_OPTIONS="--temperature ${{ needs.setup.outputs.temperatures }} --maxNrPrompts ${{ needs.setup.outputs.maxNrPrompts }} --systemPrompt ${{ github.event.inputs.systemPrompt }}.txt --model ${{ github.event.inputs.model }} --benchmark ${{ github.event.inputs.benchmarkMode }} --maxTokens ${{ github.event.inputs.maxTokensInCompletion }}"
          (time node --max-old-space-size=6144 benchmark/createMutants.js --path "$BENCHMARK_DIR" --mutate "${{ matrix.package.files }}" --ignore "${{ matrix.package.ignore }}" --template templates/${{ github.event.inputs.template }}.hb --caching false $LLMORPHEUS_OPTIONS) 2>&1 | tee -a ../${{ matrix.package.name }}/LLMorpheusOutput.txt

      - name: extract mutant-related info
        run: |
          cd ${{ matrix.package.name }}
          # Nested <name>/<name> layout: the upload step below publishes the
          # outer directory, so the artifact contains one <name> folder.
          mkdir -p ${{ matrix.package.name }}/${{ matrix.package.name }}
          cp -r MUTATION_TESTING/*/* ${{ matrix.package.name }}/${{ matrix.package.name }}
          cp LLMorpheusOutput.txt ${{ matrix.package.name }}/${{ matrix.package.name }}/LLMorpheusOutput.txt

      - name: Upload mutants info
        uses: actions/upload-artifact@v4
        with:
          name: mutants-${{ matrix.package.name }}
          path: ${{ matrix.package.name }}/${{ matrix.package.name }}

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: install stryker-js into benchmark and run stryker (precomputed mutators)
        run: |
          cd ${{ matrix.package.name }}
          npm install install-local # install-local is needed to install our custom version of stryker-js
          npx install-local ../stryker-js/packages/{core,util,api,instrumenter,*-runner} --legacy-peer-deps
          # need to update modelname by replacing slash with underscore
          export MODELNAME=$(echo ${{ github.event.inputs.model }} | sed 's/\//_/g')
          echo "template is ${{ github.event.inputs.template }}"
          echo "MODELNAME is $MODELNAME"
          # Use the same normalized temperature that was passed to
          # createMutants.js in the "Generate mutants" step, so the directory
          # name below is built from the value the generator actually received.
          echo "temperature is ${{ needs.setup.outputs.temperatures }}"
          # Exported for child processes; presumably read by the custom
          # StrykerJS build when --usePrecomputed is given (confirm there).
          export MUTANTS_FILE="MUTATION_TESTING/${{ github.event.inputs.template }}_${MODELNAME}_${{ needs.setup.outputs.temperatures }}/mutants.json"
          echo "MUTANTS_FILE is $MUTANTS_FILE"
          STRYKER_FILES=$(node ../llm-mutation-testing/.github/expandGlob.js $(pwd) "${{ matrix.package.files }}" "${{ matrix.package.ignore }}")
          STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}"
          (time npx stryker run $STRYKER_OPTIONS --usePrecomputed --mutate $STRYKER_FILES) 2>&1 | tee -a StrykerOutput.txt

      - name: extract summary from StrykerOutput.txt
        run: |
          cd ${{ matrix.package.name }}
          node ../llm-mutation-testing/.github/parseStrykerReport.js StrykerOutput.txt

      - name: gather reports/mutation/mutation.*, StrykerOutput.txt, and StrykerInfo.json into results.zip
        run: |
          cd ${{ matrix.package.name }}
          mkdir -p results/results-${{ matrix.package.name }}
          cp reports/mutation/mutation.* results/results-${{ matrix.package.name }}/
          cp StrykerOutput.txt results/results-${{ matrix.package.name }}/StrykerOutput.txt
          cp StrykerInfo.json results/results-${{ matrix.package.name }}/StrykerInfo.json

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.package.name }}
          path: ${{ matrix.package.name }}/results
# Job: combine_output -- merges the per-benchmark results into one artifact.
  combine_output:
    name: Combine output from all benchmarks
    needs:
      - setup
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        uses: actions/download-artifact@v4

      - name: Combine outputs
        run: |
          mkdir results results/results
          cd results/results
          # Relocate every results-<name>/results-<name> directory to <name>
          # so all benchmarks sit side by side in results/results.
          for src in ../../results-*; do
            # benchmark name = path with the "../../results-" prefix removed
            bench=${src#../../results-}
            mv $src/results-$bench $bench
          done

      - name: Upload combined output files
        uses: actions/upload-artifact@v4
        with:
          name: results
          path: results/results
# Job: generate_report -- builds report.md and table.tex from all artifacts.
  # Downloads every mutants-* and results-* artifact, regroups them under
  # ./mutants and ./results, then runs the report generators from the
  # llm-mutation-testing checkout.
  generate_report:
    name: Generate report
    needs:
      - setup
      - combine_output
    runs-on: ubuntu-latest
    steps:
      # download all files mutants-*
      - name: Download LLMorpheus results
        uses: actions/download-artifact@v4
        with:
          pattern: "mutants-*"

      # download all files results-*
      - name: Download Stryker results
        uses: actions/download-artifact@v4
        with:
          pattern: "results-*"

      # move all mutants-* directories into a single directory
      - name: Gather mutants into single directory
        run: |
          mkdir mutants
          cd mutants
          for mutants in ../mutants-*
          do
            # remove "../mutants-" prefix, leaving only the benchmark name
            name="${mutants#../mutants-}"
            echo "moving $mutants to $name"
            mv "$mutants"/* "$name"
          done

      # move all results-* directories into a single directory
      - name: Gather Stryker results into single directory
        run: |
          mkdir results
          cd results
          for results in ../results-*
          do
            # extract benchmark name
            name="${results#../results-}"
            echo "moving $results to $name"
            mv "$results"/* "$name"
          done

      - name: Check out llm-mutation-testing
        uses: actions/checkout@v3
        with:
          path: llm-mutation-testing

      - name: Generate report
        run: |
          cd llm-mutation-testing
          npm run build
          # if stryker options are specified, include them in the title
          STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}"
          if [ -n "$STRYKER_OPTIONS" ]; then
            title="Report (Precomputed mutators $STRYKER_OPTIONS)"
          else
            title="Report (Precomputed mutators)"
          fi
          # Each step runs in its own shell, so a plain variable is gone by
          # the time the LaTeX step runs. Publish the title through
          # GITHUB_ENV so later steps in this job see it as $title.
          echo "title=$title" >> "$GITHUB_ENV"
          node .github/generateReport.js "$title" ../results ../mutants > report.md
          cat report.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload report
        uses: actions/upload-artifact@v4
        with:
          name: report.md
          path: llm-mutation-testing/report.md

      - name: generate Latex table
        run: |
          cd llm-mutation-testing
          # $title comes from GITHUB_ENV, written by the "Generate report" step.
          node .github/generateLatexTable.js "$title" ../results ../mutants > table.tex

      - name: Upload table
        uses: actions/upload-artifact@v4
        with:
          name: table.tex
          path: llm-mutation-testing/table.tex