diff --git a/.github/workflows/openrouter-exp.yml b/.github/workflows/openrouter-exp.yml new file mode 100644 index 0000000..61e3224 --- /dev/null +++ b/.github/workflows/openrouter-exp.yml @@ -0,0 +1,335 @@ +name: Mutation Testing Experiment + +on: + workflow_dispatch: + inputs: + packages: + description: "JSON file that specifies packages to generate mutants for" + default: "benchmarks.json" + template: + description: "Template to use for generating mutants" + default: "template-full" + systemPrompt: + description: "System prompt to use when obtaining completions" + default: "SystemPrompt-MutationTestingExpert" + temperature: + type: number + description: "Sampling temperature to try when obtaining completions" + default: 0.0 + model: + description: "Which LLM API to use" + type: choice + options: + - "meta-llama/llama-3.3-70b-instruct" + - "mistralai/codestral-mamba" + default: "meta-llama/llama-3.3-70b-instruct" + # rateLimit: + # description: "number of milliseconds between requests sent to the LLM" + # type: number + # default: 0 + benchmarkMode: + description: "use custom rate limiting for benchmarking" + type: "boolean" + default: false + maxNrPrompts: + type: number + description: "maximum number of prompts to produce" + default: 2000 + maxTokensInCompletion: + type: number + description: "maximum number of tokens in a completion" + default: 250 + strykerOptions: + description: "stryker options (e.g., --concurrency 1) to pass to stryker" + default: "--concurrency 1" + debug_enabled: + type: boolean + description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)" + default: false + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + packages: "${{ steps.parse_packages.outputs.packages }}" + temperatures: "${{ github.event.inputs.temperature || '0.0' }}" + maxNrPrompts: "${{ github.event.inputs.maxNrPrompts || '2000' }}" + strykerOptions: "${{ github.event.inputs.strykerOptions }}" + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + with: + node-version: 20 + + - id: parse_packages + run: | + packages=$(node \ + .github/find_benchmarks.js " \ + .github/${{ github.event.inputs.packages || 'benchmarks.json' }}") + packages=$(echo $packages | tr '\n' ' ') + echo "packages=$packages" >> $GITHUB_OUTPUT + + benchmark: + needs: + - setup + runs-on: ubuntu-latest + + continue-on-error: true + strategy: + fail-fast: false + matrix: + package: ${{ fromJson(needs.setup.outputs.packages) }} + steps: + - name: checkout master branch of StrykerJS from https://github.com/neu-se/stryker-js into directory stryker-js + uses: actions/checkout@v3 + with: + repository: neu-se/stryker-js + ref: master + path: stryker-js + + - name: build StrykerJS + run: | + cd stryker-js + npm install + npm run build + + - name: print package info + run: | + echo "package.host=${{ matrix.package.host }}" + echo "package.name=${{ matrix.package.name }}" + echo "package.owner=${{ matrix.package.owner }}" + echo "package.repo=${{ matrix.package.repo }}" + echo "package.sha=${{ matrix.package.sha }}" + echo "package.edits=${{ matrix.package.edits }}" + echo "package.files=${{ matrix.package.files }}" + echo "package.ignore=${{ matrix.package.ignore }}" + + # print the various options + echo "temperature=${{ needs.setup.outputs.temperatures }}" + echo "template=${{ github.event.inputs.template }}" + echo "systemPrompt=${{ github.event.inputs.systemPrompt }}" + echo "model=${{ github.event.inputs.model }}" + echo "benchmarkMode=${{ github.event.inputs.benchmarkMode }}" + echo "maxNrPrompts=${{ needs.setup.outputs.maxNrPrompts }}" + echo "maxTokensInCompletion=${{ github.event.inputs.maxTokensInCompletion }}" + echo "strykerOptions=${{ needs.setup.outputs.strykerOptions }}" + + - name: check out benchmark + if: ${{ matrix.package.host == 'github.com' }} + uses: actions/checkout@v3 + with: + repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }} + ref: ${{ matrix.package.sha }} + path: ${{ matrix.package.name }} + + - name: Checkout gitlab package repo + if: ${{ matrix.package.host == 'gitlab.com' }} + run: | + git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} ${{ matrix.package.name }} + cd ${{ matrix.package.name }} + git checkout ${{ matrix.package.sha }} + + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} + + - name: build project + run: | + cd ${{ matrix.package.name }} + # if an edit command for editing package.json is specified (e.g., to disable linting), run it + if [ -n "$MATRIX_PACKAGE_EDITS" ]; then + ${{ matrix.package.edits }} + fi + npm install + # if a build script exists, run it + npm run build || echo "No build script found" + env: + MATRIX_PACKAGE_EDITS: ${{ matrix.package.edits }} + + - name: Check out llm-mutation-testing + uses: actions/checkout@v3 + with: + path: llm-mutation-testing + + - name: Set up llm-mutation-testing + run: | + cd llm-mutation-testing + npm run build + + - name: Generate mutants + env: + LLMORPHEUS_LLM_API_ENDPOINT: '${{ secrets.OPENROUTER_LLM_API_ENDPOINT }}' + LLMORPHEUS_LLM_AUTH_HEADERS: '${{ secrets.OPENROUTER_LLM_AUTH_HEADERS }}' + run: | + cd ${{ matrix.package.name }} + BENCHMARK_DIR=`pwd` + cd ../llm-mutation-testing + echo "Generating mutants for ${{ matrix.package.name }}" + LLMORPHEUS_OPTIONS="--temperature ${{ needs.setup.outputs.temperatures }} --maxNrPrompts ${{ needs.setup.outputs.maxNrPrompts }} --systemPrompt ${{ github.event.inputs.systemPrompt }}.txt --model ${{ github.event.inputs.model }} --benchmark ${{ github.event.inputs.benchmarkMode }} --maxTokens ${{ github.event.inputs.maxTokensInCompletion }}" + (time node --max-old-space-size=6144 benchmark/createMutants.js --path $BENCHMARK_DIR --mutate "${{ matrix.package.files }}" --ignore "${{ matrix.package.ignore }}" --template templates/${{ github.event.inputs.template }}.hb --caching false $LLMORPHEUS_OPTIONS) 2>&1 | tee -a ../${{ matrix.package.name }}/LLMorpheusOutput.txt + + - name: extract mutant-related info + run: | + cd ${{ matrix.package.name }} + mkdir ${{ matrix.package.name }} + mkdir ${{ matrix.package.name }}/${{ matrix.package.name }} + cp -r MUTATION_TESTING/*/* ${{ matrix.package.name }}/${{ matrix.package.name }} + cp LLMorpheusOutput.txt ${{ matrix.package.name }}/${{ matrix.package.name }}/LLMorpheusOutput.txt + + - name: Upload mutants info + uses: actions/upload-artifact@v4 + with: + name: mutants-${{ matrix.package.name }} + path: ${{ matrix.package.name }}/${{ matrix.package.name }} + + - name: install stryker-js into benchmark and run stryker (precomputed mutators) + run: | + cd ${{ matrix.package.name }} + npm install install-local # install-local is needed to install our custom version of stryker-js + npx install-local ../stryker-js/packages/{core,util,api,instrumenter,*-runner} --legacy-peer-deps + export MUTANTS_FILE=MUTATION_TESTING/${{ github.event.inputs.template }}_${{ github.event.inputs.model }}_${{ github.event.inputs.temperature }}/mutants.json + STRYKER_FILES=$(node ../llm-mutation-testing/.github/expandGlob.js $(pwd) "${{ matrix.package.files }}" "${{ matrix.package.ignore }}") + STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}" + (time npx stryker run $STRYKER_OPTIONS --usePrecomputed --mutate $STRYKER_FILES) 2>&1 | tee -a StrykerOutput.txt + + - name: extract summary from StrykerOutput.txt + run: | + cd ${{ matrix.package.name }} + node ../llm-mutation-testing/.github/parseStrykerReport.js StrykerOutput.txt + + - name: gather reports/mutation/mutation.*, StrykerOutput.txt, and StrykerInfo.json into results.zip + run: | + cd ${{ matrix.package.name }} + mkdir results + mkdir results/results-${{ matrix.package.name }} + cp reports/mutation/mutation.* results/results-${{ matrix.package.name }}/ + cp StrykerOutput.txt results/results-${{ matrix.package.name }}/StrykerOutput.txt + cp StrykerInfo.json results/results-${{ matrix.package.name }}/StrykerInfo.json + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: results-${{ matrix.package.name }} + path: ${{ matrix.package.name }}/results + + combine_output: + name: Combine output from all benchmarks + needs: + - setup + - benchmark + runs-on: ubuntu-latest + steps: + - name: Download output zips + uses: actions/download-artifact@v4 + - name: Combine outputs + run: | + mkdir results + mkdir results/results + cd results/results + + # move all benchmark info into a single directory + for benchmark in ../../results-* + do + # extract benchmark name + name=$(echo $benchmark | sed 's/..\/..\/results-//') + mv $benchmark/results-$name $name + done + cd ../.. + + - name: Upload combined output files + uses: actions/upload-artifact@v4 + with: + name: results + path: results/results + + generate_report: + name: Generate report + needs: + - setup + - combine_output + runs-on: ubuntu-latest + + steps: + + + # download all files mutants-* + - name: Download LLMorpheus results + uses: actions/download-artifact@v4 + with: + pattern: "mutants-*" + + # download all files results-* + - name: Download Stryker results + uses: actions/download-artifact@v4 + with: + pattern: "results-*" + + # move all mutants-* directories into a single directory + - name: Gather mutants into single directory + run: | + mkdir mutants + cd mutants + for mutants in ../mutants-* + do + # remove "../mutants-" prefix, leaving only the benchmark name + name=$(echo $mutants | sed 's/\.\.\/mutants-//') + echo "moving $mutants to $name" + mv $mutants/* $name + done + cd .. + + # move all results-* directories into a single directory + - name: Gather Stryker results into single directory + run: | + mkdir results + cd results + for results in ../results-* + do + # extract benchmark name + name=$(echo $results | sed 's/\.\.\/results-//') + echo "moving $results to $name" + mv $results/* $name + done + cd .. + + - name: Check out llm-mutation-testing + uses: actions/checkout@v3 + with: + path: llm-mutation-testing + + - name: Generate report + run: | + cd llm-mutation-testing + npm run build + # if stryker options are specified, include them in the title + STRYKER_OPTIONS="${{ needs.setup.outputs.strykerOptions }}" + if [ -n "$STRYKER_OPTIONS" ]; then + title="Report (Precomputed mutators $STRYKER_OPTIONS)" + else + title="Report (Precomputed mutators)" + fi + node .github/generateReport.js "$title" ../results ../mutants > report.md + more report.md > $GITHUB_STEP_SUMMARY + + - name: Upload report + uses: actions/upload-artifact@v4 + with: + name: report.md + path: llm-mutation-testing/report.md + + - name: generate Latex table + run: | + cd llm-mutation-testing + node .github/generateLatexTable.js "$title" ../results ../mutants > table.tex + + - name: Upload table + uses: actions/upload-artifact@v4 + with: + name: table.tex + path: llm-mutation-testing/table.tex + + + + + \ No newline at end of file