# Performance Regression Test - Score Director #42
#
# - Runs entirely on a single machine.
# - The baseline is established first, then the branch under test is measured.
# - Each benchmark gives a 99.9 % confidence interval.
# - The confidence intervals are compared to determine if the branch under test is a regression or an improvement.
# - The error threshold is expected to be below +/- 2.5 %,
# but sometimes it gets higher due to the nature of public GitHub runners.
# We have yet to see an error of over +/- 4 %.
# With the error so high, the impact is that small regressions are not considered statistically significant.
name: Performance Regression Test - Score Director

# Manually triggered only; every input is required and has a default.
on:
  workflow_dispatch:
    inputs:
      # Selects the JDK installed by actions/setup-java.
      jdk:
        description: 'JDK version'
        default: '21'
        required: true
      # Released solver version that the branch is compared against.
      baseline:
        description: 'Timefold Solver release'
        default: '1.14.0'
        required: true
      # Git ref of timefold-solver to build and measure.
      branch:
        description: 'Branch to benchmark (needs to use 999-SNAPSHOT)'
        default: 'main'
        required: true
      # Account owning the timefold-solver repository that holds the branch.
      branch_owner:
        description: 'User owning the branch'
        default: 'TimefoldAI'
        required: true
      # Release of async-profiler to download onto the runner.
      async_profiler_version:
        description: 'async-profiler version'
        default: '3.0'
        required: true

# One job per example: measure the baseline release, then the branch under
# test, and compare the two confidence intervals (higher score is better).
jobs:
  benchmark:
    runs-on: ubuntu-latest
    strategy:
      # Jobs fail if the benchmark error is over predefined thresholds;
      # other benchmarks continue.
      fail-fast: false
      matrix:
        example:
          - cloud_balancing
          - conference_scheduling
          - curriculum_course
          - examination
          - machine_reassignment
          - meeting_scheduling
          - nurse_rostering
          - patient_admission_scheduling
          - task_assigning
          - traveling_tournament
          - tsp
          - vehicle_routing
    env:
      # Credentials referenced by name in the setup-java server settings below.
      MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}'
      MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}'
    steps:
      - name: Phase 0 - Checkout timefold-solver-benchmarks
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-benchmarks
          path: ./timefold-solver-benchmarks

      - name: Phase 0 - Setup JDK and Maven
        uses: actions/setup-java@v4
        with:
          java-version: ${{ github.event.inputs.jdk }}
          distribution: 'temurin'
          cache: 'maven'
          server-id: 'timefold-solver-enterprise'
          # These are names of environment variables, not the values.
          server-username: 'MVN_USERNAME'
          server-password: 'MVN_PASSWORD'

      - name: Phase 0 - Setup Async Profiler
        working-directory: ./timefold-solver-benchmarks
        env:
          # Inputs reach the script through the environment instead of being
          # spliced into the script text.
          ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: |
          FILENAME="async-profiler-${ASYNC_PROFILER_VERSION}-linux-x64.tar.gz"
          wget "https://github.com/async-profiler/async-profiler/releases/download/v${ASYNC_PROFILER_VERSION}/${FILENAME}"
          tar -xzf "$FILENAME"
          ls -l

      # Fine-tuned for stability on GHA.
      - name: Phase 0 - Configure the benchmark
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        run: |
          echo "forks=2" > scoredirector-benchmark.properties
          echo "warmup_iterations=1" >> scoredirector-benchmark.properties
          echo "measurement_iterations=1" >> scoredirector-benchmark.properties
          echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties
          echo "score_director_type=cs" >> scoredirector-benchmark.properties
          echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties
          cat scoredirector-benchmark.properties
          chmod +x run-scoredirector.sh

      - name: Phase 1 - Compile the benchmark
        working-directory: ./timefold-solver-benchmarks
        env:
          BASELINE: ${{ github.event.inputs.baseline }}
          ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        shell: bash
        run: |
          mvn clean install -B -Dquickly \
            "-Dversion.ai.timefold.solver=${BASELINE}" \
            "-Dversion.tools.provider=${ASYNC_PROFILER_VERSION}"

      - name: Phase 1 - Run the baseline configuration
        id: benchmark_baseline
        working-directory: ./timefold-solver-benchmarks
        env:
          RUN_ID: ${{ github.event.inputs.baseline }}
        shell: bash
        run: |
          ./run-scoredirector.sh
          RESULTS_FILE="results/scoredirector/${RUN_ID}/results.json"
          # The report step compares these bounds with bash integer tests,
          # so round them to whole numbers here. Assigning first (instead of
          # nesting jq inside echo) makes the step fail if jq cannot read
          # the results.
          RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0] | round' "$RESULTS_FILE")
          RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1] | round' "$RESULTS_FILE")
          echo "RANGE_START=${RANGE_START}" >> "$GITHUB_OUTPUT"
          echo "RANGE_END=${RANGE_END}" >> "$GITHUB_OUTPUT"

      - name: Phase 2 - Checkout timefold-solver
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.branch_owner }}/timefold-solver
          ref: ${{ github.event.inputs.branch }}
          path: ./timefold-solver

      - name: Phase 2 - Quickly build timefold-solver
        working-directory: ./timefold-solver
        shell: bash
        run: mvn -B -Dquickly clean install

      # Clone timefold-solver-enterprise
      # Checkout the PR branch first, if it exists
      - name: Phase 2 - Checkout timefold-solver-enterprise (PR)
        id: checkout-solver-enterprise
        uses: actions/checkout@v4
        continue-on-error: true
        with:
          repository: TimefoldAI/timefold-solver-enterprise
          ref: ${{ github.event.inputs.branch }}
          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
          path: ./timefold-solver-enterprise

      # Checkout the main branch if the PR branch does not exist
      - name: Phase 2 - Checkout timefold-solver-enterprise (main)
        if: steps.checkout-solver-enterprise.outcome != 'success'
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-enterprise
          ref: main
          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
          path: ./timefold-solver-enterprise

      - name: Phase 2 - Quickly build timefold-solver-enterprise
        working-directory: ./timefold-solver-enterprise
        shell: bash
        run: mvn -B -Dquickly clean install

      # No solver version override here, unlike the Phase 1 compile step.
      - name: Phase 2 - Compile the benchmarks
        working-directory: ./timefold-solver-benchmarks
        env:
          ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        shell: bash
        run: |
          mvn clean install -B -Dquickly \
            "-Dversion.tools.provider=${ASYNC_PROFILER_VERSION}"

      - name: Phase 2 - Run the benchmark on the new code
        id: benchmark_new
        working-directory: ./timefold-solver-benchmarks
        env:
          RUN_ID: ${{ github.event.inputs.branch }}
        shell: bash
        run: |
          ./run-scoredirector.sh
          RESULTS_FILE="results/scoredirector/${RUN_ID}/results.json"
          # Rounded for the same reason as in the baseline step: the report
          # step only handles integers.
          RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0] | round' "$RESULTS_FILE")
          RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1] | round' "$RESULTS_FILE")
          echo "RANGE_START=${RANGE_START}" >> "$GITHUB_OUTPUT"
          echo "RANGE_END=${RANGE_END}" >> "$GITHUB_OUTPUT"

      - name: Phase 3 - Archive benchmark data
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }}
          path: |
            ./timefold-solver-benchmarks/scoredirector-benchmark.properties
            ./timefold-solver-benchmarks/results/scoredirector

      - name: Phase 3 - Report results
        working-directory: ./timefold-solver-benchmarks
        env:
          OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }}
          OLD_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }}
          NEW_RANGE_START: ${{ steps.benchmark_new.outputs.RANGE_START }}
          NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }}
          BASELINE_REF: ${{ github.event.inputs.baseline }}
          BRANCH_REF: ${{ github.event.inputs.branch }}
        shell: bash
        run: |
          FAIL=false
          # Midpoint of the baseline interval.
          OLD_MEAN=$(( (OLD_RANGE_END - OLD_RANGE_START) / 2 + OLD_RANGE_START ))
          if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then
            # The intervals overlap, so nothing is statistically significant;
            # only hint at a direction when the new interval lies entirely on
            # one side of the baseline midpoint.
            if [ "$NEW_RANGE_START" -ge "$OLD_MEAN" ]; then
              echo "### Possible improvement ⁉️" >> "$GITHUB_STEP_SUMMARY"
            elif [ "$NEW_RANGE_END" -le "$OLD_MEAN" ]; then
              echo "### Possible regression ⁉️" >> "$GITHUB_STEP_SUMMARY"
            else
              echo "### Statistically insignificant result ⁉️" >> "$GITHUB_STEP_SUMMARY"
            fi
          elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then
            echo "### Statistically significant improvement 🚀" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "### Statistically significant regression 🛑" >> "$GITHUB_STEP_SUMMARY"
            FAIL=true
          fi
          echo "|       |   **Ref**   |   **Min**   |   **Max**   |" >> "$GITHUB_STEP_SUMMARY"
          echo "|:-----:|:-----------:|:-----------:|:-----------:|" >> "$GITHUB_STEP_SUMMARY"
          # Single-quoted format strings keep the markdown backticks literal;
          # inside double quotes bash would run the ref name as a command.
          printf '| _Old_ | `%s` | %s | %s |\n' "$BASELINE_REF" "$OLD_RANGE_START" "$OLD_RANGE_END" >> "$GITHUB_STEP_SUMMARY"
          printf '| _New_ | `%s` | %s | %s |\n' "$BRANCH_REF" "$NEW_RANGE_START" "$NEW_RANGE_END" >> "$GITHUB_STEP_SUMMARY"
          # Only a statistically significant regression fails the job.
          if [ "$FAIL" = true ]; then
            exit 1
          fi