Skip to content

Pulsar CI Flaky

Pulsar CI Flaky #459

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
name: Pulsar CI Flaky
on:
pull_request:
branches:
- master
schedule:
# scheduled job with JDK 17
- cron: '0 12 * * *'
# scheduled job with JDK 21
# if cron expression is changed, make sure to update the expression in jdk_major_version step in preconditions job
- cron: '0 6 * * *'
workflow_dispatch:
inputs:
collect_coverage:
description: 'Collect test coverage and upload to Codecov'
required: true
type: boolean
default: true
jdk_major_version:
description: 'JDK major version to use for the build'
required: true
type: choice
options:
- '17'
- '21'
default: '17'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}${{ github.event_name == 'workflow_dispatch' && github.event.inputs.jdk_major_version || '' }}
cancel-in-progress: true
env:
MAVEN_OPTS: -Xss1500k -Xmx1500m -Daether.connector.http.reuseConnections=false -Daether.connector.requestTimeout=60000 -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3 -Dmaven.wagon.http.retryHandler.requestSentEnabled=true -Dmaven.wagon.http.serviceUnavailableRetryStrategy.class=standard -Dmaven.wagon.rto=60000
# defines the retention period for the intermediate build artifacts needed for rerunning a failed build job
# it's possible to rerun individual failed jobs when the build artifacts are available
# if the artifacts have already been expired, the complete workflow can be rerun by closing and reopening the PR or by rebasing the PR
ARTIFACT_RETENTION_DAYS: 3
jobs:
preconditions:
name: Preconditions
runs-on: ubuntu-22.04
outputs:
docs_only: ${{ steps.check_changes.outputs.docs_only }}
changed_tests: ${{ steps.changes.outputs.tests_files }}
need_owasp: ${{ steps.changes.outputs.need_owasp }}
collect_coverage: ${{ steps.check_coverage.outputs.collect_coverage }}
jdk_major_version: ${{ steps.jdk_major_version.outputs.jdk_major_version }}
steps:
- name: Cancel scheduled jobs in forks by default
if: ${{ github.repository != 'apache/pulsar' && github.event_name == 'schedule' }}
uses: actions/github-script@v6
with:
script: |
await github.rest.actions.cancelWorkflowRun({owner: context.repo.owner, repo: context.repo.repo, run_id: context.runId});
process.exit(1);
- name: Select JDK major version
id: jdk_major_version
run: |
# use JDK 21 for the scheduled build with cron expression '0 6 * * *'
if [[ "${{ github.event_name == 'schedule' && github.event.schedule == '0 6 * * *' && 'true' || 'false' }}" == "true" ]]; then
echo "jdk_major_version=21" >> $GITHUB_OUTPUT
exit 0
fi
# use JDK 17 for build unless overridden with workflow_dispatch input
echo "jdk_major_version=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.jdk_major_version || '17'}}" >> $GITHUB_OUTPUT
- name: checkout
if: ${{ github.event_name == 'pull_request' }}
uses: actions/checkout@v3
- name: Detect changed files
if: ${{ github.event_name == 'pull_request' }}
id: changes
uses: apache/pulsar-test-infra/paths-filter@master
with:
filters: .github/changes-filter.yaml
list-files: csv
- name: Check changed files
if: ${{ github.event_name == 'pull_request' }}
id: check_changes
run: |
if [[ "${GITHUB_EVENT_NAME}" != "schedule" && "${GITHUB_EVENT_NAME}" != "workflow_dispatch" ]]; then
echo "docs_only=${{ fromJSON(steps.changes.outputs.all_count) == fromJSON(steps.changes.outputs.docs_count) && fromJSON(steps.changes.outputs.docs_count) > 0 }}" >> $GITHUB_OUTPUT
else
echo docs_only=false >> $GITHUB_OUTPUT
fi
- name: Check if coverage should be collected
id: check_coverage
run: |
echo "collect_coverage=${{
(steps.check_changes.outputs.docs_only != 'true' && github.event_name != 'workflow_dispatch'
&& (github.base_ref == 'master' || github.ref_name == 'master'))
|| (github.event_name == 'workflow_dispatch' && github.event.inputs.collect_coverage == 'true')
}}" >> $GITHUB_OUTPUT
- name: Check if the PR has been approved for testing
if: ${{ steps.check_changes.outputs.docs_only != 'true' && github.repository == 'apache/pulsar' && github.event_name == 'pull_request' }}
env:
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_TOKEN: ${{ github.token }}
run: |
build/pulsar_ci_tool.sh check_ready_to_test
build-and-test:
needs: preconditions
name: Flaky tests suite
env:
JOB_NAME: Flaky tests suite
COLLECT_COVERAGE: "${{ needs.preconditions.outputs.collect_coverage }}"
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
CI_JDK_MAJOR_VERSION: ${{ needs.preconditions.outputs.jdk_major_version }}
runs-on: ubuntu-22.04
timeout-minutes: 100
if: ${{ needs.preconditions.outputs.docs_only != 'true' }}
steps:
- name: checkout
uses: actions/checkout@v3
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup ssh access to build runner VM
# ssh access is enabled for builds in own forks
if: ${{ github.repository != 'apache/pulsar' && github.event_name == 'pull_request' }}
uses: ./.github/actions/ssh-access
continue-on-error: true
with:
limit-access-to-actor: true
- name: Cache local Maven repository
uses: actions/cache@v3
timeout-minutes: 5
with:
path: |
~/.m2/repository/*/*/*
!~/.m2/repository/org/apache/pulsar
key: ${{ runner.os }}-m2-dependencies-core-modules-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-m2-dependencies-core-modules-
- name: Set up JDK ${{ env.CI_JDK_MAJOR_VERSION }}
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: ${{ env.CI_JDK_MAJOR_VERSION }}
- name: Build core-modules
run: |
mvn -B -T 1C -ntp -Pcore-modules,-main clean install -DskipTests -Dlicense.skip=true -Drat.skip=true
- name: Run unit test group BROKER_FLAKY
run: |
CHANGED_TESTS="${{ needs.preconditions.outputs.tests_files }}" ./build/run_unit_group.sh BROKER_FLAKY
- name: print JVM thread dumps when cancelled
if: cancelled()
run: $GITHUB_WORKSPACE/build/pulsar_ci_tool.sh print_thread_dumps
- name: Aggregates all test reports to ./test-reports and ./surefire-reports directories
if: ${{ always() }}
uses: ./.github/actions/copy-test-reports
- name: Create Jacoco reports
if: ${{ needs.preconditions.outputs.collect_coverage == 'true' }}
run: |
$GITHUB_WORKSPACE/build/pulsar_ci_tool.sh create_test_coverage_report
cd $GITHUB_WORKSPACE/target
zip -qr jacoco_test_coverage_report_flaky.zip jacoco_test_coverage_report || true
- name: Upload Jacoco report files to build artifacts
if: ${{ needs.preconditions.outputs.collect_coverage == 'true' }}
uses: actions/upload-artifact@v3
with:
name: Jacoco-coverage-report-flaky
path: target/jacoco_test_coverage_report_flaky.zip
retention-days: 3
- name: Upload to Codecov
if: ${{ needs.preconditions.outputs.collect_coverage == 'true' }}
uses: ./.github/actions/upload-coverage
with:
flags: unittests
- name: Publish Test Report
uses: apache/pulsar-test-infra/action-junit-report@master
if: ${{ always() }}
with:
report_paths: 'test-reports/TEST-*.xml'
annotate_only: 'true'
- name: Upload Surefire reports
uses: actions/upload-artifact@v3
if: ${{ !success() }}
with:
name: Unit-BROKER_FLAKY-surefire-reports
path: surefire-reports
retention-days: 7
- name: Upload possible heap dump
uses: actions/upload-artifact@v3
if: ${{ always() }}
with:
name: Unit-BROKER_FLAKY-heapdump
path: /tmp/*.hprof
retention-days: 7
if-no-files-found: ignore