From 21af64b75fd7c311f839da6f06ae14bf30dc4c49 Mon Sep 17 00:00:00 2001 From: Adam Farley Date: Wed, 29 Nov 2023 19:28:48 +0000 Subject: [PATCH] Creating an automated build triage script (#3549) This will create a weekly issue that contains a list of the latest failing Temurin builds at Adoptium. Signed-off-by: Adam Farley --- .github/workflows/build-autotriage.yml | 32 ++ .../build_autotriage/autotriage_regexes.sh | 29 ++ tooling/build_autotriage/build_autotriage.sh | 279 ++++++++++++++++++ 3 files changed, 340 insertions(+) create mode 100644 .github/workflows/build-autotriage.yml create mode 100644 tooling/build_autotriage/autotriage_regexes.sh create mode 100644 tooling/build_autotriage/build_autotriage.sh diff --git a/.github/workflows/build-autotriage.yml b/.github/workflows/build-autotriage.yml new file mode 100644 index 000000000..e010b3391 --- /dev/null +++ b/.github/workflows/build-autotriage.yml @@ -0,0 +1,32 @@ +--- +# Runs a script to triage the latest timer-initiated Temurin build pipelines. 
+ +name: "Build Autotriage" + +on: + schedule: + - cron: '0 0 * * MON' + push: + paths: + - '**build-autotriage.yml' + - '**build_autotriage.sh' + - '**autotriage_regexes.sh' + +env: + TRIAGE_SCRIPT: "${PWD}/tooling/build_autotriage/build_autotriage.sh" + +jobs: + Label: + runs-on: ubuntu-latest + name: Run Build Triage + steps: + - uses: actions/checkout@v3 + - name: "Run Build Auto Triage" + run: bash "${TRIAGE_SCRIPT}" jdk8u jdk11u jdk17u jdk21u jdk22head + + - name: Create Issue From File + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + uses: JasonEtco/create-an-issue@v2 + with: + filename: ./build_triage_output.md \ No newline at end of file diff --git a/tooling/build_autotriage/autotriage_regexes.sh b/tooling/build_autotriage/autotriage_regexes.sh new file mode 100644 index 000000000..9a2a5999c --- /dev/null +++ b/tooling/build_autotriage/autotriage_regexes.sh @@ -0,0 +1,29 @@ +#!/bin/bash +################################################################################ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +################################################################################ +# +# This script produces arrays of regular expressions that match a type of failure +# that can be searched for in the output of an Eclipse Temurin build of OpenJDK. 
+# +# Each regular expression comes paired with metadata (providing useful information) +# +################################################################################ + +# declare -a arrayOfRegexes +# declare -a arrayOfRegexMetadata +# declare -a arrayOfRegexPreventability + +# TODO. \ No newline at end of file diff --git a/tooling/build_autotriage/build_autotriage.sh b/tooling/build_autotriage/build_autotriage.sh new file mode 100644 index 000000000..8ab8d2e74 --- /dev/null +++ b/tooling/build_autotriage/build_autotriage.sh @@ -0,0 +1,279 @@ +#!/bin/bash +# shellcheck disable=SC1091 +################################################################################ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +################################################################################ +# +# This script turns a list of Temurin build jobs into two things: +# 1. A markdown summary table that gives pass and fail numbers. +# 2. A list of each failing job/subjob link, plus information that can +# help identify the specific issue causing the failure. 
#
################################################################################

declare -a arrayOfFailedJobs
# declare -a arrayOfFailedJobRegexs
declare -a arrayOfAllJDKVersions
declare -a arrayOfUs
declare -a buildIssues

# Placeholder that matches no real JDK version; replaced when a "jdk<N>head"
# argument is parsed.
headJDKVersion=9999

# Markdown report, written relative to the current working directory.
triageOutputFile="build_triage_output.md"

# Intended to import arrayOfRegexes, arrayOfRegexMetadata, and
# arrayOfRegexPreventability (the regex file is currently a placeholder).
# The path is resolved relative to this script so that the import does not
# depend on the caller's working directory.
# shellcheck disable=SC1091
. "$(dirname -- "${BASH_SOURCE[0]}")/autotriage_regexes.sh"

# All temurin-available platforms.
declare -a temurinPlatforms
# The first jdk major version on that platform.
declare -a platformStart
# The last jdk major version on that platform ("99" for ongoing).
declare -a platformEnd

# The three arrays are parallel: index N of each describes the same platform.
temurinPlatforms+=("aix-ppc64");            platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("alpine-linux-aarch64"); platformStart+=(21); platformEnd+=(99)
temurinPlatforms+=("alpine-linux-x64");     platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("linux-aarch64");        platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("linux-arm");            platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("linux-ppc64le");        platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("linux-s390x");          platformStart+=(11); platformEnd+=(99)
temurinPlatforms+=("linux-x64");            platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("mac-aarch64");          platformStart+=(11); platformEnd+=(99)
temurinPlatforms+=("mac-x64");              platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("solaris-sparcv9");      platformStart+=(8);  platformEnd+=(8)
temurinPlatforms+=("solaris-x64");          platformStart+=(8);  platformEnd+=(8)
temurinPlatforms+=("windows-x64");          platformStart+=(8);  platformEnd+=(99)
temurinPlatforms+=("windows-x86-32");       platformStart+=(8);  platformEnd+=(17)

# Records a problem found by this script (as opposed to a failed build).
# Globals:   buildIssues (appended to)
# Arguments: $1 - human-readable description of the problem
# Outputs:   the problem and its 1-based issue number on stdout
errorLog() {
  buildIssues+=("$1")
  echo "ERROR FOUND: Issue ${#buildIssues[@]}: $1"
}

# Parses the arguments to this script.
# Each argument must look like jdk<N>, jdk<N>u or jdk<N>head.
# Globals:   arrayOfAllJDKVersions, arrayOfUs (appended to, kept in step),
#            headJDKVersion (set when a "head" argument is seen)
# Arguments: the script's command line
# Exits:     1 when no argument is given or an argument is malformed
argumentParser() {
  if [[ "$#" -lt 1 ]]; then
    echo "Illegal argument/s. You must specify at least one jdk version."
    exit 1
  fi

  # One anchored pattern: digits are captured in group 1, the optional suffix
  # in group 2. The suffix must be exactly "u" or "head" (the previous
  # "head?" pattern also let "hea" through).
  local -r argRegex='^jdk([0-9]+)(u|head)?$'
  local arg

  for arg in "$@"; do
    if [[ ! "${arg}" =~ ${argRegex} ]]; then
      echo "Script has failed. Reason: Illegal argument/s (name)."
      echo "Correct argument format is: jdk#[u|head]"
      exit 1
    fi

    arrayOfAllJDKVersions+=("${BASH_REMATCH[1]}")
    case "${BASH_REMATCH[2]}" in
      u)
        arrayOfUs+=("u")
        ;;
      head)
        arrayOfUs+=("")
        headJDKVersion="${BASH_REMATCH[1]}"
        ;;
      *)
        arrayOfUs+=("")
        ;;
    esac

    echo "JDK version identified: ${arg:3}"
  done
}

# Iterates over the supplied JDK versions and identifies the latest timer-triggered build URLs for each version.
# This function then checks that we're building Eclipse Temurin on every platform we should be, and makes a list
# of all the failing builds.
# Globals:   arrayOfAllJDKVersions, arrayOfUs, headJDKVersion, temurinPlatforms,
#            platformStart, platformEnd (read); arrayOfFailedJobs (appended to);
#            buildIssues (appended to via errorLog)
# Outputs:   progress messages on stdout
#
# NOTE: the TRSS responses are JSON but are scanned here as comma-separated
# fragments rather than parsed; the three per-build arrays stay aligned only
# if every build record carries buildName, buildNum and buildResult.
identifyFailedBuildsInTimerPipelines() {
  local v p b jsonEntry
  local jdkVersion jdkName jdkJenkinsJobVersion
  local historyUrl childBuildsUrl
  local latestTimerPipelineRaw latestTimerPipeline latestTimerJenkinsJobID
  local foundTimerPipeline listOfPipelineBuilds shorterListOfBuilds
  local expectedBuildName triageThesePlatforms failedJobLink
  local -a jsonEntries listOfBuildNames listOfBuildNums listOfBuildResults

  # Patterns live in variables so [[ =~ ]] treats them as regexes without
  # any backslash-quoting of the embedded double quotes.
  local -r pipelineIdRegex='^\[?\{"_id":"([^"]*)"'
  local -r pipelineNumRegex='^"buildNum":([0-9]+)$'
  local -r timerRegex='^"startBy":"timer"[}]?$'
  local -r weeklyRegex='^"startBy":".*build-scripts/weekly-openjdk'
  local -r eaTagRegex='^"startBy":".*releaseTrigger_[0-9]+ea'
  # "[{[]*" tolerates the "{" / "[{" that prefixes the first key of an object;
  # capturing the value (instead of slicing at fixed offsets) also tolerates a
  # trailing "}" when the key is the last one of an object.
  local -r childNameRegex='^[{[]*"buildName":"([^"]*)"'
  local -r childNumRegex='^[{[]*"buildNum":([0-9]+)'
  local -r childResultRegex='^[{[]*"buildResult":"?([^"}]*)'

  # Iterate over jdk versions.
  echo "Iterating over jdk versions."
  for v in "${!arrayOfAllJDKVersions[@]}"; do
    jdkVersion="${arrayOfAllJDKVersions[v]}"
    jdkName="jdk${jdkVersion}${arrayOfUs[v]}"

    # First we find the latest timer-initiated pipeline for this JDK version.
    historyUrl="https://trss.adoptium.net/api/getBuildHistory?buildName=openjdk${jdkVersion}-pipeline"
    echo "wgetting ${historyUrl}"
    latestTimerPipelineRaw=$(wget -q -O - "${historyUrl}")

    # Split on commas with IFS scoped to this one read, so the shell-wide IFS
    # is never modified and no pathname expansion is applied to the fragments.
    IFS=',' read -r -d '' -a jsonEntries < <(printf '%s\0' "${latestTimerPipelineRaw}")

    latestTimerPipeline=""
    latestTimerJenkinsJobID=""
    foundTimerPipeline=false
    for jsonEntry in "${jsonEntries[@]}"; do
      if [[ "${jsonEntry}" =~ ${pipelineIdRegex} ]]; then
        latestTimerPipeline="${BASH_REMATCH[1]}"
      elif [[ "${jsonEntry}" =~ ${pipelineNumRegex} ]]; then
        latestTimerJenkinsJobID="${BASH_REMATCH[1]}"
      elif [[ "${jsonEntry}" != *user* ]] \
        && [[ "${jsonEntry}" =~ ${timerRegex} \
          || "${jsonEntry}" =~ ${weeklyRegex} \
          || "${jsonEntry}" =~ ${eaTagRegex} ]]; then
        # The id and build number seen most recently belong to this record.
        foundTimerPipeline=true
        break
      fi
    done

    if [[ "${foundTimerPipeline}" != true ]]; then
      errorLog "Could not find any timer/ea-tag triggered pipeline jobs for ${jdkName}. Skipping to the next jdk version."
      continue
    fi

    echo "Found TRSS pipeline id for ${jdkName} - ${latestTimerPipeline}"
    echo "Whose URL is: https://ci.adoptium.net/job/build-scripts/job/openjdk${jdkVersion}-pipeline/${latestTimerJenkinsJobID}/"

    # Now grab a full list of builds launched by this pipeline.
    # Jobs for the head stream are named "jdk-<platform>-temurin" (no version).
    jdkJenkinsJobVersion="${jdkName}"
    if (( jdkVersion == headJDKVersion )); then
      jdkJenkinsJobVersion="jdk"
    fi
    childBuildsUrl="https://trss.adoptium.net/api/getAllChildBuilds?parentId=${latestTimerPipeline}&buildNameRegex=^${jdkJenkinsJobVersion}\-.*temurin\$"
    echo "wgetting ${childBuildsUrl}"
    listOfPipelineBuilds=$(wget -q -O - "${childBuildsUrl}")
    IFS=',' read -r -d '' -a jsonEntries < <(printf '%s\0' "${listOfPipelineBuilds}")

    # Start from empty arrays so results of a previous JDK version never leak in.
    listOfBuildNames=()
    listOfBuildNums=()
    listOfBuildResults=()
    shorterListOfBuilds=""
    for jsonEntry in "${jsonEntries[@]}"; do
      if [[ "${jsonEntry}" =~ ${childNameRegex} ]]; then
        listOfBuildNames+=("${BASH_REMATCH[1]}")
        shorterListOfBuilds+="${jsonEntry},"
      elif [[ "${jsonEntry}" =~ ${childNumRegex} ]]; then
        listOfBuildNums+=("${BASH_REMATCH[1]}")
      elif [[ "${jsonEntry}" =~ ${childResultRegex} ]]; then
        listOfBuildResults+=("${BASH_REMATCH[1]}")
      fi
    done

    echo "That pipeline's builds have been identified. Now validating them."

    # Now iterate over platforms to make sure we're launching every platform we should.
    # The list is kept as ",name,name," so membership can be tested with ",name,".
    triageThesePlatforms=","
    for p in "${!temurinPlatforms[@]}"; do
      expectedBuildName="${jdkJenkinsJobVersion}-${temurinPlatforms[p]}-temurin"

      if [[ "${shorterListOfBuilds}" == *"\"buildName\":\"${expectedBuildName}\""* ]]; then
        # Launched: only acceptable when this version is in the platform's range.
        if (( jdkVersion < platformStart[p] || jdkVersion > platformEnd[p] )); then
          errorLog "Error: Platform ${temurinPlatforms[p]} should not be built for ${jdkJenkinsJobVersion}. Will not triage."
          continue
        fi
      else
        # Not launched: an error only when it should have been. Either way
        # there is nothing to triage for this platform.
        if (( jdkVersion >= platformStart[p] && jdkVersion <= platformEnd[p] )); then
          errorLog "Error: Platform ${temurinPlatforms[p]} should be built for ${jdkJenkinsJobVersion}, but was not launched."
          echo "DEBUG: Looked for this: \"buildName\":\"${expectedBuildName}\""
          echo "DEBUG: In this: $shorterListOfBuilds"
          echo "------"
        fi
        continue
      fi

      # If we get to this stage of the loop, then this is a platform that was both *meant* to be built, and *was* built (or attempted).
      triageThesePlatforms+="${expectedBuildName},"
    done

    # Still just the leading comma means no platform qualified.
    if [[ "${triageThesePlatforms}" == "," ]]; then
      errorLog "Cannot find any valid build platforms launched by jdk ${jdkVersion}${arrayOfUs[v]} pipeline ${latestTimerJenkinsJobID}. Skipping to the next jdk version."
      continue
    fi
    echo "Platforms validated. Identifying build numbers for these platforms: ${triageThesePlatforms:1:-1}"

    for b in "${!listOfBuildNames[@]}"; do
      # Skip builds that are not on the validated list.
      [[ "${triageThesePlatforms}" == *",${listOfBuildNames[b]},"* ]] || continue

      # SUCCESS and UNSTABLE are not triaged; every other result is a failure.
      case "${listOfBuildResults[b]:-}" in
        SUCCESS | UNSTABLE) continue ;;
      esac

      failedJobLink="https://ci.adoptium.net/job/build-scripts/job/jobs/job/${jdkJenkinsJobVersion}/job/${listOfBuildNames[b]}/${listOfBuildNums[b]:-}/"
      echo "Identified a failed build for triage: ${failedJobLink}"
      arrayOfFailedJobs+=("${failedJobLink}")
    done
    echo "Build numbers found, and failures will be added to the array of builds to be triaged."
  done
}

# Takes a single failed jenkins build job URL as a string, and identifies the source of
# the failure if possible.
# Arguments: $1 - URL of the failed Jenkins build
# Outputs:   a progress line on stdout; one bullet appended to the report file
buildFailureTriager() {
  echo "Attempting to triage a job: ${1}"
  echo "- Failed job: ${1}" >> "${triageOutputFile}"
  # Todo: Iterate over the failures found and triage them against the pending array of regexes.
  # For now we'll put them in a tidy md-style file for issue inclusion.
}

# Starts the report with the issue-template front matter.
# The file is truncated first so the front matter is always at the very top,
# even if a report from an earlier run is still present.
startOutputFile() {
  cat > "${triageOutputFile}" <<'EOF'
---
name: Build Issue Summary
about: For triaging the nightly and weekend build failures
title: Build Issue Summary for {{ date | date('YYYY-MM-DD') }}
labels: 'weekly-build-triage'
---

EOF
}

# Runs the whole triage: parse arguments, collect failed builds, write the report.
# Arguments: the script's command line
main() {
  local failedJob issueID

  echo "Build AutoTriage is starting."

  argumentParser "$@"

  identifyFailedBuildsInTimerPipelines

  startOutputFile

  if [[ ${#arrayOfFailedJobs[@]} -gt 0 ]]; then
    echo "# Failed Builds" >> "${triageOutputFile}"
    for failedJob in "${arrayOfFailedJobs[@]}"; do
      buildFailureTriager "$failedJob"
    done
    echo "# End of list" >> "${triageOutputFile}"
  else
    echo "All build jobs passed. Huzzah!"
  fi

  if [[ ${#buildIssues[@]} -gt 0 ]]; then
    echo "# Script Issues" >> "${triageOutputFile}"
    for issueID in "${!buildIssues[@]}"; do
      # 1-based, to agree with the numbers errorLog printed to the console.
      printf -- '- Issue %d: %s\n' "$((issueID + 1))" "${buildIssues[issueID]}" >> "${triageOutputFile}"
    done
    echo "# End of Issues" >> "${triageOutputFile}"
  fi

  echo "Build AutoTriage is complete."
}

# @@@@@@@@@@@@ Script execution starts here @@@@@@@@@@@@

main "$@"