# tpcds #1022
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity and triggers for the TPC-DS benchmark job.
name: tpcds
on:
  # Pushes to main run the workflow only when a file named tpcds.yml changed.
  push:
    branches:
      - 'main'
    paths:
      - '**/tpcds.yml'
  # Same filter for pull requests that target main.
  pull_request:
    branches:
      - 'main'
    paths:
      - '**/tpcds.yml'
  # Scheduled run every day at 20:30 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '30 20 * * *'
  # Manual run; `debug` is read by the job's final step to decide whether to
  # open an interactive session after a failure.
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with upterm debugging enabled"
        required: false
        default: false
jobs:
  # Builds JuiceFS and its Hadoop SDK, starts a local standalone Spark
  # cluster, generates TPC-DS data through spark-sql-perf and runs the
  # benchmark queries. JuiceFS metadata is stored in the Redis service below.
  tpcds:
    # NOTE(review): confirm this runner label is still offered by
    # GitHub-hosted runners before relying on the scheduled run.
    runs-on: ubuntu-20.04
    services:
      redis:
        image: redis
        # Folded scalar: the four flags are joined into one option string.
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - '6379:6379'
    steps:
      # Disable swap on the runner.
      - run: sudo swapoff -a
      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '8'
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1
      # Repository-local composite action.
      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
      # Restored before the SDK build so Maven can reuse downloaded artifacts.
      - name: Cache local Maven repository
        uses: actions/cache@v3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-
      - name: Build Hadoop SDK
        run: make -C sdk/java
      # Formats a volume named "dev" with metadata in Redis database 1 and
      # object data under /tmp/jfs, then opens the data directory to all users
      # (presumably so the non-root Spark processes can write to it).
      - name: Juicefs Format
        run: |
          sudo ./juicefs format redis://127.0.0.1:6379/1 dev --trash-days 0 --bucket /tmp/jfs
          sudo chmod -R 777 /tmp/jfs/dev/
      - name: Set up Spark
        working-directory: /tmp
        run: |
          # Scrape the download index and take the last listed non-preview release.
          # NOTE(review): the release is not pinned, while later steps hard-code
          # scala-2.12 in the --jars path and the job uses Java 8 -- confirm the
          # selected release stays compatible with both.
          curl -fsSL https://dlcdn.apache.org/spark/ > a.html
          spark_version=$(grep -oP '(?<=href=")spark-[^/]+(?=/")' a.html | grep -v preview | tail -1)
          if [ -z "$spark_version" ]; then
            echo "could not determine a Spark release from the download index" >&2
            exit 1
          fi
          echo "spark_version is $spark_version"
          wget -q "https://dlcdn.apache.org/spark/$spark_version/$spark_version-bin-hadoop3.tgz"
          tar -zxf "$spark_version-bin-hadoop3.tgz"
          ln -s "$spark_version-bin-hadoop3" spark
          # Install the freshly built JuiceFS Hadoop jar and its Hadoop config.
          cp ${{ github.workspace }}/sdk/java/target/juicefs-hadoop*jar /tmp/spark/jars
          cp ${{ github.workspace }}/.github/workflows/resources/core-site.xml /tmp/spark/conf
          # PATH for this step; GITHUB_PATH for the steps that follow.
          export PATH=$PATH:/tmp/spark/bin:/tmp/spark/sbin
          echo /tmp/spark/bin >> $GITHUB_PATH
          echo /tmp/spark/sbin >> $GITHUB_PATH
          start-master.sh -i localhost
          # Newer Spark releases ship start-worker.sh and keep start-slave.sh
          # only as a deprecated name; use whichever the download provides.
          if [ -x /tmp/spark/sbin/start-worker.sh ]; then
            start-worker.sh spark://localhost:7077
          else
            start-slave.sh spark://localhost:7077
          fi
      - name: Set up tpcds-kit
        working-directory: /tmp
        run: |
          echo workspace is ${{ github.workspace }}
          sudo ${{ github.workspace }}/.github/scripts/apt_install.sh gcc make flex bison byacc git
          git clone --depth 1 https://github.com/databricks/tpcds-kit.git
          cd tpcds-kit/tools
          make OS=LINUX
      - name: Set up spark-sql-perf
        working-directory: /tmp
        run: |
          # NOTE(review): sbt is requested here before the scala-sbt apt
          # sources and signing key below are registered -- confirm that
          # apt_install.sh can resolve it at this point.
          sudo ${{ github.workspace }}/.github/scripts/apt_install.sh apt-transport-https curl gnupg sbt
          echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
          echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list
          curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo -H gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import
          sudo chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg
          # -p: do not fail when the directory already exists.
          mkdir -p ~/.sbt
          # Resolver list written for sbt.
          cat > ~/.sbt/repositories <<EOF
          [repositories]
          local
          maven-central: https://repo1.maven.org/maven2/
          typesafe-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
          EOF
          git clone --depth 1 https://github.com/databricks/spark-sql-perf.git
          cd spark-sql-perf
          sbt package
      - name: Gen data
        timeout-minutes: 35
        working-directory: /tmp
        run: |
          # Feeds the data-generation script to spark-shell on the local cluster.
          run_datagen() {
            spark-shell \
              --jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
              --master spark://localhost:7077 \
              --deploy-mode client \
              --executor-memory 2G \
              --driver-memory 2G \
              --executor-cores 1 \
              --conf spark.sql.shuffle.partitions=10 \
              -i ${{ github.workspace }}/.github/workflows/resources/tpcds_datagen.scala
          }
          # Retry once if the first attempt exits non-zero.
          run_datagen || run_datagen
      - name: Run tpcds
        timeout-minutes: 30
        working-directory: /tmp
        run: |
          # Feeds the benchmark script to spark-shell on the local cluster.
          run_tpcds() {
            spark-shell \
              --jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
              --master spark://localhost:7077 \
              --deploy-mode client \
              --executor-memory 2G \
              --driver-memory 2G \
              --executor-cores 1 \
              --conf spark.sql.shuffle.partitions=10 \
              -i ${{ github.workspace }}/.github/workflows/resources/tpcds_run.scala
          }
          # Retry once if the first attempt exits non-zero.
          run_tpcds || run_tpcds
      # Always dump the tail of the JuiceFS log; fail the step when the log
      # contains a FATAL entry.
      - name: Log
        if: always()
        run: |
          if [ -f /var/log/juicefs.log ]; then
            echo "juicefs log"
            sudo tail -n 1000 /var/log/juicefs.log
            # sudo, like the tail above, so an unreadable log cannot hide a match.
            if sudo grep "<FATAL>:" /var/log/juicefs.log; then
              exit 1
            fi
          fi
      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"
      # Interactive session, opened only after a failure and only when the
      # manual `debug` input was set or the run is a re-run attempt.
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1