-
Notifications
You must be signed in to change notification settings - Fork 988
183 lines (165 loc) · 6.41 KB
/
tpcds.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# TPC-DS workflow: builds the project and its Hadoop SDK, formats a JuiceFS
# volume against the Redis service container, starts a local standalone Spark
# cluster, then runs the TPC-DS data-generation and query scripts on it.
name: tpcds

on:
  # Pushes and pull requests only trigger the job when this workflow file
  # itself changes; regular coverage comes from the nightly schedule.
  push:
    branches:
      - 'main'
    paths:
      - '**/tpcds.yml'
  pull_request:
    branches:
      - 'main'
    paths:
      - '**/tpcds.yml'
  schedule:
    - cron: '30 20 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  tpcds:
    # NOTE(review): confirm that the ubuntu-20.04 hosted runner label is still
    # offered by GitHub before relying on this job.
    runs-on: ubuntu-20.04
    services:
      # Redis instance addressed by the "Juicefs Format" step
      # (redis://127.0.0.1:6379/1).
      redis:
        image: redis
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Quoted so the mapping is always read as a string.
          - "6379:6379"
    steps:
      - run: sudo swapoff -a

      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '8'

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      # Local composite action; it only resolves after the checkout above.
      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Cache local Maven repository
        uses: actions/cache@v3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-

      - name: Build Hadoop SDK
        run: make -C sdk/java

      - name: Juicefs Format
        run: |
          sudo ./juicefs format redis://127.0.0.1:6379/1 dev --trash-days 0 --bucket /tmp/jfs
          sudo chmod -R 777 /tmp/jfs/dev/

      # Scrapes the Apache download index and takes the last non-preview
      # release listed, then starts a one-master/one-worker standalone cluster.
      # NOTE(review): the Spark version floats with upstream releases while
      # Java is pinned to 8 and the jar path below assumes scala-2.12 —
      # confirm these stay compatible.
      - name: Set up Spark
        working-directory: /tmp
        run: |
          curl https://dlcdn.apache.org/spark/ > a.html
          spark_version=$(grep -oP '(?<=href=")spark-[^/]+(?=/")' a.html | grep -v preview | tail -1)
          echo "spark_version is $spark_version"
          wget -q https://dlcdn.apache.org/spark/$spark_version/$spark_version-bin-hadoop3.tgz
          tar -zxf $spark_version-bin-hadoop3.tgz
          ln -s $spark_version-bin-hadoop3 spark
          cp ${{ github.workspace }}/sdk/java/target/juicefs-hadoop*jar /tmp/spark/jars
          cp ${{ github.workspace }}/.github/workflows/resources/core-site.xml /tmp/spark/conf
          export PATH=$PATH:/tmp/spark/bin:/tmp/spark/sbin
          echo /tmp/spark/bin >> $GITHUB_PATH
          echo /tmp/spark/sbin >> $GITHUB_PATH
          start-master.sh -i localhost
          start-slave.sh spark://localhost:7077

      - name: Set up tpcds-kit
        working-directory: /tmp
        run: |
          echo workspace is ${{ github.workspace }}
          sudo ${{ github.workspace }}/.github/scripts/apt_install.sh gcc make flex bison byacc git
          git clone --depth 1 https://github.com/databricks/tpcds-kit.git
          cd tpcds-kit/tools
          make OS=LINUX

      # NOTE(review): sbt is requested from apt_install.sh before the sbt apt
      # repository and signing key are registered below — confirm the script
      # (or the runner image) already provides sbt.
      - name: Set up spark-sql-perf
        working-directory: /tmp
        run: |
          sudo ${{ github.workspace }}/.github/scripts/apt_install.sh apt-transport-https curl gnupg sbt
          echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
          echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list
          curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo -H gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import
          sudo chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg
          mkdir -p ~/.sbt
          cat > ~/.sbt/repositories <<EOF
          [repositories]
          local
          maven-central: https://repo1.maven.org/maven2/
          typesafe-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
          EOF
          git clone --depth 1 https://github.com/databricks/spark-sql-perf.git
          cd spark-sql-perf
          sbt package

      # The spark-shell invocation is repeated after `||`, so a failed first
      # attempt is retried exactly once with identical arguments.
      - name: Gen data
        timeout-minutes: 35
        working-directory: /tmp
        run: |
          spark-shell \
            --jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
            --master spark://localhost:7077 \
            --deploy-mode client \
            --executor-memory 2G \
            --driver-memory 2G \
            --executor-cores 1 \
            --conf spark.sql.shuffle.partitions=10 \
            -i ${{ github.workspace }}/.github/workflows/resources/tpcds_datagen.scala \
          || spark-shell \
            --jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
            --master spark://localhost:7077 \
            --deploy-mode client \
            --executor-memory 2G \
            --driver-memory 2G \
            --executor-cores 1 \
            --conf spark.sql.shuffle.partitions=10 \
            -i ${{ github.workspace }}/.github/workflows/resources/tpcds_datagen.scala

      # Same retry-once shape as "Gen data", running the query script instead.
      - name: Run tpcds
        timeout-minutes: 30
        working-directory: /tmp
        run: |
          spark-shell \
            --jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
            --master spark://localhost:7077 \
            --deploy-mode client \
            --executor-memory 2G \
            --driver-memory 2G \
            --executor-cores 1 \
            --conf spark.sql.shuffle.partitions=10 \
            -i ${{ github.workspace }}/.github/workflows/resources/tpcds_run.scala \
          || spark-shell \
            --jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
            --master spark://localhost:7077 \
            --deploy-mode client \
            --executor-memory 2G \
            --driver-memory 2G \
            --executor-cores 1 \
            --conf spark.sql.shuffle.partitions=10 \
            -i ${{ github.workspace }}/.github/workflows/resources/tpcds_run.scala

      # Always runs. Prints the tail of the JuiceFS log when it exists and
      # fails the step if the log contains a "<FATAL>:" line.
      - name: Log
        if: always()
        run: |
          if [ -f /var/log/juicefs.log ]; then
            echo "juicefs log"
            sudo tail -n 1000 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      # Opens an interactive debug session on failure, but only when the run
      # was dispatched with debug=true or is a re-run (run_attempt != 1).
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1