-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
executable file
·57 lines (49 loc) · 1.52 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/sh
# Generates a TPC-DS dataset with spark-sql-perf and then runs a subset of the
# TPC-DS queries against it through spark-submit.
#
# -e: stop at the first failing command.
# -u: treat a reference to an unset variable as an error, so a misspelled
#     parameter name fails loudly instead of expanding to an empty string.
set -eu
################# PARAMETERS #################
# Scale factor, passed to the generator as --scaleFactor.
SCALE=100
# Storage format, passed to both jobs as --format.
FORMAT=parquet
# Passed to the benchmark job as --iterations.
ITERATIONS=2
# Dataset location, passed to both jobs as --location. The suffix follows
# FORMAT so the path and DATABASE stay in step when FORMAT is changed.
LOCATION="jfs://demo/tmp/performance-datasets/tpcds/sf${SCALE}-${FORMAT}/"
# Passed to the benchmark job as --database.
DATABASE=tpcds_${FORMAT}_${SCALE}_jfs
# Comma-separated query names, passed to the benchmark job as --filterQueries.
FILTER_QUERIES="q1-v2.4,q2-v2.4,q3-v2.4,q4-v2.4,q5-v2.4,q6-v2.4,q7-v2.4,q8-v2.4,q9-v2.4,q10-v2.4"
# Passed to the benchmark job as --enableHive.
ENABLE_HIVE=false
# When true, KEYTAB and PRINCIPAL are appended to SPARK_CONF as
# --keytab / --principal.
ENABLE_KERBEROS=false
KEYTAB=/root/hdfs.keytab
PRINCIPAL=hdfs
# spark-submit options shared by both jobs. POSIX sh has no arrays, so this is
# a whitespace-separated word list that is expanded unquoted at the call sites;
# individual values must therefore not contain whitespace.
SPARK_CONF="
--master yarn
--deploy-mode client
--driver-memory 4g
--executor-memory 8G
--executor-cores 4
--num-executors 4
--conf spark.sql.adaptive.enabled=true
--conf spark.driver.memoryOverhead=1g
--conf spark.executor.memoryOverhead=2g
"
################# PARAMETERS #################
# Resolve the directory that contains this script and run from there, so the
# relative jar and tools paths given to spark-submit resolve.
# - Quoting keeps a script path containing spaces or glob characters intact.
# - `&&` instead of `;` makes a failed cd fail the whole substitution, which
#   stops the script under `set -e` rather than silently staying in the
#   caller's directory.
# - CDPATH is cleared for the cd so it cannot print the target directory into
#   the captured output.
CURRENT_DIR=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)
cd -- "${CURRENT_DIR}"

# Append the Kerberos credentials only when the flag is exactly "true".
# The flag is compared as a string rather than executed as a command, so an
# empty or unset value leaves Kerberos disabled instead of enabling it.
if [ "${ENABLE_KERBEROS:-false}" = "true" ]; then
  SPARK_CONF="${SPARK_CONF} --keytab ${KEYTAB} --principal ${PRINCIPAL}"
fi
# Trace each command from here on, so the fully expanded spark-submit command
# lines are written to the log.
set -x

# Generate data for tpcds
# SPARK_CONF is expanded unquoted on purpose: it is a whitespace-separated
# option list that must be split into separate arguments. Every other
# expansion is quoted so each value reaches the job as exactly one argument.
# shellcheck disable=SC2086
/opt/spark/bin/spark-submit ${SPARK_CONF} \
  --class com.databricks.spark.sql.perf.tpcds.GenTPCDSData \
  spark-sql-perf/target/scala-2.12/spark-sql-perf-assembly-0.5.2-SNAPSHOT.jar \
  --dsdgenTools tpcds-kit/tools.tar.gz \
  --scaleFactor "${SCALE}" \
  --location "${LOCATION}" \
  --format "${FORMAT}"
# Run tpcds benchmark
# SPARK_CONF is expanded unquoted on purpose: it is a whitespace-separated
# option list that must be split into separate arguments. Every other
# expansion is quoted so each value reaches the job as exactly one argument;
# unquoted, an empty value would vanish and shift the arguments after it.
# shellcheck disable=SC2086
/opt/spark/bin/spark-submit ${SPARK_CONF} \
  --class com.databricks.spark.sql.perf.tpcds.RunTPCDS \
  spark-sql-perf/target/scala-2.12/spark-sql-perf-assembly-0.5.2-SNAPSHOT.jar \
  --location "${LOCATION}" \
  --database "${DATABASE}" \
  --format "${FORMAT}" \
  --iterations "${ITERATIONS}" \
  --filterQueries "${FILTER_QUERIES}" \
  --enableHive "${ENABLE_HIVE}"