Skip to content

Commit

Permalink
Merge pull request #60 from samansmink/add-benchmarking-suite
Browse files Browse the repository at this point in the history
Add basic benchmarking suite
  • Loading branch information
samansmink authored Jul 24, 2024
2 parents 163d65e + 628c5ad commit f5a94a4
Show file tree
Hide file tree
Showing 279 changed files with 2,150 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
build
benchmark_results
duckdb_benchmark_data/
.idea
cmake-build-debug
duckdb_unittest_tempdir/
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests
# Include the Makefile from extension-ci-tools
include extension-ci-tools/makefiles/duckdb_extension.Makefile

# Include the Makefile from the benchmark directory
include benchmark/benchmark.Makefile

# Generate some test data to test with
generate-data:
python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta
Expand Down
41 changes: 41 additions & 0 deletions benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Benchmarking the Delta Extension

## Basics
A primitive benchmarking suite exists for the Delta extension.

To run the benchmarks, first build the extension with the benchmark runner enabled:
```shell
BUILD_BENCHMARK=1 make
```

Then, make sure that the generated data is created using:
```shell
make generate-data
```

Then to run a benchmark, use one of the benchmark Makefile targets prefixed with `bench-run-`:
```shell
make bench-run-tpch-sf1
```
Now the TPCH benchmark will be run twice, once on parquet files and once on a delta table, with the output of each run written to a CSV file in `benchmark_results/`.

To create a plot from the results run:
```shell
make plot
```

## More options
Specific benchmarks can be run from a suite using the `BENCHMARK_PATTERN` variable, which defaults to `.*` (every benchmark in the suite). For example, to compare
only Q01 from TPCH SF1, run:
```shell
BENCHMARK_PATTERN=q01.benchmark make bench-run-tpch-sf1
```

Also, we can run all local benchmarks using:
```shell
make bench-run-all-local
```
Or all remote benchmarks (these expect `BENCHMARK_DATA_S3_LINEITEM_SF1` to be set) using:
```shell
make bench-run-all-remote
```
68 changes: 68 additions & 0 deletions benchmark/benchmark.Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# The utility targets below are commands, not files.
.PHONY: bench-output-dir clean_benchmark plot

# Building with BUILD_BENCHMARK=1 appends -DBUILD_BENCHMARKS=1 to the toolchain
# flags, which enables the benchmark runner.
ifeq "$(BUILD_BENCHMARK)" "1"
TOOLCHAIN_FLAGS := $(TOOLCHAIN_FLAGS) -DBUILD_BENCHMARKS=1
endif

# With no BENCHMARK_PATTERN given, match every benchmark in the selected suite.
ifeq "$(BENCHMARK_PATTERN)" ""
BENCHMARK_PATTERN := .*
endif

# Directory the bench-run-* targets write their result files into.
bench-output-dir:
	mkdir -p benchmark_results

# Throw away all collected benchmark results.
clean_benchmark:
	rm -rf benchmark_results

# Plot the collected results.
plot:
	python3 scripts/plot.py


############### BENCHMARK TARGETS ###############

###
# TPCH LOCAL
###

# These targets are commands, not files: always run them, even if a file with
# the same name happens to exist.
.PHONY: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet bench-run-tpch-sf1

# TPCH SF1 on delta table.
# stdout and stderr are captured with the POSIX form `> file 2>&1`. The
# bash-only `&>` is parsed by a plain /bin/sh (e.g. dash) as "run the command
# in the background, then truncate the file", which leaves the CSV empty.
bench-run-tpch-sf1-delta: bench-output-dir
	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/${BENCHMARK_PATTERN}' > benchmark_results/tpch-sf1-delta.csv 2>&1

# TPCH SF1 on parquet files.
# --root-dir './' is passed here as well, matching every other bench-run-*
# target, so the benchmark path is resolved relative to this repository.
bench-run-tpch-sf1-parquet: bench-output-dir
	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet/${BENCHMARK_PATTERN}' > benchmark_results/tpch-sf1-parquet.csv 2>&1

# Compares TPCH SF1 on parquet files vs on a delta table by running both.
bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet

###
# TPCH REMOTE
###

# These targets are commands, not files: always run them, even if a file with
# the same name happens to exist.
.PHONY: bench-run-tpch-sf1-remote-delta bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote

# TPCH on remote delta table (set BENCHMARK_DATA_S3_LINEITEM_SF1).
# stdout and stderr are captured with the POSIX form `> file 2>&1`. The
# bash-only `&>` is parsed by a plain /bin/sh (e.g. dash) as "run the command
# in the background, then truncate the file", which leaves the CSV empty.
bench-run-tpch-sf1-remote-delta: bench-output-dir
	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/${BENCHMARK_PATTERN}' > benchmark_results/tpch-sf1-remote-delta.csv 2>&1

# TPCH on remote parquet table (set BENCHMARK_DATA_S3_LINEITEM_SF1).
bench-run-tpch-sf1-remote-parquet: bench-output-dir
	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/${BENCHMARK_PATTERN}' > benchmark_results/tpch-sf1-remote-parquet.csv 2>&1

# Compares TPCH SF1 on remote parquet files vs on a remote delta table by
# running both.
bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote-delta

###
# TPCDS LOCAL
###

# These targets are commands, not files: always run them, even if a file with
# the same name happens to exist.
.PHONY: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet bench-run-tpcds-sf1

# TPCDS SF1 on delta table.
# stdout and stderr are captured with the POSIX form `> file 2>&1`. The
# bash-only `&>` is parsed by a plain /bin/sh (e.g. dash) as "run the command
# in the background, then truncate the file", which leaves the CSV empty.
bench-run-tpcds-sf1-delta: bench-output-dir
	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/${BENCHMARK_PATTERN}' > benchmark_results/tpcds-sf1-delta.csv 2>&1

# TPCDS SF1 on parquet files.
bench-run-tpcds-sf1-parquet: bench-output-dir
	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/${BENCHMARK_PATTERN}' > benchmark_results/tpcds-sf1-parquet.csv 2>&1

# Compares TPCDS SF1 on parquet files vs on a delta table by running both.
bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet

###
# ALL
###
# Aggregate targets are commands, not files: always run them, even if a file
# with the same name happens to exist.
.PHONY: bench-run-all-local bench-run-all-remote

# Runs every local suite: TPCDS SF1 and TPCH SF1.
bench-run-all-local: bench-run-tpcds-sf1 bench-run-tpch-sf1

# Runs every remote suite (currently only TPCH SF1).
bench-run-all-remote: bench-run-tpch-sf1-remote
24 changes: 24 additions & 0 deletions benchmark/tpcds/sf1-delta/load.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- Expose every TPC-DS SF1 table as a view over its generated Delta table.
CREATE VIEW call_center AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/call_center/delta_lake');
CREATE VIEW catalog_page AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/catalog_page/delta_lake');
CREATE VIEW catalog_returns AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/catalog_returns/delta_lake');
CREATE VIEW catalog_sales AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/catalog_sales/delta_lake');
CREATE VIEW customer AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/customer/delta_lake');
CREATE VIEW customer_demographics AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/customer_demographics/delta_lake');
CREATE VIEW customer_address AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/customer_address/delta_lake');
CREATE VIEW date_dim AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/date_dim/delta_lake');
CREATE VIEW household_demographics AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/household_demographics/delta_lake');
CREATE VIEW inventory AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/inventory/delta_lake');
CREATE VIEW income_band AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/income_band/delta_lake');
CREATE VIEW item AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/item/delta_lake');
CREATE VIEW promotion AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/promotion/delta_lake');
CREATE VIEW reason AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/reason/delta_lake');
CREATE VIEW ship_mode AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/ship_mode/delta_lake');
CREATE VIEW store AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/store/delta_lake');
CREATE VIEW store_returns AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/store_returns/delta_lake');
CREATE VIEW store_sales AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/store_sales/delta_lake');
CREATE VIEW time_dim AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/time_dim/delta_lake');
CREATE VIEW warehouse AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/warehouse/delta_lake');
CREATE VIEW web_page AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/web_page/delta_lake');
CREATE VIEW web_returns AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/web_returns/delta_lake');
CREATE VIEW web_sales AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/web_sales/delta_lake');
CREATE VIEW web_site AS SELECT * FROM delta_scan('./data/generated/tpcds_sf1/web_site/delta_lake');
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q01.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q01.benchmark
# description: Run query 01 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=1
QUERY_NUMBER_PADDED=01
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q02.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q02.benchmark
# description: Run query 02 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=2
QUERY_NUMBER_PADDED=02
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q03.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q03.benchmark
# description: Run query 03 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=3
QUERY_NUMBER_PADDED=03
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q04.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q04.benchmark
# description: Run query 04 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=4
QUERY_NUMBER_PADDED=04
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q05.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q05.benchmark
# description: Run query 05 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=5
QUERY_NUMBER_PADDED=05
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q06.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q06.benchmark
# description: Run query 06 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=6
QUERY_NUMBER_PADDED=06
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q07.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q07.benchmark
# description: Run query 07 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=7
QUERY_NUMBER_PADDED=07
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q08.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q08.benchmark
# description: Run query 08 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=8
QUERY_NUMBER_PADDED=08
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q09.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q09.benchmark
# description: Run query 09 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=9
QUERY_NUMBER_PADDED=09
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q10.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q10.benchmark
# description: Run query 10 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=10
QUERY_NUMBER_PADDED=10
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q11.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q11.benchmark
# description: Run query 11 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=11
QUERY_NUMBER_PADDED=11
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q12.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q12.benchmark
# description: Run query 12 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=12
QUERY_NUMBER_PADDED=12
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q13.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q13.benchmark
# description: Run query 13 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=13
QUERY_NUMBER_PADDED=13
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q14.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q14.benchmark
# description: Run query 14 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=14
QUERY_NUMBER_PADDED=14
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q15.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q15.benchmark
# description: Run query 15 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=15
QUERY_NUMBER_PADDED=15
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q16.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q16.benchmark
# description: Run query 16 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=16
QUERY_NUMBER_PADDED=16
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q17.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q17.benchmark
# description: Run query 17 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=17
QUERY_NUMBER_PADDED=17
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q18.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q18.benchmark
# description: Run query 18 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=18
QUERY_NUMBER_PADDED=18
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q19.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q19.benchmark
# description: Run query 19 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=19
QUERY_NUMBER_PADDED=19
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q20.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q20.benchmark
# description: Run query 20 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=20
QUERY_NUMBER_PADDED=20
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q21.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q21.benchmark
# description: Run query 21 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=21
QUERY_NUMBER_PADDED=21
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q22.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q22.benchmark
# description: Run query 22 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=22
QUERY_NUMBER_PADDED=22
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q23.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q23.benchmark
# description: Run query 23 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=23
QUERY_NUMBER_PADDED=23
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q24.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q24.benchmark
# description: Run query 24 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=24
QUERY_NUMBER_PADDED=24
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q25.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q25.benchmark
# description: Run query 25 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=25
QUERY_NUMBER_PADDED=25
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q26.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q26.benchmark
# description: Run query 26 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=26
QUERY_NUMBER_PADDED=26
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q27.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q27.benchmark
# description: Run query 27 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=27
QUERY_NUMBER_PADDED=27
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q28.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q28.benchmark
# description: Run query 28 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=28
QUERY_NUMBER_PADDED=28
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q29.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q29.benchmark
# description: Run query 29 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=29
QUERY_NUMBER_PADDED=29
7 changes: 7 additions & 0 deletions benchmark/tpcds/sf1-delta/q30.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/tpcds/sf1-delta/q30.benchmark
# description: Run query 30 from the TPC-DS benchmark
# group: [sf1-delta]

template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
QUERY_NUMBER=30
QUERY_NUMBER_PADDED=30
Loading

0 comments on commit f5a94a4

Please sign in to comment.