Skip to content

Commit

Permalink
Initial version of reproducibility package
Browse files Browse the repository at this point in the history
  • Loading branch information
jungmair committed Jul 1, 2022
0 parents commit 1d6e5d4
Show file tree
Hide file tree
Showing 34 changed files with 3,170 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
lingodb
duckdb
noisepage
plots
results
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# LingoDB Reproducibility Package

## Execution Environment
* Linux with the following tools installed:
  * standard tools (bash, csplit, cat, tail, make, sed)
  * git
  * [jq](https://stedolan.github.io/jq/)
  * docker (set up so that the current user can run it directly, i.e. without `sudo`)

## Steps to reproduce

```bash
$ bash checkout.sh
...
$ bash reproduce.sh
...
Figure 9: plots/runtime.pdf
Figure 10: plots/compilation.pdf
Figure 11: plots/compilation-phases.pdf
Figure 12: plots/qopt-codestats.pdf
Figure 13: plots/exec-codestats.pdf
Figure 14:
unoptimized: results/linear-regression.mlir
after cross-domain optimization: results/linear-regression-optimized.mlir
```
Afterward, have a look at the produced files in the directories `plots` and `results`.

13 changes: 13 additions & 0 deletions checkout.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Fetches the source trees of the systems used by this package (DuckDB,
# NoisePage, LingoDB) and pins each one to a fixed commit.
#
# Usage: bash checkout.sh
# The clones are created in the current working directory as ./duckdb,
# ./noisepage and ./lingodb.

#######################################
# Clone a repository (unless it is already present) and check out a commit.
# Arguments:
#   $1 - repository URL
#   $2 - target directory
#   $3 - commit hash to check out
# Returns:
#   0 on success, non-zero if the clone or the checkout failed
#######################################
checkout_at() {
  local url=$1
  local dir=$2
  local commit=$3

  # Skip the clone on re-runs; "git clone" refuses to reuse a non-empty directory.
  if [[ ! -d "${dir}/.git" ]]; then
    git clone "${url}" "${dir}" || return
  fi

  # "git -C" runs the checkout inside the clone without changing our own
  # working directory, so a missing clone can never redirect the checkout to
  # whatever repository the script happens to be started from.
  git -C "${dir}" checkout "${commit}"
}

checkout_at https://github.com/duckdb/duckdb.git duckdb \
  2aa4b87218717c16bcd6c65caaa68a7c4ff1483c || exit 1
checkout_at https://github.com/cmu-db/noisepage.git noisepage \
  79276e68fe83322f1249e8a8be96bd63c583ae56 || exit 1
checkout_at https://github.com/lingo-db/lingo-db.git lingodb \
  2c70df0424710d128d2d1411721d41ef37ad0f16 || exit 1
62 changes: 62 additions & 0 deletions reproduce.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Runs the experiments and regenerates the plots and result files.
#
# Usage: bash reproduce.sh
# Expects to be started from the package root (the directory containing
# ./lingodb and ./scripts). Writes into ./results and ./plots, both of which
# are deleted and recreated.
#
# NOTE(review): every "docker run" below passes -it; -t allocates a pseudo-TTY,
# so the script presumably has to be launched from an interactive terminal.
# The flags are kept as-is because a TTY also changes which container streams
# end up in the redirected log files.

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

rm -rf results
mkdir -p results

# --- LingoDB -----------------------------------------------------------------
# Abort instead of running "make reproduce" in the wrong directory when the
# lingodb checkout is missing.
pushd lingodb || die "directory 'lingodb' not found"
make reproduce > ../results/lingodb-execution.log
popd || die "could not return from 'lingodb'"

pushd results || die "directory 'results' not found"
# Split the log at the first line matching "#Query": csplit writes the part
# before the match to xx00 and the remainder to xx01.
csplit lingodb-execution.log "/#Query/"
mv xx00 tpch-results.txt
echo ' query time opt db std llvm conversion compile error'> tpch-times.csv
# Append everything from xx01 starting at its second line (the first line is
# replaced by the header written above).
tail -n+2 xx01 >> tpch-times.csv
popd || die "could not return from 'results'"


# --- Hyper -------------------------------------------------------------------
docker build -t hyper-benchmark -f scripts/hyper-tpch/Dockerfile .

docker run --privileged -it hyper-benchmark /bin/bash -c "python3 hyper-script/benchmark.py /tpch-data-1" > results/hyper-tpch-1.log
docker run --privileged -it hyper-benchmark /bin/bash -c "python3 hyper-script/benchmark.py /tpch-data-10" > results/hyper-tpch-10.log


# --- DuckDB ------------------------------------------------------------------
docker build -t duckdb-benchmark -f scripts/duckdb-tpch/Dockerfile .

docker run --privileged -it duckdb-benchmark /bin/bash -c "./build/release/benchmark/benchmark_runner --threads=1 \"benchmark/tpch/sf1/.*\"" > results/duckdb-tpch-1.log

# Shorten the benchmark names in the log: drop the ".benchmark" suffix and
# rewrite "benchmark/tpch/sf1/q0N" / "benchmark/tpch/sf1/qNN" to "QN" / "QNN".
sed -i 's/\.benchmark//' results/duckdb-tpch-1.log
sed -i 's/benchmark\/tpch\/sf1\/q0/Q/' results/duckdb-tpch-1.log
sed -i 's/benchmark\/tpch\/sf1\/q/Q/' results/duckdb-tpch-1.log

# --- Code statistics ---------------------------------------------------------
docker build -t cloc-docker -f scripts/codestats/Dockerfile .
# Write the CSV headers before codestats.sh runs.
echo "System Component LoC" >> results/exec-codestats.csv
echo "System Component LoC" >> results/qopt-codestats.csv

bash scripts/codestats/codestats.sh

# --- Plots -------------------------------------------------------------------
docker build -t r-plot -f scripts/plots/Dockerfile .

rm -rf plots
mkdir -p plots
# Run each plot script with ./plots mounted at /output inside the container.
# The mount argument is quoted so a working directory containing spaces works.
for plot_script in \
  plot-runtime \
  plot-compilation \
  plot-compilation-phases \
  plot-exec-locs \
  plot-qopt-locs; do
  docker run -v "${PWD}/plots:/output" -it r-plot /bin/bash -c "Rscript /plot-scripts/${plot_script}.r"
done


# --- Figure 14: MLIR before/after optimization -------------------------------
# NOTE(review): the image "mlirdb-repr" is not built in this script; presumably
# it is produced by "make reproduce" in the lingodb checkout - confirm.
docker run --privileged -it mlirdb-repr /bin/bash -c "/build/mlirdb/mlir-db-opt test/lit/pytorch/linear-regression.mlir" > results/linear-regression.mlir
docker run --privileged -it mlirdb-repr /bin/bash -c "/build/mlirdb/mlir-db-opt --torch-backend-to-linalg-on-tensors-backend-pipeline --canonicalize --inline --scf-bufferize --linalg-bufferize --refback-munge-memref-copy --func-bufferize --arith-bufferize --tensor-bufferize -finalizing-bufferize --refback-insert-rng-globals --convert-linalg-to-affine-loops --affine-loop-fusion --affine-loop-unroll=\"unroll-full unroll-num-reps=3\" --affine-scalrep --canonicalize --lower-affine --canonicalize --simplify-memrefs --db-simplify-to-arith --simplify-arithmetics --canonicalize -symbol-privatize=\"exclude=main\" --symbol-dce test/lit/pytorch/linear-regression.mlir" > results/linear-regression-optimized.mlir

#todo

echo "===================== Reproduced Figures ====================="

echo "Figure 9: plots/runtime.pdf"
echo "Figure 10: plots/compilation.pdf"
echo "Figure 11: plots/compilation-phases.pdf"
echo "Figure 12: plots/qopt-codestats.pdf"
echo "Figure 13: plots/exec-codestats.pdf"
echo "Figure 14:"
echo "unoptimized: results/linear-regression.mlir"
echo "after cross-domain optimization: results/linear-regression-optimized.mlir "

10 changes: 10 additions & 0 deletions scripts/codestats/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Image that wraps the `cloc` line-counting tool.
# NOTE(review): ubuntu:impish (21.10) may be past end-of-life, in which case
# `apt-get update` against the default mirrors can fail - confirm before rebuilding.
FROM ubuntu:impish

# Install cloc from the distribution's package repository.
RUN apt-get update && apt-get install -y cloc

# The source path is relative to the build context, so the image has to be
# built with the package root as context.
COPY scripts/codestats/cloc.defs /cloc.defs
# cloc is started in /data.
WORKDIR /data
# Mount points declared for the container.
# NOTE(review): presumably /data receives the sources to count and /filters
# holds filter files supplied by the caller - verify against codestats.sh.
VOLUME "/data"
VOLUME "/filters"
# The container runs cloc directly. CMD supplies the default argument and is
# replaced by any arguments given to `docker run`, so callers that pass their
# own arguments must repeat --force-lang-def themselves if they need it.
ENTRYPOINT ["/usr/bin/cloc"]
CMD ["--force-lang-def=/cloc.defs"]
Loading

0 comments on commit 1d6e5d4

Please sign in to comment.