From a727815cb38ae78cba8dd724cbb0bee15b16f8a5 Mon Sep 17 00:00:00 2001 From: azim Date: Sat, 16 Nov 2024 22:23:52 +0100 Subject: [PATCH] generate tables 4 and 7 --- .../{alp_compression_ratio.csv => alp.csv} | 0 ...lp_rd_compression_ratio.csv => alp_rd.csv} | 0 .../evalimplsts.csv => evalimplsts/alp.csv} | 0 .../float/{alp32.csv => alp.csv} | 0 .../{draw_table_4.py => generate_tables.py} | 35 +++++++---- publication/master_script/master_script.sh | 5 +- .../bench_compression_ratio/alp.cpp | 6 +- .../bench_compression_ratio/alp32.cpp | 2 +- publication/tables/table_4.md | 60 +++++++++---------- publication/tables/table_7.md | 6 ++ 10 files changed, 65 insertions(+), 49 deletions(-) rename publication/compression_ratio_result/double/{alp_compression_ratio.csv => alp.csv} (100%) rename publication/compression_ratio_result/double/{alp_rd_compression_ratio.csv => alp_rd.csv} (100%) rename publication/compression_ratio_result/{double/evalimplsts.csv => evalimplsts/alp.csv} (100%) rename publication/compression_ratio_result/float/{alp32.csv => alp.csv} (100%) rename publication/master_script/{draw_table_4.py => generate_tables.py} (78%) create mode 100644 publication/tables/table_7.md diff --git a/publication/compression_ratio_result/double/alp_compression_ratio.csv b/publication/compression_ratio_result/double/alp.csv similarity index 100% rename from publication/compression_ratio_result/double/alp_compression_ratio.csv rename to publication/compression_ratio_result/double/alp.csv diff --git a/publication/compression_ratio_result/double/alp_rd_compression_ratio.csv b/publication/compression_ratio_result/double/alp_rd.csv similarity index 100% rename from publication/compression_ratio_result/double/alp_rd_compression_ratio.csv rename to publication/compression_ratio_result/double/alp_rd.csv diff --git a/publication/compression_ratio_result/double/evalimplsts.csv b/publication/compression_ratio_result/evalimplsts/alp.csv similarity index 100% rename from 
publication/compression_ratio_result/double/evalimplsts.csv rename to publication/compression_ratio_result/evalimplsts/alp.csv diff --git a/publication/compression_ratio_result/float/alp32.csv b/publication/compression_ratio_result/float/alp.csv similarity index 100% rename from publication/compression_ratio_result/float/alp32.csv rename to publication/compression_ratio_result/float/alp.csv diff --git a/publication/master_script/draw_table_4.py b/publication/master_script/generate_tables.py similarity index 78% rename from publication/master_script/draw_table_4.py rename to publication/master_script/generate_tables.py index 9bf2913..34554c1 100644 --- a/publication/master_script/draw_table_4.py +++ b/publication/master_script/generate_tables.py @@ -3,12 +3,10 @@ import os -def generate_markdown_table(): - # Define the path pattern for the CSV files in the parent directory - script_dir = os.path.dirname(os.path.abspath(__file__)) - +def generate_markdown_table(input_folder, output_file, column_order): # Define the path pattern for the CSV files relative to the script's directory - file_pattern = os.path.join(script_dir, "../compression_ratio_result/double/*.csv") + script_dir = os.path.dirname(os.path.abspath(__file__)) + file_pattern = os.path.join(script_dir, f"../compression_ratio_result/{input_folder}/*.csv") csv_files = glob.glob(file_pattern) # Define a dictionary to rename files to match the specified column names @@ -30,7 +28,7 @@ def generate_markdown_table(): # Load data from each file and insert it into the correct column for file in csv_files: # Extract the prefix of the file to use as the column name - prefix = file.split("/")[-1].split("_compression_ratio.csv")[0].lower() + prefix = file.split("/")[-1].split(".csv")[0].lower() if prefix in file_to_column: column_name = file_to_column[prefix] df = pd.read_csv(file) @@ -45,7 +43,6 @@ def generate_markdown_table(): df_combined.rename(columns={"dataset": "Dataset"}, inplace=True) # Ensure all expected 
columns are included in the specified order
-    column_order = ["Dataset", "Gor", "Ch", "Ch128", "Patas", "PDE", "Elf", "Alp", "LWC+Alp", "Zstd"]
     for col in column_order:
         if col not in df_combined.columns:
             df_combined[col] = ""  # Add empty columns if missing
@@ -77,13 +74,33 @@ def generate_markdown_table():
     for _, row in df_combined.iterrows():
         markdown_table += "| " + " | ".join(map(str, row)) + " |\n"
 
-    # Define the path pattern for the CSV files in the parent directory
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    output_file = os.path.join(script_dir, "../tables/table_4.md")
-
+    # Write the Markdown table to the specified output file
     with open(output_file, "w") as f:
         f.write(markdown_table)
 
 
+def generate_table_4():
+    """Write tables/table_4.md from the CSV files in compression_ratio_result/double."""
+    # Anchor the output to this script's directory, not the caller's working directory.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    generate_markdown_table(
+        input_folder="double",
+        output_file=os.path.join(script_dir, "../tables/table_4.md"),
+        column_order=["Dataset", "Gor", "Ch", "Ch128", "Patas", "PDE", "Elf", "Alp", "LWC+Alp", "Zstd"]
+    )
+
+
+def generate_table_7():
+    """Write tables/table_7.md from the CSV files in compression_ratio_result/float."""
+    # Anchor the output to this script's directory, not the caller's working directory.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    generate_markdown_table(
+        input_folder="float",
+        output_file=os.path.join(script_dir, "../tables/table_7.md"),
+        column_order=["Dataset", "Gor", "Ch", "Ch128", "Patas", "Alp", "Zstd"]
+    )
+
+
 if __name__ == "__main__":
-    generate_markdown_table()
+    generate_table_4()
+    generate_table_7()
diff --git a/publication/master_script/master_script.sh b/publication/master_script/master_script.sh
index fdecaa3..722e357 100755
--- a/publication/master_script/master_script.sh
+++ b/publication/master_script/master_script.sh
@@ -84,10 +84,9 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-green_echo "Generating compression ratio table 4 ..."
-output=$(python3 "$CLONED_DIR/publication/master_script/draw_table_4.py")
+green_echo "Generating compression ratio tables ..."
+output=$(python3 "$CLONED_DIR/publication/master_script/generate_tables.py")
 brown_echo "$output"
-green_echo "Table 4 also saved as compression_ratios_table.md."
 
 green_echo "Running benchmarks based on system architecture..."
# Run benchmarks based on system architecture
diff --git a/publication/source_code/bench_compression_ratio/alp.cpp b/publication/source_code/bench_compression_ratio/alp.cpp
index feb0a1b..bbc29f2 100644
--- a/publication/source_code/bench_compression_ratio/alp.cpp
+++ b/publication/source_code/bench_compression_ratio/alp.cpp
@@ -328,7 +328,7 @@ class alp_test : public ::testing::Test {
  * This test will output and write a file with the estimated bits/value after compression with alp
  */
 TEST_F(alp_test, test_alp_on_whole_datasets) {
-    std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_compression_ratio.csv", std::ios::out);
+    std::ofstream ofile(alp_bench::get_paths().result_dir_path + "compression_ratio_result/double/alp.csv", std::ios::out);
     ofile << "dataset,size,rowgroups_count,vectors_count\n";
 
     for (auto& dataset : alp_bench::get_alp_dataset()) {
@@ -341,7 +341,7 @@ TEST_F(alp_test, test_alp_on_whole_datasets) {
  * This test will output and write a file with the estimated bits/value after compression with alp
  */
 TEST_F(alp_test, test_alprd_on_whole_datasets) {
-    std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd_compression_ratio.csv", std::ios::out);
+    std::ofstream ofile(alp_bench::get_paths().result_dir_path + "compression_ratio_result/double/alp_rd.csv", std::ios::out);
     ofile << "dataset,size,rowgroups_count,vectors_count\n";
 
     for (auto& dataset : alp_bench::get_alp_dataset()) {
@@ -350,7 +350,7 @@ TEST_F(alp_test, test_alprd_on_whole_datasets) {
 }
 
 TEST_F(alp_test, test_alprd_on_evalimplsts) {
-    std::ofstream ofile(alp_bench::get_paths().result_dir_path + "evalimplsts.csv", std::ios::out);
+    std::ofstream ofile(alp_bench::get_paths().result_dir_path + "compression_ratio_result/double/evalimplsts.csv", std::ios::out);
     ofile << "dataset,size,rowgroups_count,vectors_count\n";
 
     for (auto& dataset : alp_bench::get_evalimplsts()) {
diff --git a/publication/source_code/bench_compression_ratio/alp32.cpp
b/publication/source_code/bench_compression_ratio/alp32.cpp index cfd1cc1..77a9943 100644 --- a/publication/source_code/bench_compression_ratio/alp32.cpp +++ b/publication/source_code/bench_compression_ratio/alp32.cpp @@ -128,7 +128,7 @@ class alp32_test : public ::testing::Test { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp32_test, test_alprd32_on_whole_datasets) { - std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd32_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "compression_ratio_result/float/alp.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; for (auto& dataset : alp_bench::get_sp_datasets()) { diff --git a/publication/tables/table_4.md b/publication/tables/table_4.md index 3873fae..006223c 100644 --- a/publication/tables/table_4.md +++ b/publication/tables/table_4.md @@ -1,32 +1,32 @@ | Dataset | Gor | Ch | Ch128 | Patas | PDE | Elf | Alp | LWC+Alp | Zstd | |---|---|---|---|---|---|---|---|---|---| -| Air-Pressure | 24.48 | 22.95 | | 27.87 | | | 16.43 | | 9.39 | -| Arade/4 | 58.0 | 55.56 | | 59.1 | | | 24.94 | | 33.9 | -| Basel-Temp | 60.47 | 54.12 | | 36.41 | | | 30.72 | | 18.44 | -| Basel-Wind | 62.2 | 54.73 | | 48.86 | | | 29.81 | | 14.66 | -| Bio-Temp | 50.69 | 46.34 | | 22.88 | | | 10.75 | | 17.46 | -| Bird-Mig | 47.82 | 42.06 | | 36.08 | | | 20.14 | | 21.02 | -| Blockchain | 62.89 | 58.29 | | 62.58 | | | 36.49 | | 43.97 | -| Btc-Price | 54.67 | 48.07 | | 56.8 | | | 26.37 | | 42.08 | -| CMS/1 | 37.23 | 34.77 | | 36.78 | | | 35.65 | | 26.56 | -| CMS/25 | 64.35 | 59.53 | | 70.09 | | | 41.11 | | 58.27 | -| CMS/9 | 16.01 | 18.71 | | 25.97 | | | 11.67 | | 14.73 | -| City-Temp | 58.82 | 46.25 | | 24.16 | | | 10.74 | | 16.77 | -| Dew-Temp | 54.86 | 51.81 | | 39.01 | | | 13.4 | | 25.07 | -| Food-prices | 38.31 | 27.98 | | 28.29 | | | 23.65 | | 18.32 | -| Gov/10 | 57.45 | 45.75 | | 35.84 | | | 
30.99 | | 28.09 | -| Gov/26 | 2.37 | 2.34 | | 16.21 | | | 0.41 | | 0.23 | -| Gov/30 | 10.22 | 8.86 | | 19.28 | | | 7.48 | | 4.48 | -| Gov/31 | 5.61 | 5.0 | | 17.07 | | | 3.05 | | 1.63 | -| Gov/40 | 2.71 | 2.63 | | 16.34 | | | 0.83 | | 0.46 | -| Medicare/1 | 45.49 | 42.72 | | 39.91 | | | 39.35 | | 31.18 | -| Medicare/9 | 16.97 | 19.08 | | 26.27 | | | 12.26 | | 15.03 | -| NYC/29 | 30.56 | 29.57 | | 38.78 | | | 40.38 | | 27.5 | -| PM10-dust | 27.5 | 24.44 | | 19.9 | | | 8.56 | | 7.78 | -| POI-lat | 65.95 | 57.7 | | 71.72 | | | | | 59.34 | -| POI-lon | 66.1 | 63.36 | | 75.88 | | | | | 60.98 | -| SD-bench | 50.7 | 45.29 | | 22.8 | | | 16.21 | | 11.34 | -| Stocks-DE | 46.18 | 42.89 | | 20.77 | | | 11.01 | | 10.54 | -| Stocks-UK | 34.75 | 31.33 | | 21.49 | | | 12.59 | | 10.28 | -| Stocks-USA | 37.2 | 34.98 | | 19.21 | | | 7.9 | | 8.56 | -| Wind-dir | 58.14 | 53.89 | | 28.14 | | | 15.89 | | 25.53 | +| Air-Pressure | 24.48 | 22.95 | 19.24 | 27.87 | | | 16.43 | | 9.39 | +| Arade/4 | 58.0 | 55.56 | 48.95 | 59.1 | | | 24.94 | | 33.9 | +| Basel-Temp | 60.47 | 54.12 | 31.14 | 36.41 | | | 30.72 | | 18.44 | +| Basel-Wind | 62.2 | 54.73 | 38.35 | 48.86 | | | 29.81 | | 14.66 | +| Bio-Temp | 50.69 | 46.34 | 18.86 | 22.88 | | | 10.75 | | 17.46 | +| Bird-Mig | 47.82 | 42.06 | 26.43 | 36.08 | | | 20.14 | | 21.02 | +| Blockchain | 62.89 | 58.29 | 53.19 | 62.58 | | | 36.49 | | 43.97 | +| Btc-Price | 54.67 | 48.07 | 44.96 | 56.8 | | | 26.37 | | 42.08 | +| CMS/1 | 37.23 | 34.77 | 28.14 | 36.78 | | | 35.65 | | 26.56 | +| CMS/25 | 64.35 | 59.53 | 57.2 | 70.09 | | | 41.11 | | 58.27 | +| CMS/9 | 16.01 | 18.71 | 25.65 | 25.97 | | | 11.67 | | 14.73 | +| City-Temp | 58.82 | 46.25 | 22.96 | 24.16 | | | 10.74 | | 16.77 | +| Dew-Temp | 54.86 | 51.81 | 32.63 | 39.01 | | | 13.4 | | 25.07 | +| Food-prices | 38.31 | 27.98 | 24.64 | 28.29 | | | 23.65 | | 18.32 | +| Gov/10 | 57.45 | 45.75 | 34.15 | 35.84 | | | 30.99 | | 28.09 | +| Gov/26 | 2.37 | 2.34 | 9.26 | 16.21 | | | 0.41 | | 0.23 | +| Gov/30 | 10.22 
| 8.86 | 12.92 | 19.28 | | | 7.48 | | 4.48 | +| Gov/31 | 5.61 | 5.0 | 10.43 | 17.07 | | | 3.05 | | 1.63 | +| Gov/40 | 2.71 | 2.63 | 9.41 | 16.34 | | | 0.83 | | 0.46 | +| Medicare/1 | 45.49 | 42.72 | 32.32 | 39.91 | | | 39.35 | | 31.18 | +| Medicare/9 | 16.97 | 19.08 | 26.02 | 26.27 | | | 12.26 | | 15.03 | +| NYC/29 | 30.56 | 29.57 | 28.71 | 38.78 | | | 40.38 | | 27.5 | +| PM10-dust | 27.5 | 24.44 | 13.66 | 19.9 | | | 8.56 | | 7.78 | +| POI-lat | 65.95 | 57.7 | 57.49 | 71.72 | | | | | 59.34 | +| POI-lon | 66.1 | 63.36 | 63.19 | 75.88 | | | | | 60.98 | +| SD-bench | 50.7 | 45.29 | 18.78 | 22.8 | | | 16.21 | | 11.34 | +| Stocks-DE | 46.18 | 42.89 | 13.63 | 20.77 | | | 11.01 | | 10.54 | +| Stocks-UK | 34.75 | 31.33 | 16.76 | 21.49 | | | 12.59 | | 10.28 | +| Stocks-USA | 37.2 | 34.98 | 12.19 | 19.21 | | | 7.9 | | 8.56 | +| Wind-dir | 58.14 | 53.89 | 27.8 | 28.14 | | | 15.89 | | 25.53 | diff --git a/publication/tables/table_7.md b/publication/tables/table_7.md new file mode 100644 index 0000000..3f78bf5 --- /dev/null +++ b/publication/tables/table_7.md @@ -0,0 +1,6 @@ +| Dataset | Gor | Ch | Ch128 | Patas | Alp | Zstd | +|---|---|---|---|---|---|---| +| Dino-Vitb16 | 32.89 | 33.42 | 33.43 | 35.25 | 28.78 | 39.57 | +| GPT2 | 32.24 | 33.46 | 33.48 | 35.24 | 28.01 | 39.76 | +| Grammarly-lg | 32.19 | 33.42 | 33.43 | 35.23 | 29.16 | 39.37 | +| WAV2VEC | 32.25 | 27.42 | 34.39 | 26.28 | 28.01 | 43.15 |