From 9326155174702180b433b58fdb84bf9de4496c32 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 26 Jul 2023 14:42:29 +0100 Subject: [PATCH 1/6] Add operations benchmark --- Cargo.toml | 4 +++ benches/compression.rs | 2 +- benches/operations.rs | 69 ++++++++++++++++++++++++++++++++++++++++++ benches/shuffle.rs | 2 +- 4 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 benches/operations.rs diff --git a/Cargo.toml b/Cargo.toml index 3a11ae0..e43276f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,10 @@ serde_test = "1.0" name = "byte_order" harness = false +[[bench]] +name = "operations" +harness = false + [[bench]] name = "shuffle" harness = false diff --git a/benches/compression.rs b/benches/compression.rs index 0966ada..5db3ca6 100644 --- a/benches/compression.rs +++ b/benches/compression.rs @@ -37,7 +37,7 @@ fn criterion_benchmark(c: &mut Criterion) { (models::Compression::Zlib, "zlib"), ]; for (compression, name) in compression_algs { - for size_k in [64, 256, 1024] { + for size_k in [64, 256, 1024, 4096] { let size = size_k * 1024; let data: Vec = (0_u32..size) .map(|i| u8::try_from(i % 256).unwrap()) diff --git a/benches/operations.rs b/benches/operations.rs new file mode 100644 index 0000000..ba27941 --- /dev/null +++ b/benches/operations.rs @@ -0,0 +1,69 @@ +/// Benchmarks for numerical operations. +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use reductionist::error::ActiveStorageError; +use reductionist::models::{DType, RequestData, Response}; +use reductionist::operation::Operation; +use reductionist::operations; +use reductionist::types::Missing; +use url::Url; +// Bring trait into scope to use as_bytes method. +use zerocopy::AsBytes; + +fn get_test_request_data() -> RequestData { + RequestData { + source: Url::parse("http://example.com").unwrap(), + bucket: "bar".to_string(), + object: "baz".to_string(), + dtype: DType::Int32, + byte_order: None, + offset: None, + size: None, + shape: None, + order: None, + selection: None, + compression: None, + filters: None, + missing: None, + } +} + +type ExecuteFn = dyn Fn(&RequestData, Vec) -> Result; + +fn criterion_benchmark(c: &mut Criterion) { + for size_k in [64, 256, 1024, 4096] { + let size = size_k * 1024; + let data: Vec = (0_i64..size).map(|i| i % 256).collect::>(); + let data: Vec = data.as_bytes().into(); + let missings = vec![ + None, + Some(Missing::MissingValue(42.into())), + Some(Missing::MissingValues(vec![42.into()])), + Some(Missing::ValidMax(128.into())), + Some(Missing::ValidMin(128.into())), + Some(Missing::ValidRange(5.into(), 250.into())), + ]; + let operations: [(&str, Box); 5] = [ + ("count", Box::new(operations::Count::execute)), + ("max", Box::new(operations::Max::execute)), + ("min", Box::new(operations::Min::execute)), + ("select", Box::new(operations::Select::execute)), + ("sum", Box::new(operations::Sum::execute)), + ]; + for (op_name, execute) in operations { + for missing in missings.clone() { + let name = format!("{}({}, {:?})", op_name, size, missing); + c.bench_function(&name, |b| { + b.iter(|| { + let mut request_data = get_test_request_data(); + request_data.dtype = DType::Int64; + request_data.missing = missing.clone(); + execute(&request_data, black_box(data.clone())).unwrap(); + }) + }); + } + } + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/benches/shuffle.rs b/benches/shuffle.rs index 59e5dd5..03910a8 100644 --- a/benches/shuffle.rs +++ b/benches/shuffle.rs @@ -3,7 +3,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use reductionist::filters::shuffle; fn criterion_benchmark(c: &mut Criterion) { - for size_k in [64, 256, 1024] { + for size_k in [64, 256, 1024, 4096] { let size = size_k * 1024; let data: Vec = (0_u32..size) .map(|i| u8::try_from(i % 256).unwrap()) From c2a4fc1845aa34668c26f07ad6a3b16600d3e78d Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 2 Aug 2023 17:26:55 +0100 Subject: [PATCH 2/6] benches: switch to i64 for compression & shuffle --- benches/compression.rs | 11 ++++++----- benches/shuffle.rs | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/benches/compression.rs b/benches/compression.rs index 5db3ca6..87398f5 100644 --- a/benches/compression.rs +++ b/benches/compression.rs @@ -7,6 +7,8 @@ use axum::body::Bytes; use flate2::read::{GzEncoder, ZlibEncoder}; use flate2::Compression; use std::io::Read; +// Bring trait into scope to use as_bytes method. +use zerocopy::AsBytes; fn compress_gzip(data: &[u8]) -> Bytes { // Adapated from flate2 documentation. @@ -37,12 +39,11 @@ fn criterion_benchmark(c: &mut Criterion) { (models::Compression::Zlib, "zlib"), ]; for (compression, name) in compression_algs { - for size_k in [64, 256, 1024, 4096] { + for size_k in [64, 256, 1024] { let size = size_k * 1024; - let data: Vec = (0_u32..size) - .map(|i| u8::try_from(i % 256).unwrap()) - .collect::>(); - let compressed = compress(compression, data.as_ref()); + let data: Vec = (0_i64..size).map(|i| i % 256).collect::>(); + let bytes = Bytes::copy_from_slice(data.as_bytes()); + let compressed = compress(compression, &bytes); let name = format!("decompress({}, {})", name, size); c.bench_function(&name, |b| { b.iter(|| { diff --git a/benches/shuffle.rs b/benches/shuffle.rs index 03910a8..903c589 100644 --- a/benches/shuffle.rs +++ b/benches/shuffle.rs @@ -1,14 +1,15 @@ /// Benchmarks for the byte shuffle filter implementation. +use axum::body::Bytes; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use reductionist::filters::shuffle; +// Bring trait into scope to use as_bytes method. +use zerocopy::AsBytes; fn criterion_benchmark(c: &mut Criterion) { for size_k in [64, 256, 1024, 4096] { let size = size_k * 1024; - let data: Vec = (0_u32..size) - .map(|i| u8::try_from(i % 256).unwrap()) - .collect::>(); - let bytes = data.into(); + let data: Vec = (0_i64..size).map(|i| i % 256).collect::>(); + let bytes = Bytes::copy_from_slice(data.as_bytes()); for element_size in [2, 4, 8] { let name = format!("deshuffle({}, {})", size, element_size); c.bench_function(&name, |b| { From 43de1d71ec89d1f8a8fc8072d304bb803cbe85c3 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 3 Aug 2023 09:21:09 +0100 Subject: [PATCH 3/6] Fix formatting issues cargo fmt --- src/array.rs | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/array.rs b/src/array.rs index e8f242b..a31fa2c 100644 --- a/src/array.rs +++ b/src/array.rs @@ -218,10 +218,7 @@ mod tests { let value: u32 = 42; let mut buf = maligned::align_first::(4); buf.extend_from_slice(&value.to_ne_bytes()); - assert_eq!( - [value], - from_bytes::(&mut buf).unwrap() - ); + assert_eq!([value], from_bytes::(&mut buf).unwrap()); } #[test] @@ -229,10 +226,7 @@ mod tests { let value: u64 = u64::max_value(); let mut buf = maligned::align_first::(8); buf.extend_from_slice(&value.to_ne_bytes()); - assert_eq!( - [value], - from_bytes::(&mut buf).unwrap() - ); + assert_eq!([value], from_bytes::(&mut buf).unwrap()); } #[test] @@ -240,10 +234,7 @@ mod tests { let value: i32 = -42; let mut buf = maligned::align_first::(4); buf.extend_from_slice(&value.to_ne_bytes()); - assert_eq!( - [value], - from_bytes::(&mut buf).unwrap() - ); + assert_eq!([value], from_bytes::(&mut buf).unwrap()); } #[test] @@ -251,10 +242,7 @@ mod tests { let value: i64 = i64::min_value(); let mut buf = maligned::align_first::(8); buf.extend_from_slice(&value.to_ne_bytes()); - assert_eq!( - [value], - from_bytes::(&mut buf).unwrap() - ); + assert_eq!([value], from_bytes::(&mut buf).unwrap()); } #[test] @@ -262,10 +250,7 @@ mod tests { let value: f32 = f32::min_value(); let mut buf = maligned::align_first::(4); buf.extend_from_slice(&value.to_ne_bytes()); - assert_eq!( - [value], - from_bytes::(&mut buf).unwrap() - ); + assert_eq!([value], from_bytes::(&mut buf).unwrap()); } #[test] @@ -273,10 +258,7 @@ mod tests { let value: f64 = f64::max_value(); let mut buf = maligned::align_first::(8); buf.extend_from_slice(&value.to_ne_bytes()); - assert_eq!( - [value], - from_bytes::(&mut buf).unwrap() - ); + assert_eq!([value], from_bytes::(&mut buf).unwrap()); } fn assert_from_bytes_error(result: Result) { From 585c4a85672bb43e37ba73a80a4d1acdd62efe31 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 3 Aug 2023 09:35:28 +0100 Subject: [PATCH 4/6] Fix documentation warnings --- src/app.rs | 4 ++-- src/operation.rs | 2 +- src/types/missing.rs | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/app.rs b/src/app.rs index 5f74697..75860df 100644 --- a/src/app.rs +++ b/src/app.rs @@ -104,8 +104,8 @@ fn router() -> Router { /// Reductionist Server Service type alias /// -/// This type implements [tower_service::Service]. -// FIXME: The Service type should be some form of tower_service::Service, but couldn't find the +/// This type implements [tower::Service]. +// FIXME: The Service type should be some form of tower::Service, but couldn't find the // necessary trait bounds. pub type Service = tower_http::normalize_path::NormalizePath; diff --git a/src/operation.rs b/src/operation.rs index 869ac8e..c45c148 100644 --- a/src/operation.rs +++ b/src/operation.rs @@ -56,7 +56,7 @@ pub trait Operation { /// # Arguments /// /// * `request_data`: RequestData object for the request - /// * `data`: Vec containing data to operate on. + /// * `data`: [`Vec`] containing data to operate on. fn execute( request_data: &models::RequestData, data: Vec, diff --git a/src/types/missing.rs b/src/types/missing.rs index de18983..cd60b3c 100644 --- a/src/types/missing.rs +++ b/src/types/missing.rs @@ -30,7 +30,8 @@ pub enum Missing { } impl Missing { - /// Validate a Missing object for a given DType. + /// Validate a [`Missing`](crate::types::Missing) object for a given + /// [DType](crate::models::DType). pub fn validate(&self, dtype: DType) -> Result<(), ValidationError> { match dtype { DType::Int32 => Missing::::validate_dvalue(self), @@ -44,7 +45,8 @@ impl Missing { } impl Missing { - /// Validate a Missing for Missing where T is a supported primitive numeric type. + /// Validate a [`Missing`](crate::types::Missing) for Missing where T is a supported + /// primitive numeric type. fn validate_dvalue(missing: &Missing) -> Result<(), ValidationError> { // Perform a conversion to the primitive based type. let missing_primitive = Self::try_from(missing).map_err(|err| { From 1f28389e17c6c13a11c942bb82285bf8ccca8a44 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 3 Aug 2023 09:37:18 +0100 Subject: [PATCH 5/6] Add documentation build to pre-commit --- tools/pre-commit | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/pre-commit b/tools/pre-commit index 2a2899e..3e7e091 100755 --- a/tools/pre-commit +++ b/tools/pre-commit @@ -23,4 +23,10 @@ then exit 1 fi +if ! RUSTDOCFLAGS="-D warnings" cargo doc --no-deps +then + echo "There are some documentation issues." + exit 1 +fi + exit 0 From 6759667095c9a5e722560db89bf104f4585953c8 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Thu, 3 Aug 2023 09:40:51 +0100 Subject: [PATCH 6/6] CI: Add documentation build and lint jobs --- .github/workflows/pull-request.yml | 16 ++++++++++++++++ Makefile | 12 ++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 623a06f..8d6129f 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -14,6 +14,22 @@ jobs: - name: Build run: make build + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Build documentation + run: make docs + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Lint + run: make lint test: runs-on: ubuntu-latest steps: diff --git a/Makefile b/Makefile index 6d16776..8da4d4b 100644 --- a/Makefile +++ b/Makefile @@ -2,12 +2,20 @@ build: @docker buildx build -t reductionist . +.PHONY: docs +docs: + @docker buildx build --build-arg PROFILE=dev --target builder -t reductionist-test . + @docker run --rm -e RUSTDOCFLAGS="-D warnings" reductionist-test cargo doc --no-deps + +.PHONY: lint +lint: + @docker buildx build --build-arg PROFILE=dev --target builder -t reductionist-test . + @docker run --rm reductionist-test cargo check --color always + .PHONY: test test: @docker buildx build --build-arg PROFILE=dev --target builder -t reductionist-test . - @docker run --rm reductionist-test cargo check --color always @docker run --rm reductionist-test cargo test --color always - @docker run --rm reductionist-test cargo bench --color always .PHONY: run run: