From 32740b0b191d24e744debfa1aef89979e35805d0 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Wed, 30 Oct 2024 15:59:54 -0700 Subject: [PATCH 01/48] wip expose ChunkResponse --- .../src/operation/download.rs | 6 ++--- .../src/operation/download/body.rs | 27 ++++++++++++------- .../src/operation/download/service.rs | 13 +++------ .../src/operation/download_objects/worker.rs | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index b265e79..2de2f59 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -26,9 +26,9 @@ mod service; use crate::error; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; -use body::Body; +use body::{Body, ChunkResponse}; use discovery::discover_obj; -use service::{distribute_work, ChunkResponse}; +use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use tokio::sync::mpsc; @@ -147,7 +147,7 @@ fn handle_discovery_chunk( .await .map(|aggregated| ChunkResponse { seq, - data: Some(aggregated), + data: aggregated, }) .map_err(error::discovery_failed); diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index aa965dd..6e03419 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -2,7 +2,6 @@ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
* SPDX-License-Identifier: Apache-2.0 */ -use crate::operation::download::service::ChunkResponse; use aws_smithy_types::byte_stream::AggregatedBytes; use std::cmp; use std::cmp::Ordering; @@ -21,6 +20,16 @@ pub struct Body { type BodyChannel = mpsc::Receiver>; +#[derive(Debug, Clone)] +/// TODO: DOcs +pub struct ChunkResponse { + // TODO(aws-sdk-rust#1159, design) - consider PartialOrd for ChunkResponse and hiding `seq` as internal only detail + // the seq number + pub(crate) seq: u64, + /// data: chunk data + pub data: AggregatedBytes, +} + impl Body { /// Create a new empty Body pub fn empty() -> Self { @@ -50,7 +59,7 @@ impl Body { /// Returns [None] when there is no more data. /// Chunks returned from a [Body] are guaranteed to be sequenced /// in the right order. - pub async fn next(&mut self) -> Option> { + pub async fn next(&mut self) -> Option> { // TODO(aws-sdk-rust#1159, design) - do we want ChunkResponse (or similar) rather than AggregatedBytes? Would // make additional retries of an individual chunk/part more feasible (though theoretically already exhausted retries) loop { @@ -67,15 +76,13 @@ impl Body { let chunk = self .sequencer - .pop() - .map(|r| Ok(r.data.expect("chunk data"))); - - if chunk.is_some() { - // if we actually pulled data out, advance the next sequence we expect + .pop(); + if let Some(chunk) = chunk { self.sequencer.advance(); + Some(Ok(chunk)) + } else { + None } - - chunk } /// Close the body, no more data will flow from it and all publishers will be notified. 
@@ -219,7 +226,7 @@ mod tests { let mut received = Vec::new(); while let Some(chunk) = body.next().await { let chunk = chunk.expect("chunk ok"); - let data = String::from_utf8(chunk.to_vec()).unwrap(); + let data = String::from_utf8(chunk.data.to_vec()).unwrap(); received.push(data); } diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 2183b06..3495232 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -8,7 +8,7 @@ use crate::middleware::limit::concurrency::ConcurrencyLimitLayer; use crate::middleware::retry; use crate::operation::download::DownloadContext; use aws_smithy_types::body::SdkBody; -use aws_smithy_types::byte_stream::{AggregatedBytes, ByteStream}; +use aws_smithy_types::byte_stream::ByteStream; use std::cmp; use std::mem; use std::ops::RangeInclusive; @@ -16,6 +16,7 @@ use tokio::sync::mpsc; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; +use super::body::ChunkResponse; use super::{DownloadHandle, DownloadInput, DownloadInputBuilder}; /// Request/input type for our "chunk" service. 
@@ -85,7 +86,7 @@ async fn download_specific_chunk( Ok(ChunkResponse { seq, - data: Some(bytes), + data: bytes, }) } @@ -104,14 +105,6 @@ pub(super) fn chunk_service( .service(svc) } -#[derive(Debug, Clone)] -pub(crate) struct ChunkResponse { - // TODO(aws-sdk-rust#1159, design) - consider PartialOrd for ChunkResponse and hiding `seq` as internal only detail - // the seq number - pub(crate) seq: u64, - // chunk data - pub(crate) data: Option, -} /// Spawn tasks to download the remaining chunks of object data /// diff --git a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs index eef9284..64d3400 100644 --- a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs +++ b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs @@ -152,7 +152,7 @@ async fn download_single_obj( while let Some(chunk) = body.next().await { let chunk = chunk?; - for segment in chunk.into_segments() { + for segment in chunk.data.into_segments() { dest.write_all(segment.as_ref()).await?; } } From 636588a946066e3f914f9be3aaa24322a16402b5 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Wed, 30 Oct 2024 16:03:03 -0700 Subject: [PATCH 02/48] expose metadata --- aws-s3-transfer-manager/src/operation/download.rs | 6 ++++++ aws-s3-transfer-manager/src/operation/download/body.rs | 4 ++++ aws-s3-transfer-manager/src/operation/download/service.rs | 1 + 3 files changed, 11 insertions(+) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 2de2f59..9315def 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -18,6 +18,7 @@ mod discovery; mod handle; pub use handle::DownloadHandle; +use object_meta::ObjectMetadata; use tracing::Instrument; mod object_meta; @@ -84,6 +85,8 @@ impl Download { let mut discovery = discover_obj(&ctx, &input).await?; let (comp_tx, 
comp_rx) = mpsc::channel(concurrency); + // TODO: waahm7 fix + let meta_data = discovery.meta.clone(); let initial_chunk = discovery.initial_chunk.take(); let mut handle = DownloadHandle { @@ -104,6 +107,7 @@ impl Download { &comp_tx, permit, parent_span_for_tasks.clone(), + meta_data, ); if !discovery.remaining.is_empty() { @@ -135,6 +139,7 @@ fn handle_discovery_chunk( completed: &mpsc::Sender>, permit: OwnedWorkPermit, parent_span_for_tasks: tracing::Span, + meta_data: ObjectMetadata, ) -> u64 { if let Some(stream) = initial_chunk { let seq = handle.ctx.next_seq(); @@ -148,6 +153,7 @@ fn handle_discovery_chunk( .map(|aggregated| ChunkResponse { seq, data: aggregated, + metadata: meta_data, }) .map_err(error::discovery_failed); diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 6e03419..2cd1d64 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -8,6 +8,8 @@ use std::cmp::Ordering; use std::collections::BinaryHeap; use tokio::sync::mpsc; +use super::object_meta::ObjectMetadata; + /// Stream of binary data representing an Amazon S3 Object's contents. /// /// Wraps potentially multiple streams of binary data into a single coherent stream. 
@@ -28,6 +30,8 @@ pub struct ChunkResponse { pub(crate) seq: u64, /// data: chunk data pub data: AggregatedBytes, + /// metadata + pub metadata: ObjectMetadata, } impl Body { diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 3495232..16c21aa 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -87,6 +87,7 @@ async fn download_specific_chunk( Ok(ChunkResponse { seq, data: bytes, + metadata: resp.into(), }) } From 5de8eaa756a66b930c007f17b8f2e29fe2507878 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Fri, 1 Nov 2024 10:21:15 -0700 Subject: [PATCH 03/48] wip ChunkResponse --- .../src/operation/download.rs | 6 ++--- .../src/operation/download/body.rs | 8 +++---- .../{object_meta.rs => chunk_meta.rs} | 22 +++++++++++++------ .../src/operation/download/discovery.rs | 8 +++---- .../src/operation/download/handle.rs | 6 ++--- .../src/operation/download/service.rs | 1 - 6 files changed, 28 insertions(+), 23 deletions(-) rename aws-s3-transfer-manager/src/operation/download/{object_meta.rs => chunk_meta.rs} (92%) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 9315def..ab9f9c2 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -6,6 +6,7 @@ mod input; use aws_sdk_s3::error::DisplayErrorContext; +use chunk_meta::ChunkMetadata; /// Request type for dowloading a single object from Amazon S3 pub use input::{DownloadInput, DownloadInputBuilder}; @@ -18,10 +19,9 @@ mod discovery; mod handle; pub use handle::DownloadHandle; -use object_meta::ObjectMetadata; use tracing::Instrument; -mod object_meta; +mod chunk_meta; mod service; use crate::error; @@ -139,7 +139,7 @@ fn handle_discovery_chunk( completed: &mpsc::Sender>, permit: OwnedWorkPermit, parent_span_for_tasks: 
tracing::Span, - meta_data: ObjectMetadata, + meta_data: ChunkMetadata, ) -> u64 { if let Some(stream) = initial_chunk { let seq = handle.ctx.next_seq(); diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 2cd1d64..119c012 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -8,7 +8,7 @@ use std::cmp::Ordering; use std::collections::BinaryHeap; use tokio::sync::mpsc; -use super::object_meta::ObjectMetadata; +use super::chunk_meta::ChunkMetadata; /// Stream of binary data representing an Amazon S3 Object's contents. /// @@ -31,7 +31,7 @@ pub struct ChunkResponse { /// data: chunk data pub data: AggregatedBytes, /// metadata - pub metadata: ObjectMetadata, + pub metadata: ChunkMetadata, } impl Body { @@ -78,9 +78,7 @@ impl Body { } } - let chunk = self - .sequencer - .pop(); + let chunk = self.sequencer.pop(); if let Some(chunk) = chunk { self.sequencer.advance(); Some(Ok(chunk)) diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs similarity index 92% rename from aws-s3-transfer-manager/src/operation/download/object_meta.rs rename to aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index be70541..8f9527d 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -7,6 +7,8 @@ use std::str::FromStr; use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::head_object::HeadObjectOutput; +use aws_sdk_s3::operation::RequestId; +use aws_sdk_s3::operation::RequestIdExt; use crate::http::header; @@ -15,7 +17,9 @@ use crate::http::header; /// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` #[derive(Debug, Clone, Default)] -pub struct ObjectMetadata { +pub struct 
ChunkMetadata { + pub requet_id: Option, + pub extended_request_id: Option, pub delete_marker: Option, pub accept_ranges: Option, pub expiration: Option, @@ -54,7 +58,7 @@ pub struct ObjectMetadata { pub object_lock_legal_hold_status: Option, } -impl ObjectMetadata { +impl ChunkMetadata { /// The total object size pub fn total_size(&self) -> u64 { match (self.content_length, self.content_range.as_ref()) { @@ -95,9 +99,11 @@ impl ObjectMetadata { } } -impl From for ObjectMetadata { +impl From for ChunkMetadata { fn from(value: GetObjectOutput) -> Self { Self { + requet_id: value.request_id().map(|s| s.to_string()), + extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, accept_ranges: value.accept_ranges, expiration: value.expiration, @@ -139,9 +145,11 @@ impl From for ObjectMetadata { } } -impl From for ObjectMetadata { +impl From for ChunkMetadata { fn from(value: HeadObjectOutput) -> Self { Self { + requet_id: value.request_id().map(|s| s.to_string()), + extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, accept_ranges: value.accept_ranges, expiration: value.expiration, @@ -185,11 +193,11 @@ impl From for ObjectMetadata { #[cfg(test)] mod tests { - use super::ObjectMetadata; + use super::ChunkMetadata; #[test] fn test_inferred_content_length() { - let meta = ObjectMetadata { + let meta = ChunkMetadata { content_length: Some(4), content_range: Some("should ignore".to_owned()), ..Default::default() @@ -197,7 +205,7 @@ mod tests { assert_eq!(4, meta.content_length()); - let meta = ObjectMetadata { + let meta = ChunkMetadata { content_length: None, content_range: Some("bytes 0-499/900".to_owned()), ..Default::default() diff --git a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index cf2abe7..89f98ea 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ 
b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -12,7 +12,7 @@ use aws_smithy_types::body::SdkBody; use aws_smithy_types::byte_stream::ByteStream; use tracing::Instrument; -use super::object_meta::ObjectMetadata; +use super::chunk_meta::ChunkMetadata; use super::DownloadContext; use super::DownloadInput; use crate::error; @@ -35,7 +35,7 @@ pub(super) struct ObjectDiscovery { pub(super) remaining: RangeInclusive, /// the discovered metadata - pub(super) meta: ObjectMetadata, + pub(super) meta: ChunkMetadata, /// the first chunk of data if fetched during discovery pub(super) initial_chunk: Option, @@ -111,7 +111,7 @@ async fn discover_obj_with_head( input: &DownloadInput, byte_range: Option, ) -> Result { - let meta: ObjectMetadata = ctx + let meta: ChunkMetadata = ctx .client() .head_object() .set_bucket(input.bucket().map(str::to_string)) @@ -154,7 +154,7 @@ async fn discover_obj_with_get( let empty_stream = ByteStream::new(SdkBody::empty()); let body = mem::replace(&mut resp.body, empty_stream); - let meta: ObjectMetadata = resp.into(); + let meta: ChunkMetadata = resp.into(); let content_len = meta.content_length(); let remaining = match range { diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index f659742..12d8241 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ use crate::operation::download::body::Body; -use crate::operation::download::object_meta::ObjectMetadata; +use crate::operation::download::chunk_meta::ChunkMetadata; use tokio::task; use super::DownloadContext; @@ -13,7 +13,7 @@ use super::DownloadContext; #[non_exhaustive] pub struct DownloadHandle { /// Object metadata - pub object_meta: ObjectMetadata, + pub object_meta: ChunkMetadata, /// The object content pub(crate) body: Body, @@ -27,7 +27,7 @@ pub struct 
DownloadHandle { impl DownloadHandle { /// Object metadata - pub fn object_meta(&self) -> &ObjectMetadata { + pub fn object_meta(&self) -> &ChunkMetadata { &self.object_meta } diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 16c21aa..fbc13ea 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -106,7 +106,6 @@ pub(super) fn chunk_service( .service(svc) } - /// Spawn tasks to download the remaining chunks of object data /// /// # Arguments From 0301f54f062f34e89fd4b9fa5aaa845d9c070b5d Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 5 Nov 2024 15:51:14 -0800 Subject: [PATCH 04/48] renames --- aws-s3-transfer-manager/examples/cp.rs | 2 +- aws-s3-transfer-manager/src/operation/download.rs | 6 +++--- .../src/operation/download/handle.rs | 8 ++++---- .../src/operation/download/{body.rs => output.rs} | 10 +++++----- .../src/operation/download/service.rs | 2 +- .../src/operation/download_objects/worker.rs | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) rename aws-s3-transfer-manager/src/operation/download/{body.rs => output.rs} (97%) diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index 4279f5e..56e9449 100644 --- a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -10,7 +10,7 @@ use std::time; use aws_s3_transfer_manager::io::InputStream; use aws_s3_transfer_manager::metrics::unit::ByteUnit; use aws_s3_transfer_manager::metrics::Throughput; -use aws_s3_transfer_manager::operation::download::body::Body; +use aws_s3_transfer_manager::operation::download::output::Body; use aws_s3_transfer_manager::types::{ConcurrencySetting, PartSize}; use aws_sdk_s3::error::DisplayErrorContext; use bytes::Buf; diff --git a/aws-s3-transfer-manager/src/operation/download.rs 
b/aws-s3-transfer-manager/src/operation/download.rs index ab9f9c2..721c1f2 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -11,7 +11,7 @@ use chunk_meta::ChunkMetadata; pub use input::{DownloadInput, DownloadInputBuilder}; /// Abstractions for response bodies and consuming data streams. -pub mod body; +pub mod output; /// Operation builders pub mod builders; @@ -27,7 +27,7 @@ mod service; use crate::error; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; -use body::{Body, ChunkResponse}; +use output::{DownloadOutput, ChunkResponse}; use discovery::discover_obj; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; @@ -93,7 +93,7 @@ impl Download { // FIXME(aws-sdk-rust#1159) - initial object discovery for a range/first-part will not // have the correct metadata w.r.t. content-length and maybe others for the whole object. object_meta: discovery.meta, - body: Body::new(comp_rx), + body: DownloadOutput::new(comp_rx), // spawn all work into the same JoinSet such that when the set is dropped all tasks are cancelled. tasks: JoinSet::new(), ctx, diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 12d8241..3db12c2 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -2,7 +2,7 @@ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
* SPDX-License-Identifier: Apache-2.0 */ -use crate::operation::download::body::Body; +use crate::operation::download::output::DownloadOutput; use crate::operation::download::chunk_meta::ChunkMetadata; use tokio::task; @@ -16,7 +16,7 @@ pub struct DownloadHandle { pub object_meta: ChunkMetadata, /// The object content - pub(crate) body: Body, + pub(crate) body: DownloadOutput, /// All child tasks spawned for this download pub(crate) tasks: task::JoinSet<()>, @@ -32,12 +32,12 @@ impl DownloadHandle { } /// Object content - pub fn body(&self) -> &Body { + pub fn body(&self) -> &DownloadOutput { &self.body } /// Mutable reference to the body - pub fn body_mut(&mut self) -> &mut Body { + pub fn body_mut(&mut self) -> &mut DownloadOutput { &mut self.body } diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/output.rs similarity index 97% rename from aws-s3-transfer-manager/src/operation/download/body.rs rename to aws-s3-transfer-manager/src/operation/download/output.rs index 119c012..382275a 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/output.rs @@ -15,7 +15,7 @@ use super::chunk_meta::ChunkMetadata; /// Wraps potentially multiple streams of binary data into a single coherent stream. /// The data on this stream is sequenced into the correct order. 
#[derive(Debug)] -pub struct Body { +pub struct DownloadOutput { inner: UnorderedBody, sequencer: Sequencer, } @@ -34,7 +34,7 @@ pub struct ChunkResponse { pub metadata: ChunkMetadata, } -impl Body { +impl DownloadOutput { /// Create a new empty Body pub fn empty() -> Self { Self::new_from_channel(None) @@ -195,7 +195,7 @@ mod tests { use bytes::Bytes; use tokio::sync::mpsc; - use super::{Body, Sequencer}; + use super::{DownloadOutput, Sequencer}; fn chunk_resp(seq: u64, data: Option) -> ChunkResponse { ChunkResponse { seq, data } @@ -214,7 +214,7 @@ mod tests { #[tokio::test] async fn test_body_next() { let (tx, rx) = mpsc::channel(2); - let mut body = Body::new(rx); + let mut body = DownloadOutput::new(rx); tokio::spawn(async move { let seq = vec![2, 0, 1]; for i in seq { @@ -239,7 +239,7 @@ mod tests { #[tokio::test] async fn test_body_next_error() { let (tx, rx) = mpsc::channel(2); - let mut body = Body::new(rx); + let mut body = DownloadOutput::new(rx); tokio::spawn(async move { let data = Bytes::from("chunk 0".to_string()); let aggregated = ByteStream::from(data).collect().await.unwrap(); diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index fbc13ea..05ca3bb 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -16,7 +16,7 @@ use tokio::sync::mpsc; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; -use super::body::ChunkResponse; +use super::output::ChunkResponse; use super::{DownloadHandle, DownloadInput, DownloadInputBuilder}; /// Request/input type for our "chunk" service. 
diff --git a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs index 64d3400..b881585 100644 --- a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs +++ b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs @@ -12,7 +12,7 @@ use tokio::fs; use tokio::io::AsyncWriteExt; use crate::error; -use crate::operation::download::body::Body; +use crate::operation::download::output::DownloadOutput; use crate::operation::download::{DownloadInput, DownloadInputBuilder}; use crate::types::{DownloadFilter, FailedDownloadTransfer, FailedTransferPolicy}; @@ -144,7 +144,7 @@ async fn download_single_obj( let key_path = local_key_path(root_dir, key.as_str(), prefix, delim)?; let mut handle = crate::operation::download::Download::orchestrate(ctx.handle.clone(), input, true).await?; - let mut body = mem::replace(&mut handle.body, Body::empty()); + let mut body = mem::replace(&mut handle.body, DownloadOutput::empty()); let parent_dir = key_path.parent().expect("valid parent dir for key"); fs::create_dir_all(parent_dir).await?; From 4ee9731c0061a7201c6a46755d93a74b186e4980 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 5 Nov 2024 16:24:16 -0800 Subject: [PATCH 05/48] wip --- .../src/operation/download/output.rs | 29 +++++++------------ .../src/operation/download/service.rs | 2 +- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/output.rs b/aws-s3-transfer-manager/src/operation/download/output.rs index 382275a..95b6bce 100644 --- a/aws-s3-transfer-manager/src/operation/download/output.rs +++ b/aws-s3-transfer-manager/src/operation/download/output.rs @@ -16,7 +16,7 @@ use super::chunk_meta::ChunkMetadata; /// The data on this stream is sequenced into the correct order. 
#[derive(Debug)] pub struct DownloadOutput { - inner: UnorderedBody, + inner: UnorderedOutput, sequencer: Sequencer, } @@ -29,13 +29,13 @@ pub struct ChunkResponse { // the seq number pub(crate) seq: u64, /// data: chunk data - pub data: AggregatedBytes, + pub data: Option, /// metadata pub metadata: ChunkMetadata, } impl DownloadOutput { - /// Create a new empty Body + /// Create a new empty output pub fn empty() -> Self { Self::new_from_channel(None) } @@ -46,22 +46,15 @@ impl DownloadOutput { fn new_from_channel(chunks: Option) -> Self { Self { - inner: UnorderedBody::new(chunks), + inner: UnorderedOutput::new(chunks), sequencer: Sequencer::new(), } } - /// Convert this body into an unordered stream of chunks. - // TODO(aws-sdk-rust#1159) - revisit if we actually need/use unordered data stream - #[allow(dead_code)] - pub(crate) fn unordered(self) -> UnorderedBody { - self.inner - } - /// Pull the next chunk of data off the stream. /// /// Returns [None] when there is no more data. - /// Chunks returned from a [Body] are guaranteed to be sequenced + /// Chunks returned from a [DownloadOutput] are guaranteed to be sequenced /// in the right order. pub async fn next(&mut self) -> Option> { // TODO(aws-sdk-rust#1159, design) - do we want ChunkResponse (or similar) rather than AggregatedBytes? Would @@ -158,11 +151,11 @@ impl PartialEq for SequencedChunk { /// A body that returns chunks in whatever order they are received. #[derive(Debug)] -pub(crate) struct UnorderedBody { +pub(crate) struct UnorderedOutput { chunks: Option>>, } -impl UnorderedBody { +impl UnorderedOutput { fn new(chunks: Option) -> Self { Self { chunks } } @@ -170,7 +163,7 @@ impl UnorderedBody { /// Pull the next chunk of data off the stream. /// /// Returns [None] when there is no more data. - /// Chunks returned from an [UnorderedBody] are not guaranteed to be sorted + /// Chunks returned from an [UnorderedOutput] are not guaranteed to be sorted /// in the right order. 
Consumers are expected to sort the data themselves /// using the chunk sequence number (starting from zero). pub(crate) async fn next(&mut self) -> Option> { @@ -190,7 +183,7 @@ impl UnorderedBody { #[cfg(test)] mod tests { - use crate::{error, operation::download::service::ChunkResponse}; + use crate::{error, operation::download::output::ChunkResponse}; use aws_smithy_types::byte_stream::{AggregatedBytes, ByteStream}; use bytes::Bytes; use tokio::sync::mpsc; @@ -198,7 +191,7 @@ mod tests { use super::{DownloadOutput, Sequencer}; fn chunk_resp(seq: u64, data: Option) -> ChunkResponse { - ChunkResponse { seq, data } + ChunkResponse { seq, data, metadata: Default::default() } } #[test] @@ -228,7 +221,7 @@ mod tests { let mut received = Vec::new(); while let Some(chunk) = body.next().await { let chunk = chunk.expect("chunk ok"); - let data = String::from_utf8(chunk.data.to_vec()).unwrap(); + let data = String::from_utf8(chunk.data.unwrap().to_vec()).unwrap(); received.push(data); } diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 05ca3bb..dd39e8e 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -86,7 +86,7 @@ async fn download_specific_chunk( Ok(ChunkResponse { seq, - data: bytes, + data: Some(bytes), metadata: resp.into(), }) } From f606607e0520756281979f7b2d3c49f2a9372b0e Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Wed, 6 Nov 2024 09:33:11 -0800 Subject: [PATCH 06/48] wip refactor --- aws-s3-transfer-manager/Cargo.toml | 1 + aws-s3-transfer-manager/examples/cp.rs | 6 +- .../src/operation/download.rs | 10 +- .../src/operation/download/handle.rs | 2 +- .../src/operation/download/output.rs | 104 +++++++++++++++--- .../src/operation/download/service.rs | 10 +- .../tests/download_test.rs | 2 +- 7 files changed, 105 insertions(+), 30 deletions(-) diff --git 
a/aws-s3-transfer-manager/Cargo.toml b/aws-s3-transfer-manager/Cargo.toml index ccea7a5..5eb2d4b 100644 --- a/aws-s3-transfer-manager/Cargo.toml +++ b/aws-s3-transfer-manager/Cargo.toml @@ -19,6 +19,7 @@ aws-smithy-runtime-api = "1.7.1" aws-smithy-types = "1.2.6" aws-types = "1.3.3" bytes = "1" +bytes-utils = "0.1.4" futures-util = "0.3.30" path-clean = "1.0.1" pin-project-lite = "0.2.14" diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index 56e9449..39f8692 100644 --- a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -10,7 +10,7 @@ use std::time; use aws_s3_transfer_manager::io::InputStream; use aws_s3_transfer_manager::metrics::unit::ByteUnit; use aws_s3_transfer_manager::metrics::Throughput; -use aws_s3_transfer_manager::operation::download::output::Body; +use aws_s3_transfer_manager::operation::download::output::DownloadOutput; use aws_s3_transfer_manager::types::{ConcurrencySetting, PartSize}; use aws_sdk_s3::error::DisplayErrorContext; use bytes::Buf; @@ -263,9 +263,9 @@ async fn main() -> Result<(), BoxError> { Ok(()) } -async fn write_body(body: &mut Body, mut dest: fs::File) -> Result<(), BoxError> { +async fn write_body(body: &mut DownloadOutput, mut dest: fs::File) -> Result<(), BoxError> { while let Some(chunk) = body.next().await { - let chunk = chunk.unwrap(); + let chunk = chunk.unwrap().data; tracing::trace!("recv'd chunk remaining={}", chunk.remaining()); let mut segment_cnt = 1; for segment in chunk.into_segments() { diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 721c1f2..3caace0 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -10,10 +10,10 @@ use chunk_meta::ChunkMetadata; /// Request type for dowloading a single object from Amazon S3 pub use input::{DownloadInput, DownloadInputBuilder}; -/// Abstractions for response 
bodies and consuming data streams. -pub mod output; /// Operation builders pub mod builders; +/// Abstractions for response bodies and consuming data streams. +pub mod output; mod discovery; @@ -27,8 +27,8 @@ mod service; use crate::error; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; -use output::{DownloadOutput, ChunkResponse}; use discovery::discover_obj; +use output::{AggregatedBytes, ChunkResponse, DownloadOutput}; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; @@ -147,8 +147,8 @@ fn handle_discovery_chunk( // spawn a task to actually read the discovery chunk without waiting for it so we // can get started sooner on any remaining work (if any) handle.tasks.spawn(async move { - let chunk = stream - .collect() + + let chunk = AggregatedBytes::from_byte_stream(stream) .await .map(|aggregated| ChunkResponse { seq, diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 3db12c2..c356477 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -2,8 +2,8 @@ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 */ -use crate::operation::download::output::DownloadOutput; use crate::operation::download::chunk_meta::ChunkMetadata; +use crate::operation::download::output::DownloadOutput; use tokio::task; use super::DownloadContext; diff --git a/aws-s3-transfer-manager/src/operation/download/output.rs b/aws-s3-transfer-manager/src/operation/download/output.rs index 95b6bce..3fcb0c1 100644 --- a/aws-s3-transfer-manager/src/operation/download/output.rs +++ b/aws-s3-transfer-manager/src/operation/download/output.rs @@ -2,14 +2,84 @@ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
* SPDX-License-Identifier: Apache-2.0 */ -use aws_smithy_types::byte_stream::AggregatedBytes; +use aws_smithy_types::byte_stream::ByteStream; +use bytes::{Buf, Bytes}; +use bytes_utils::SegmentedBuf; use std::cmp; use std::cmp::Ordering; use std::collections::BinaryHeap; +use std::io::IoSlice; use tokio::sync::mpsc; +use crate::error::ErrorKind; + use super::chunk_meta::ChunkMetadata; +#[derive(Debug, Clone)] +/// TODO: waahm7 doc +pub struct AggregatedBytes(SegmentedBuf); + +impl AggregatedBytes { + /// Convert this buffer into [`Bytes`]. + /// + /// # Why does this consume `self`? + /// Technically, [`copy_to_bytes`](bytes::Buf::copy_to_bytes) can be called without ownership of self. However, since this + /// mutates the underlying buffer such that no data is remaining, it is more misuse resistant to + /// prevent the caller from attempting to reread the buffer. + /// + /// If the caller only holds a mutable reference, they may use [`copy_to_bytes`](bytes::Buf::copy_to_bytes) + /// directly on `AggregatedBytes`. + pub fn into_bytes(mut self) -> Bytes { + self.0.copy_to_bytes(self.0.remaining()) + } + + /// Convert this buffer into an [`Iterator`] of underlying non-contiguous segments of [`Bytes`] + pub fn into_segments(self) -> impl Iterator { + self.0.into_inner().into_iter() + } + + /// Convert this buffer into a `Vec` + pub fn to_vec(self) -> Vec { + self.0.into_inner().into_iter().flatten().collect() + } + + /// TODO: waahm7 doc + pub async fn from_byte_stream(value: ByteStream) -> Result { + let mut value = value; + let mut output = SegmentedBuf::new(); + while let Some(buf) = value.next().await { + match buf { + Ok(buf) => output.push(buf), + Err(err) => return Err(crate::error::from_kind(ErrorKind::IOError)(err)), + }; + } + Ok(AggregatedBytes(output)) + } +} + +impl Buf for AggregatedBytes { + // Forward all methods that SegmentedBuf has custom implementations of. 
+ fn remaining(&self) -> usize { + self.0.remaining() + } + + fn chunk(&self) -> &[u8] { + self.0.chunk() + } + + fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize { + self.0.chunks_vectored(dst) + } + + fn advance(&mut self, cnt: usize) { + self.0.advance(cnt) + } + + fn copy_to_bytes(&mut self, len: usize) -> Bytes { + self.0.copy_to_bytes(len) + } +} + /// Stream of binary data representing an Amazon S3 Object's contents. /// /// Wraps potentially multiple streams of binary data into a single coherent stream. @@ -29,7 +99,7 @@ pub struct ChunkResponse { // the seq number pub(crate) seq: u64, /// data: chunk data - pub data: Option, + pub data: AggregatedBytes, /// metadata pub metadata: ChunkMetadata, } @@ -184,23 +254,27 @@ impl UnorderedOutput { #[cfg(test)] mod tests { use crate::{error, operation::download::output::ChunkResponse}; - use aws_smithy_types::byte_stream::{AggregatedBytes, ByteStream}; use bytes::Bytes; + use bytes_utils::SegmentedBuf; use tokio::sync::mpsc; - use super::{DownloadOutput, Sequencer}; + use super::{AggregatedBytes, DownloadOutput, Sequencer}; - fn chunk_resp(seq: u64, data: Option) -> ChunkResponse { - ChunkResponse { seq, data, metadata: Default::default() } + fn chunk_resp(seq: u64, data: AggregatedBytes) -> ChunkResponse { + ChunkResponse { + seq, + data, + metadata: Default::default(), + } } #[test] fn test_sequencer() { let mut sequencer = Sequencer::new(); - sequencer.push(chunk_resp(1, None)); - sequencer.push(chunk_resp(2, None)); + sequencer.push(chunk_resp(1, AggregatedBytes(SegmentedBuf::new()))); + sequencer.push(chunk_resp(2, AggregatedBytes(SegmentedBuf::new()))); assert_eq!(sequencer.peek().unwrap().seq, 1); - sequencer.push(chunk_resp(0, None)); + sequencer.push(chunk_resp(0, AggregatedBytes(SegmentedBuf::new()))); assert_eq!(sequencer.pop().unwrap().seq, 0); } @@ -212,8 +286,9 @@ mod tests { let seq = vec![2, 0, 1]; for i in seq { let data = Bytes::from(format!("chunk {i}")); - let aggregated = 
ByteStream::from(data).collect().await.unwrap(); - let chunk = chunk_resp(i as u64, Some(aggregated)); + let mut aggregated = SegmentedBuf::new(); + aggregated.push(data); + let chunk = chunk_resp(i as u64, AggregatedBytes(aggregated)); tx.send(Ok(chunk)).await.unwrap(); } }); @@ -221,7 +296,7 @@ mod tests { let mut received = Vec::new(); while let Some(chunk) = body.next().await { let chunk = chunk.expect("chunk ok"); - let data = String::from_utf8(chunk.data.unwrap().to_vec()).unwrap(); + let data = String::from_utf8(chunk.data.to_vec()).unwrap(); received.push(data); } @@ -235,8 +310,9 @@ mod tests { let mut body = DownloadOutput::new(rx); tokio::spawn(async move { let data = Bytes::from("chunk 0".to_string()); - let aggregated = ByteStream::from(data).collect().await.unwrap(); - let chunk = chunk_resp(0, Some(aggregated)); + let mut aggregated = SegmentedBuf::new(); + aggregated.push(data); + let chunk = chunk_resp(0, AggregatedBytes(aggregated)); tx.send(Ok(chunk)).await.unwrap(); let err = error::Error::new(error::ErrorKind::InputInvalid, "test errors".to_string()); tx.send(Err(err)).await.unwrap(); diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index dd39e8e..ef5c7d2 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -16,6 +16,7 @@ use tokio::sync::mpsc; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; +use super::output::AggregatedBytes; use super::output::ChunkResponse; use super::{DownloadHandle, DownloadInput, DownloadInputBuilder}; @@ -74,19 +75,16 @@ async fn download_specific_chunk( .map_err(error::from_kind(error::ErrorKind::ChunkFailed))?; let body = mem::replace(&mut resp.body, ByteStream::new(SdkBody::taken())); - - let bytes = body - .collect() + let body = AggregatedBytes::from_byte_stream(body) .instrument(tracing::debug_span!( 
"collect-body-from-download-chunk", seq )) - .await - .map_err(error::from_kind(error::ErrorKind::ChunkFailed))?; + .await?; Ok(ChunkResponse { seq, - data: Some(bytes), + data: body, metadata: resp.into(), }) } diff --git a/aws-s3-transfer-manager/tests/download_test.rs b/aws-s3-transfer-manager/tests/download_test.rs index 9e4790b..c4c976d 100644 --- a/aws-s3-transfer-manager/tests/download_test.rs +++ b/aws-s3-transfer-manager/tests/download_test.rs @@ -49,7 +49,7 @@ async fn drain(handle: &mut DownloadHandle) -> Result { let body = handle.body_mut(); let mut data = BytesMut::new(); while let Some(chunk) = body.next().await { - let chunk = chunk?.into_bytes(); + let chunk = chunk?.data.into_bytes(); data.put(chunk); } From c2ab91a572e2f2e6bd66efc9f3ea1f2d21937b83 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Thu, 7 Nov 2024 13:54:18 -0800 Subject: [PATCH 07/48] sucks but works meta_data --- aws-s3-transfer-manager/examples/cp.rs | 2 +- .../src/operation/download.rs | 72 +++-- .../src/operation/download/chunk_meta.rs | 11 +- .../src/operation/download/handle.rs | 39 ++- .../src/operation/download/object_meta.rs | 266 ++++++++++++++++++ .../src/operation/download/service.rs | 17 +- 6 files changed, 357 insertions(+), 50 deletions(-) create mode 100644 aws-s3-transfer-manager/src/operation/download/object_meta.rs diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index 39f8692..211b800 100644 --- a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -178,7 +178,7 @@ async fn do_download(args: Args) -> Result<(), BoxError> { .await?; let elapsed = start.elapsed(); - let obj_size_bytes = handle.object_meta.total_size(); + let obj_size_bytes = 0; // TODO: Fix handle.object_meta.total_size(); let throughput = Throughput::new(obj_size_bytes, elapsed); println!( diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 
3caace0..93ae0dd 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -19,8 +19,10 @@ mod discovery; mod handle; pub use handle::DownloadHandle; +use object_meta::ObjectMetadata; use tracing::Instrument; +mod object_meta; mod chunk_meta; mod service; @@ -32,8 +34,8 @@ use output::{AggregatedBytes, ChunkResponse, DownloadOutput}; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; -use tokio::sync::mpsc; -use tokio::task::JoinSet; +use tokio::sync::{mpsc, oneshot, Mutex, OnceCell}; +use tokio::task::{self, JoinSet}; use super::TransferContext; @@ -63,8 +65,34 @@ impl Download { todo!("single part download not implemented") } - let concurrency = handle.num_workers(); let ctx = DownloadContext::new(handle); + let concurrency = ctx.handle.num_workers(); + let (comp_tx, comp_rx) = mpsc::channel(concurrency); + let (meta_tx, meta_rx) = oneshot::channel(); + + let tasks = Arc::new(Mutex::new(JoinSet::new())); + let discovery = tokio::spawn(send_discovery(tasks.clone(), ctx.clone(), comp_tx, meta_tx, input, use_current_span_as_parent_for_tasks)); + + + Ok(DownloadHandle { + body: DownloadOutput::new(comp_rx), + tasks, + discovery, + object_meta_receiver: Some(meta_rx), + object_meta: OnceCell::new(), + }) + } +} + +async fn send_discovery( + tasks: Arc>>, + ctx: DownloadContext, + comp_tx: mpsc::Sender>, + meta_tx: oneshot::Sender, + input: DownloadInput, + use_current_span_as_parent_for_tasks: bool, + ) -> Result<(), crate::error::Error> { + let mut tasks = tasks.lock().await; // create span to serve as parent of spawned child tasks. 
let parent_span_for_tasks = tracing::debug_span!( @@ -78,31 +106,22 @@ impl Download { parent_span_for_tasks.follows_from(tracing::Span::current()); } - // acquire a permit for discovery + // acquire a permit for discovery let permit = ctx.handle.scheduler.acquire_permit().await?; // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; - let (comp_tx, comp_rx) = mpsc::channel(concurrency); - // TODO: waahm7 fix let meta_data = discovery.meta.clone(); - let initial_chunk = discovery.initial_chunk.take(); + let _ = meta_tx.send(meta_data.clone().into()); - let mut handle = DownloadHandle { - // FIXME(aws-sdk-rust#1159) - initial object discovery for a range/first-part will not - // have the correct metadata w.r.t. content-length and maybe others for the whole object. - object_meta: discovery.meta, - body: DownloadOutput::new(comp_rx), - // spawn all work into the same JoinSet such that when the set is dropped all tasks are cancelled. - tasks: JoinSet::new(), - ctx, - }; + let initial_chunk = discovery.initial_chunk.take(); // spawn a task (if necessary) to handle the discovery chunk. This returns immediately so // that we can begin concurrently downloading any reamining chunks/parts ASAP let start_seq = handle_discovery_chunk( - &mut handle, + &mut tasks, + ctx.clone(), initial_chunk, &comp_tx, permit, @@ -113,17 +132,16 @@ impl Download { if !discovery.remaining.is_empty() { let remaining = discovery.remaining.clone(); distribute_work( - &mut handle, + &mut tasks, + ctx.clone(), remaining, input, start_seq, comp_tx, parent_span_for_tasks, - ) + ); } - - Ok(handle) - } + Ok(()) } /// Handle possibly sending the first chunk of data received through discovery. Returns @@ -134,7 +152,9 @@ impl Download { /// This allows remaining work to start immediately (and concurrently) without /// waiting for the first chunk. 
fn handle_discovery_chunk( - handle: &mut DownloadHandle, + //handle: &mut DownloadHandle, + tasks: &mut task::JoinSet<()>, + ctx: DownloadContext, initial_chunk: Option, completed: &mpsc::Sender>, permit: OwnedWorkPermit, @@ -142,11 +162,11 @@ fn handle_discovery_chunk( meta_data: ChunkMetadata, ) -> u64 { if let Some(stream) = initial_chunk { - let seq = handle.ctx.next_seq(); + let seq = ctx.next_seq(); let completed = completed.clone(); // spawn a task to actually read the discovery chunk without waiting for it so we // can get started sooner on any remaining work (if any) - handle.tasks.spawn(async move { + tasks.spawn(async move { let chunk = AggregatedBytes::from_byte_stream(stream) .await @@ -168,7 +188,7 @@ fn handle_discovery_chunk( } }.instrument(tracing::debug_span!(parent: parent_span_for_tasks.clone(), "collect-body-from-discovery", seq))); } - handle.ctx.current_seq() + ctx.current_seq() } /// Download operation specific state diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 8f9527d..9f818b2 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -15,10 +15,10 @@ use crate::http::header; // TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? 
// TODO(aws-sdk-rust#1159,docs): Document fields -/// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` +/// Object metadata other than the body #[derive(Debug, Clone, Default)] pub struct ChunkMetadata { - pub requet_id: Option, + pub request_id: Option, pub extended_request_id: Option, pub delete_marker: Option, pub accept_ranges: Option, @@ -58,6 +58,8 @@ pub struct ChunkMetadata { pub object_lock_legal_hold_status: Option, } + +// TODO: waahm7 figure out the content-length vs total_size impl ChunkMetadata { /// The total object size pub fn total_size(&self) -> u64 { @@ -102,7 +104,8 @@ impl ChunkMetadata { impl From for ChunkMetadata { fn from(value: GetObjectOutput) -> Self { Self { - requet_id: value.request_id().map(|s| s.to_string()), + // TODO: waahm7 should we just impl the traits? why traits? + request_id: value.request_id().map(|s| s.to_string()), extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, accept_ranges: value.accept_ranges, @@ -148,7 +151,7 @@ impl From for ChunkMetadata { impl From for ChunkMetadata { fn from(value: HeadObjectOutput) -> Self { Self { - requet_id: value.request_id().map(|s| s.to_string()), + request_id: value.request_id().map(|s| s.to_string()), extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, accept_ranges: value.accept_ranges, diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index c356477..f1de058 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -1,34 +1,48 @@ +use std::sync::Arc; + +use crate::error::{self, ErrorKind}; +use tokio::{sync::{oneshot::Receiver, Mutex, OnceCell}, task}; + /* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
* SPDX-License-Identifier: Apache-2.0 */ -use crate::operation::download::chunk_meta::ChunkMetadata; use crate::operation::download::output::DownloadOutput; -use tokio::task; -use super::DownloadContext; +use super::object_meta::ObjectMetadata; /// Response type for a single download object request. #[derive(Debug)] #[non_exhaustive] pub struct DownloadHandle { - /// Object metadata - pub object_meta: ChunkMetadata, + /// Object metadata. TODO: Is there a better way to do this than tokio oncecell? + pub(crate) object_meta_receiver: Option>, + pub(crate) object_meta: OnceCell>, /// The object content pub(crate) body: DownloadOutput, + /// Discovery task + pub(crate) discovery: task::JoinHandle>, + /// All child tasks spawned for this download - pub(crate) tasks: task::JoinSet<()>, + pub(crate) tasks: Arc>>, - /// The context used to drive an upload to completion - pub(crate) ctx: DownloadContext, + // /// The context used to drive an upload to completion + // pub(crate) ctx: DownloadContext, } impl DownloadHandle { + /// Object metadata - pub fn object_meta(&self) -> &ChunkMetadata { - &self.object_meta + pub async fn object_meta(&mut self) -> &Result { + + let meta = self.object_meta.get_or_init(|| async { + let meta = self.object_meta_receiver.take().unwrap(); + meta.await.map_err(error::from_kind(ErrorKind::RuntimeError)) + }).await; + + meta } /// Object content @@ -45,7 +59,10 @@ impl DownloadHandle { #[tracing::instrument(skip_all, level = "debug", name = "join-download")] pub async fn join(mut self) -> Result<(), crate::error::Error> { self.body.close(); - while let Some(join_result) = self.tasks.join_next().await { + + self.discovery.await??; + let mut tasks = self.tasks.lock().await; + while let Some(join_result) = tasks.join_next().await { join_result?; } Ok(()) diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs new file mode 100644 index 0000000..c09eda7 --- /dev/null 
+++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -0,0 +1,266 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +use std::str::FromStr; + +use aws_sdk_s3::operation::get_object::GetObjectOutput; +use aws_sdk_s3::operation::head_object::HeadObjectOutput; +use aws_sdk_s3::operation::RequestId; +use aws_sdk_s3::operation::RequestIdExt; + +use crate::http::header; + +use super::chunk_meta::ChunkMetadata; + +// TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? +// TODO(aws-sdk-rust#1159,docs): Document fields + +/// Object metadata other than the body +#[derive(Debug, Clone, Default)] +pub struct ObjectMetadata { + pub requet_id: Option, + pub extended_request_id: Option, + pub delete_marker: Option, + pub accept_ranges: Option, + pub expiration: Option, + pub restore: Option, + pub last_modified: Option<::aws_smithy_types::DateTime>, + pub content_length: Option, + pub e_tag: Option, + pub checksum_crc32: Option, + pub checksum_crc32_c: Option, + pub checksum_sha1: Option, + pub checksum_sha256: Option, + pub missing_meta: Option, + pub version_id: Option, + pub cache_control: Option, + pub content_disposition: Option, + pub content_encoding: Option, + pub content_language: Option, + pub content_range: Option, + pub content_type: Option, + pub expires: Option<::aws_smithy_types::DateTime>, + pub expires_string: Option, + pub website_redirect_location: Option, + pub server_side_encryption: Option, + pub metadata: Option<::std::collections::HashMap>, + pub sse_customer_algorithm: Option, + pub sse_customer_key_md5: Option, + pub ssekms_key_id: Option, + pub bucket_key_enabled: Option, + pub storage_class: Option, + pub request_charged: Option, + pub replication_status: Option, + pub parts_count: Option, + pub tag_count: Option, + pub object_lock_mode: Option, + pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, + pub 
object_lock_legal_hold_status: Option, +} + + +// TODO: waahm7 figure out the content-length vs total_size +impl ObjectMetadata { + /// The total object size + pub fn total_size(&self) -> u64 { + match (self.content_length, self.content_range.as_ref()) { + (_, Some(range)) => { + let total = range.split_once('/').map(|x| x.1).expect("content range total"); + total.parse().expect("valid range total") + } + (Some(length), None) => { + debug_assert!(length > 0, "content length invalid"); + length as u64 + }, + (None, None) => panic!("total object size cannot be calculated without either content length or content range headers") + } + } + + /// Content length from either the `Content-Range` or `Content-Length` headers + pub(crate) fn content_length(&self) -> u64 { + match (self.content_length, self.content_range.as_ref()) { + (Some(length), _) => { + debug_assert!(length > 0, "content length invalid"); + length as u64 + }, + (None, Some(range)) => { + let byte_range_str = range + .strip_prefix("bytes ") + .expect("content range bytes-unit recognized") + .split_once('/') + .map(|x| x.0) + .expect("content range valid"); + + match header::ByteRange::from_str(byte_range_str).expect("valid byte range") { + header::ByteRange::Inclusive(start, end) => end - start + 1, + _ => unreachable!("Content-Range header invalid") + } + } + (None, None) => panic!("total object size cannot be calculated without either content length or content range headers") + } + } +} + +impl From for ObjectMetadata { + fn from(value: ChunkMetadata) -> Self { + Self { + requet_id: value.request_id, + extended_request_id: value.extended_request_id, + delete_marker: value.delete_marker, + accept_ranges: value.accept_ranges, + expiration: value.expiration, + restore: value.restore, + last_modified: value.last_modified, + content_length: value.content_length, + e_tag: value.e_tag, + checksum_crc32: value.checksum_crc32, + checksum_crc32_c: value.checksum_crc32_c, + checksum_sha1: value.checksum_sha1, + 
checksum_sha256: value.checksum_sha256, + missing_meta: value.missing_meta, + version_id: value.version_id, + cache_control: value.cache_control, + content_disposition: value.content_disposition, + content_encoding: value.content_encoding, + content_language: value.content_language, + content_range: value.content_range, + content_type: value.content_type, + #[allow(deprecated)] + expires: value.expires, + expires_string: value.expires_string, + website_redirect_location: value.website_redirect_location, + server_side_encryption: value.server_side_encryption, + metadata: value.metadata, + sse_customer_algorithm: value.sse_customer_algorithm, + sse_customer_key_md5: value.sse_customer_key_md5, + ssekms_key_id: value.ssekms_key_id, + bucket_key_enabled: value.bucket_key_enabled, + storage_class: value.storage_class, + request_charged: value.request_charged, + replication_status: value.replication_status, + parts_count: value.parts_count, + tag_count: value.tag_count, + object_lock_mode: value.object_lock_mode, + object_lock_retain_until_date: value.object_lock_retain_until_date, + object_lock_legal_hold_status: value.object_lock_legal_hold_status, + } + } +} + +impl From for ObjectMetadata { + fn from(value: GetObjectOutput) -> Self { + Self { + // TODO: waahm7 should we just impl the traits? why traits? 
+ requet_id: value.request_id().map(|s| s.to_string()), + extended_request_id: value.extended_request_id().map(|s| s.to_string()), + delete_marker: value.delete_marker, + accept_ranges: value.accept_ranges, + expiration: value.expiration, + restore: value.restore, + last_modified: value.last_modified, + content_length: value.content_length, + e_tag: value.e_tag, + checksum_crc32: value.checksum_crc32, + checksum_crc32_c: value.checksum_crc32_c, + checksum_sha1: value.checksum_sha1, + checksum_sha256: value.checksum_sha256, + missing_meta: value.missing_meta, + version_id: value.version_id, + cache_control: value.cache_control, + content_disposition: value.content_disposition, + content_encoding: value.content_encoding, + content_language: value.content_language, + content_range: value.content_range, + content_type: value.content_type, + #[allow(deprecated)] + expires: value.expires, + expires_string: value.expires_string, + website_redirect_location: value.website_redirect_location, + server_side_encryption: value.server_side_encryption, + metadata: value.metadata, + sse_customer_algorithm: value.sse_customer_algorithm, + sse_customer_key_md5: value.sse_customer_key_md5, + ssekms_key_id: value.ssekms_key_id, + bucket_key_enabled: value.bucket_key_enabled, + storage_class: value.storage_class, + request_charged: value.request_charged, + replication_status: value.replication_status, + parts_count: value.parts_count, + tag_count: value.tag_count, + object_lock_mode: value.object_lock_mode, + object_lock_retain_until_date: value.object_lock_retain_until_date, + object_lock_legal_hold_status: value.object_lock_legal_hold_status, + } + } +} + +impl From for ObjectMetadata { + fn from(value: HeadObjectOutput) -> Self { + Self { + requet_id: value.request_id().map(|s| s.to_string()), + extended_request_id: value.extended_request_id().map(|s| s.to_string()), + delete_marker: value.delete_marker, + accept_ranges: value.accept_ranges, + expiration: value.expiration, + 
restore: value.restore, + last_modified: value.last_modified, + content_length: value.content_length, + e_tag: value.e_tag, + checksum_crc32: value.checksum_crc32, + checksum_crc32_c: value.checksum_crc32_c, + checksum_sha1: value.checksum_sha1, + checksum_sha256: value.checksum_sha256, + missing_meta: value.missing_meta, + version_id: value.version_id, + cache_control: value.cache_control, + content_disposition: value.content_disposition, + content_encoding: value.content_encoding, + content_language: value.content_language, + content_range: None, + content_type: value.content_type, + #[allow(deprecated)] + expires: value.expires, + expires_string: value.expires_string, + website_redirect_location: value.website_redirect_location, + server_side_encryption: value.server_side_encryption, + metadata: value.metadata, + sse_customer_algorithm: value.sse_customer_algorithm, + sse_customer_key_md5: value.sse_customer_key_md5, + ssekms_key_id: value.ssekms_key_id, + bucket_key_enabled: value.bucket_key_enabled, + storage_class: value.storage_class, + request_charged: value.request_charged, + replication_status: value.replication_status, + parts_count: value.parts_count, + tag_count: None, + object_lock_mode: value.object_lock_mode, + object_lock_retain_until_date: value.object_lock_retain_until_date, + object_lock_legal_hold_status: value.object_lock_legal_hold_status, + } + } +} + +#[cfg(test)] +mod tests { + use super::ObjectMetadata; + + #[test] + fn test_inferred_content_length() { + let meta = ObjectMetadata { + content_length: Some(4), + content_range: Some("should ignore".to_owned()), + ..Default::default() + }; + + assert_eq!(4, meta.content_length()); + + let meta = ObjectMetadata { + content_length: None, + content_range: Some("bytes 0-499/900".to_owned()), + ..Default::default() + }; + assert_eq!(500, meta.content_length()); + } +} diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs 
index ef5c7d2..64fbad3 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -9,6 +9,7 @@ use crate::middleware::retry; use crate::operation::download::DownloadContext; use aws_smithy_types::body::SdkBody; use aws_smithy_types::byte_stream::ByteStream; +use tokio::task; use std::cmp; use std::mem; use std::ops::RangeInclusive; @@ -18,7 +19,7 @@ use tracing::Instrument; use super::output::AggregatedBytes; use super::output::ChunkResponse; -use super::{DownloadHandle, DownloadInput, DownloadInputBuilder}; +use super::{DownloadInput, DownloadInputBuilder}; /// Request/input type for our "chunk" service. #[derive(Debug, Clone)] @@ -114,22 +115,23 @@ pub(super) fn chunk_service( /// * start_seq - the starting sequence number to use for chunks /// * comp_tx - the channel to send chunk responses to pub(super) fn distribute_work( - handle: &mut DownloadHandle, + tasks: &mut task::JoinSet<()>, + ctx: DownloadContext, remaining: RangeInclusive, input: DownloadInput, start_seq: u64, comp_tx: mpsc::Sender>, parent_span_for_tasks: tracing::Span, ) { - let svc = chunk_service(&handle.ctx); - let part_size = handle.ctx.target_part_size_bytes(); + let svc = chunk_service(&ctx); + let part_size = ctx.target_part_size_bytes(); let input: DownloadInputBuilder = input.into(); let size = *remaining.end() - *remaining.start() + 1; let num_parts = size.div_ceil(part_size); for _ in 0..num_parts { let req = DownloadChunkRequest { - ctx: handle.ctx.clone(), + ctx: ctx.clone(), remaining: remaining.clone(), input: input.clone(), start_seq, @@ -139,14 +141,13 @@ pub(super) fn distribute_work( let comp_tx = comp_tx.clone(); let task = async move { + // TODO: If downloading a chunk fails, do we want to abort the download? 
let resp = svc.oneshot(req).await; if let Err(err) = comp_tx.send(resp).await { tracing::debug!(error = ?err, "chunk send failed, channel closed"); } }; - handle - .tasks - .spawn(task.instrument(parent_span_for_tasks.clone())); + tasks.spawn(task.instrument(parent_span_for_tasks.clone())); } tracing::trace!("work fully distributed"); From 868a45c5a349d43c7fdfd9beb0c86a54a8212060 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Thu, 7 Nov 2024 16:26:31 -0800 Subject: [PATCH 08/48] refactor chunk vs object meta_data --- .../src/operation/download.rs | 110 ++++---- .../src/operation/download/chunk_meta.rs | 168 +----------- .../src/operation/download/discovery.rs | 30 ++- .../src/operation/download/handle.rs | 20 +- .../src/operation/download/object_meta.rs | 246 ++++++------------ .../src/operation/download/service.rs | 2 +- 6 files changed, 172 insertions(+), 404 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 93ae0dd..615c50d 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -6,7 +6,6 @@ mod input; use aws_sdk_s3::error::DisplayErrorContext; -use chunk_meta::ChunkMetadata; /// Request type for dowloading a single object from Amazon S3 pub use input::{DownloadInput, DownloadInputBuilder}; @@ -19,11 +18,10 @@ mod discovery; mod handle; pub use handle::DownloadHandle; -use object_meta::ObjectMetadata; use tracing::Instrument; -mod object_meta; mod chunk_meta; +mod object_meta; mod service; use crate::error; @@ -36,6 +34,8 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use tokio::sync::{mpsc, oneshot, Mutex, OnceCell}; use tokio::task::{self, JoinSet}; +use chunk_meta::ChunkMetadata; +use object_meta::ObjectMetadata; use super::TransferContext; @@ -71,8 +71,14 @@ impl Download { let (meta_tx, meta_rx) = oneshot::channel(); let tasks = Arc::new(Mutex::new(JoinSet::new())); - let 
discovery = tokio::spawn(send_discovery(tasks.clone(), ctx.clone(), comp_tx, meta_tx, input, use_current_span_as_parent_for_tasks)); - + let discovery = tokio::spawn(send_discovery( + tasks.clone(), + ctx.clone(), + comp_tx, + meta_tx, + input, + use_current_span_as_parent_for_tasks, + )); Ok(DownloadHandle { body: DownloadOutput::new(comp_rx), @@ -91,57 +97,56 @@ async fn send_discovery( meta_tx: oneshot::Sender, input: DownloadInput, use_current_span_as_parent_for_tasks: bool, - ) -> Result<(), crate::error::Error> { - let mut tasks = tasks.lock().await; - - // create span to serve as parent of spawned child tasks. - let parent_span_for_tasks = tracing::debug_span!( - parent: if use_current_span_as_parent_for_tasks { tracing::Span::current().id() } else { None } , - "download-tasks", - bucket = input.bucket().unwrap_or_default(), - key = input.key().unwrap_or_default(), - ); - if !use_current_span_as_parent_for_tasks { - // if not child of current span, then "follows from" current span - parent_span_for_tasks.follows_from(tracing::Span::current()); - } - - // acquire a permit for discovery - let permit = ctx.handle.scheduler.acquire_permit().await?; - - // make initial discovery about the object size, metadata, possibly first chunk - let mut discovery = discover_obj(&ctx, &input).await?; - // TODO: waahm7 fix - let meta_data = discovery.meta.clone(); - let _ = meta_tx.send(meta_data.clone().into()); - - let initial_chunk = discovery.initial_chunk.take(); +) -> Result<(), crate::error::Error> { + let mut tasks = tasks.lock().await; + + // create span to serve as parent of spawned child tasks. 
+ let parent_span_for_tasks = tracing::debug_span!( + parent: if use_current_span_as_parent_for_tasks { tracing::Span::current().id() } else { None } , + "download-tasks", + bucket = input.bucket().unwrap_or_default(), + key = input.key().unwrap_or_default(), + ); + if !use_current_span_as_parent_for_tasks { + // if not child of current span, then "follows from" current span + parent_span_for_tasks.follows_from(tracing::Span::current()); + } - // spawn a task (if necessary) to handle the discovery chunk. This returns immediately so - // that we can begin concurrently downloading any reamining chunks/parts ASAP - let start_seq = handle_discovery_chunk( + // acquire a permit for discovery + let permit = ctx.handle.scheduler.acquire_permit().await?; + + // make initial discovery about the object size, metadata, possibly first chunk + let mut discovery = discover_obj(&ctx, &input).await?; + // TODO: waahm7 fix + let _ = meta_tx.send(discovery.object_meta); + + let initial_chunk = discovery.initial_chunk.take(); + + // spawn a task (if necessary) to handle the discovery chunk. This returns immediately so + // that we can begin concurrently downloading any remaining chunks/parts ASAP + let start_seq = handle_discovery_chunk( + &mut tasks, + ctx.clone(), + initial_chunk, + &comp_tx, + permit, + parent_span_for_tasks.clone(), + discovery.chunk_meta, + ); + + if !discovery.remaining.is_empty() { + let remaining = discovery.remaining.clone(); + distribute_work( &mut tasks, ctx.clone(), - initial_chunk, - &comp_tx, - permit, - parent_span_for_tasks.clone(), - meta_data, + remaining, + input, + start_seq, + comp_tx, + parent_span_for_tasks, ); - - if !discovery.remaining.is_empty() { - let remaining = discovery.remaining.clone(); - distribute_work( - &mut tasks, - ctx.clone(), - remaining, - input, - start_seq, - comp_tx, - parent_span_for_tasks, - ); - } - Ok(()) + } + Ok(()) } /// Handle possibly sending the first chunk of data received through discovery. 
Returns @@ -188,6 +193,7 @@ fn handle_discovery_chunk( } }.instrument(tracing::debug_span!(parent: parent_span_for_tasks.clone(), "collect-body-from-discovery", seq))); } + // TODO: waahm7 handle head-object metadata. Discussion point ctx.current_seq() } diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 9f818b2..1f0a474 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -3,15 +3,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -use std::str::FromStr; - use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::head_object::HeadObjectOutput; use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; -use crate::http::header; - // TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? // TODO(aws-sdk-rust#1159,docs): Document fields @@ -20,85 +16,13 @@ use crate::http::header; pub struct ChunkMetadata { pub request_id: Option, pub extended_request_id: Option, - pub delete_marker: Option, - pub accept_ranges: Option, - pub expiration: Option, - pub restore: Option, - pub last_modified: Option<::aws_smithy_types::DateTime>, pub content_length: Option, - pub e_tag: Option, + pub content_range: Option, pub checksum_crc32: Option, pub checksum_crc32_c: Option, pub checksum_sha1: Option, pub checksum_sha256: Option, - pub missing_meta: Option, - pub version_id: Option, - pub cache_control: Option, - pub content_disposition: Option, - pub content_encoding: Option, - pub content_language: Option, - pub content_range: Option, - pub content_type: Option, - pub expires: Option<::aws_smithy_types::DateTime>, - pub expires_string: Option, - pub website_redirect_location: Option, - pub server_side_encryption: Option, - pub metadata: Option<::std::collections::HashMap>, - pub sse_customer_algorithm: Option, - pub sse_customer_key_md5: 
Option, - pub ssekms_key_id: Option, - pub bucket_key_enabled: Option, - pub storage_class: Option, pub request_charged: Option, - pub replication_status: Option, - pub parts_count: Option, - pub tag_count: Option, - pub object_lock_mode: Option, - pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, - pub object_lock_legal_hold_status: Option, -} - - -// TODO: waahm7 figure out the content-length vs total_size -impl ChunkMetadata { - /// The total object size - pub fn total_size(&self) -> u64 { - match (self.content_length, self.content_range.as_ref()) { - (_, Some(range)) => { - let total = range.split_once('/').map(|x| x.1).expect("content range total"); - total.parse().expect("valid range total") - } - (Some(length), None) => { - debug_assert!(length > 0, "content length invalid"); - length as u64 - }, - (None, None) => panic!("total object size cannot be calculated without either content length or content range headers") - } - } - - /// Content length from either the `Content-Range` or `Content-Length` headers - pub(crate) fn content_length(&self) -> u64 { - match (self.content_length, self.content_range.as_ref()) { - (Some(length), _) => { - debug_assert!(length > 0, "content length invalid"); - length as u64 - }, - (None, Some(range)) => { - let byte_range_str = range - .strip_prefix("bytes ") - .expect("content range bytes-unit recognized") - .split_once('/') - .map(|x| x.0) - .expect("content range valid"); - - match header::ByteRange::from_str(byte_range_str).expect("valid byte range") { - header::ByteRange::Inclusive(start, end) => end - start + 1, - _ => unreachable!("Content-Range header invalid") - } - } - (None, None) => panic!("total object size cannot be calculated without either content length or content range headers") - } - } } impl From for ChunkMetadata { @@ -107,43 +31,13 @@ impl From for ChunkMetadata { // TODO: waahm7 should we just impl the traits? why traits? 
request_id: value.request_id().map(|s| s.to_string()), extended_request_id: value.extended_request_id().map(|s| s.to_string()), - delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges, - expiration: value.expiration, - restore: value.restore, - last_modified: value.last_modified, content_length: value.content_length, - e_tag: value.e_tag, + content_range: value.content_range, checksum_crc32: value.checksum_crc32, checksum_crc32_c: value.checksum_crc32_c, checksum_sha1: value.checksum_sha1, checksum_sha256: value.checksum_sha256, - missing_meta: value.missing_meta, - version_id: value.version_id, - cache_control: value.cache_control, - content_disposition: value.content_disposition, - content_encoding: value.content_encoding, - content_language: value.content_language, - content_range: value.content_range, - content_type: value.content_type, - #[allow(deprecated)] - expires: value.expires, - expires_string: value.expires_string, - website_redirect_location: value.website_redirect_location, - server_side_encryption: value.server_side_encryption, - metadata: value.metadata, - sse_customer_algorithm: value.sse_customer_algorithm, - sse_customer_key_md5: value.sse_customer_key_md5, - ssekms_key_id: value.ssekms_key_id, - bucket_key_enabled: value.bucket_key_enabled, - storage_class: value.storage_class, request_charged: value.request_charged, - replication_status: value.replication_status, - parts_count: value.parts_count, - tag_count: value.tag_count, - object_lock_mode: value.object_lock_mode, - object_lock_retain_until_date: value.object_lock_retain_until_date, - object_lock_legal_hold_status: value.object_lock_legal_hold_status, } } } @@ -153,66 +47,14 @@ impl From for ChunkMetadata { Self { request_id: value.request_id().map(|s| s.to_string()), extended_request_id: value.extended_request_id().map(|s| s.to_string()), - delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges, - expiration: value.expiration, - restore: 
value.restore, - last_modified: value.last_modified, - content_length: value.content_length, - e_tag: value.e_tag, + // Do not include Content-Length as it will not be accurate as chunk metadata + content_length: None, + content_range: None, checksum_crc32: value.checksum_crc32, checksum_crc32_c: value.checksum_crc32_c, checksum_sha1: value.checksum_sha1, checksum_sha256: value.checksum_sha256, - missing_meta: value.missing_meta, - version_id: value.version_id, - cache_control: value.cache_control, - content_disposition: value.content_disposition, - content_encoding: value.content_encoding, - content_language: value.content_language, - content_range: None, - content_type: value.content_type, - #[allow(deprecated)] - expires: value.expires, - expires_string: value.expires_string, - website_redirect_location: value.website_redirect_location, - server_side_encryption: value.server_side_encryption, - metadata: value.metadata, - sse_customer_algorithm: value.sse_customer_algorithm, - sse_customer_key_md5: value.sse_customer_key_md5, - ssekms_key_id: value.ssekms_key_id, - bucket_key_enabled: value.bucket_key_enabled, - storage_class: value.storage_class, request_charged: value.request_charged, - replication_status: value.replication_status, - parts_count: value.parts_count, - tag_count: None, - object_lock_mode: value.object_lock_mode, - object_lock_retain_until_date: value.object_lock_retain_until_date, - object_lock_legal_hold_status: value.object_lock_legal_hold_status, } } } - -#[cfg(test)] -mod tests { - use super::ChunkMetadata; - - #[test] - fn test_inferred_content_length() { - let meta = ChunkMetadata { - content_length: Some(4), - content_range: Some("should ignore".to_owned()), - ..Default::default() - }; - - assert_eq!(4, meta.content_length()); - - let meta = ChunkMetadata { - content_length: None, - content_range: Some("bytes 0-499/900".to_owned()), - ..Default::default() - }; - assert_eq!(500, meta.content_length()); - } -} diff --git 
a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index 89f98ea..3bed1d2 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -13,6 +13,7 @@ use aws_smithy_types::byte_stream::ByteStream; use tracing::Instrument; use super::chunk_meta::ChunkMetadata; +use super::object_meta::ObjectMetadata; use super::DownloadContext; use super::DownloadInput; use crate::error; @@ -35,7 +36,8 @@ pub(super) struct ObjectDiscovery { pub(super) remaining: RangeInclusive, /// the discovered metadata - pub(super) meta: ChunkMetadata, + pub(super) chunk_meta: ChunkMetadata, + pub(super) object_meta: ObjectMetadata, /// the first chunk of data if fetched during discovery pub(super) initial_chunk: Option, @@ -111,28 +113,30 @@ async fn discover_obj_with_head( input: &DownloadInput, byte_range: Option, ) -> Result { - let meta: ChunkMetadata = ctx + let resp = ctx .client() .head_object() .set_bucket(input.bucket().map(str::to_string)) .set_key(input.key().map(str::to_string)) .send() .await - .map_err(error::discovery_failed)? 
- .into(); + .map_err(error::discovery_failed)?; + let object_meta: ObjectMetadata = (&resp).into(); + let chunk_meta: ChunkMetadata = resp.into(); let remaining = match byte_range { Some(range) => match range { ByteRange::Inclusive(start, end) => start..=end, - ByteRange::AllFrom(start) => start..=meta.total_size(), - ByteRange::Last(n) => (meta.total_size() - n + 1)..=meta.total_size(), + ByteRange::AllFrom(start) => start..=object_meta.total_size(), + ByteRange::Last(n) => (object_meta.total_size() - n + 1)..=object_meta.total_size(), }, - None => 0..=meta.total_size(), + None => 0..=object_meta.total_size(), }; Ok(ObjectDiscovery { remaining, - meta, + chunk_meta, + object_meta, initial_chunk: None, }) } @@ -154,12 +158,13 @@ async fn discover_obj_with_get( let empty_stream = ByteStream::new(SdkBody::empty()); let body = mem::replace(&mut resp.body, empty_stream); - let meta: ChunkMetadata = resp.into(); - let content_len = meta.content_length(); + let object_meta: ObjectMetadata = (&resp).into(); + let chunk_meta: ChunkMetadata = resp.into(); + let content_len = chunk_meta.content_length.expect("expected content_length") as u64; let remaining = match range { Some(range) => (*range.start() + content_len)..=*range.end(), - None => content_len..=meta.total_size() - 1, + None => content_len..=object_meta.total_size() - 1, }; let initial_chunk = match content_len == 0 { @@ -169,7 +174,8 @@ async fn discover_obj_with_get( Ok(ObjectDiscovery { remaining, - meta, + chunk_meta, + object_meta, initial_chunk, }) } diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index f1de058..10a1967 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -1,7 +1,10 @@ use std::sync::Arc; use crate::error::{self, ErrorKind}; -use tokio::{sync::{oneshot::Receiver, Mutex, OnceCell}, task}; +use tokio::{ + sync::{oneshot::Receiver, 
Mutex, OnceCell}, + task, +}; /* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. @@ -27,20 +30,21 @@ pub struct DownloadHandle { /// All child tasks spawned for this download pub(crate) tasks: Arc>>, - // /// The context used to drive an upload to completion // pub(crate) ctx: DownloadContext, } impl DownloadHandle { - /// Object metadata pub async fn object_meta(&mut self) -> &Result { - - let meta = self.object_meta.get_or_init(|| async { - let meta = self.object_meta_receiver.take().unwrap(); - meta.await.map_err(error::from_kind(ErrorKind::RuntimeError)) - }).await; + let meta = self + .object_meta + .get_or_init(|| async { + let meta = self.object_meta_receiver.take().unwrap(); + meta.await + .map_err(error::from_kind(ErrorKind::RuntimeError)) + }) + .await; meta } diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index c09eda7..6acb5b2 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -3,16 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -use std::str::FromStr; - use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::head_object::HeadObjectOutput; -use aws_sdk_s3::operation::RequestId; -use aws_sdk_s3::operation::RequestIdExt; - -use crate::http::header; - -use super::chunk_meta::ChunkMetadata; // TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? 
// TODO(aws-sdk-rust#1159,docs): Document fields @@ -20,14 +12,12 @@ use super::chunk_meta::ChunkMetadata; /// Object metadata other than the body #[derive(Debug, Clone, Default)] pub struct ObjectMetadata { - pub requet_id: Option, - pub extended_request_id: Option, pub delete_marker: Option, pub accept_ranges: Option, pub expiration: Option, pub restore: Option, pub last_modified: Option<::aws_smithy_types::DateTime>, - pub content_length: Option, + pub(crate) content_length: Option, pub e_tag: Option, pub checksum_crc32: Option, pub checksum_crc32_c: Option, @@ -39,7 +29,7 @@ pub struct ObjectMetadata { pub content_disposition: Option, pub content_encoding: Option, pub content_language: Option, - pub content_range: Option, + pub(crate) content_range: Option, pub content_type: Option, pub expires: Option<::aws_smithy_types::DateTime>, pub expires_string: Option, @@ -51,7 +41,6 @@ pub struct ObjectMetadata { pub ssekms_key_id: Option, pub bucket_key_enabled: Option, pub storage_class: Option, - pub request_charged: Option, pub replication_status: Option, pub parts_count: Option, pub tag_count: Option, @@ -60,8 +49,6 @@ pub struct ObjectMetadata { pub object_lock_legal_hold_status: Option, } - -// TODO: waahm7 figure out the content-length vs total_size impl ObjectMetadata { /// The total object size pub fn total_size(&self) -> u64 { @@ -77,190 +64,113 @@ impl ObjectMetadata { (None, None) => panic!("total object size cannot be calculated without either content length or content range headers") } } - - /// Content length from either the `Content-Range` or `Content-Length` headers - pub(crate) fn content_length(&self) -> u64 { - match (self.content_length, self.content_range.as_ref()) { - (Some(length), _) => { - debug_assert!(length > 0, "content length invalid"); - length as u64 - }, - (None, Some(range)) => { - let byte_range_str = range - .strip_prefix("bytes ") - .expect("content range bytes-unit recognized") - .split_once('/') - .map(|x| x.0) - 
.expect("content range valid"); - - match header::ByteRange::from_str(byte_range_str).expect("valid byte range") { - header::ByteRange::Inclusive(start, end) => end - start + 1, - _ => unreachable!("Content-Range header invalid") - } - } - (None, None) => panic!("total object size cannot be calculated without either content length or content range headers") - } - } -} - -impl From for ObjectMetadata { - fn from(value: ChunkMetadata) -> Self { - Self { - requet_id: value.request_id, - extended_request_id: value.extended_request_id, - delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges, - expiration: value.expiration, - restore: value.restore, - last_modified: value.last_modified, - content_length: value.content_length, - e_tag: value.e_tag, - checksum_crc32: value.checksum_crc32, - checksum_crc32_c: value.checksum_crc32_c, - checksum_sha1: value.checksum_sha1, - checksum_sha256: value.checksum_sha256, - missing_meta: value.missing_meta, - version_id: value.version_id, - cache_control: value.cache_control, - content_disposition: value.content_disposition, - content_encoding: value.content_encoding, - content_language: value.content_language, - content_range: value.content_range, - content_type: value.content_type, - #[allow(deprecated)] - expires: value.expires, - expires_string: value.expires_string, - website_redirect_location: value.website_redirect_location, - server_side_encryption: value.server_side_encryption, - metadata: value.metadata, - sse_customer_algorithm: value.sse_customer_algorithm, - sse_customer_key_md5: value.sse_customer_key_md5, - ssekms_key_id: value.ssekms_key_id, - bucket_key_enabled: value.bucket_key_enabled, - storage_class: value.storage_class, - request_charged: value.request_charged, - replication_status: value.replication_status, - parts_count: value.parts_count, - tag_count: value.tag_count, - object_lock_mode: value.object_lock_mode, - object_lock_retain_until_date: value.object_lock_retain_until_date, - 
object_lock_legal_hold_status: value.object_lock_legal_hold_status, - } - } } -impl From for ObjectMetadata { - fn from(value: GetObjectOutput) -> Self { +impl From<&GetObjectOutput> for ObjectMetadata { + fn from(value: &GetObjectOutput) -> Self { Self { - // TODO: waahm7 should we just impl the traits? why traits? - requet_id: value.request_id().map(|s| s.to_string()), - extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges, - expiration: value.expiration, - restore: value.restore, + accept_ranges: value.accept_ranges.clone(), + expiration: value.expiration.clone(), + restore: value.restore.clone(), last_modified: value.last_modified, content_length: value.content_length, - e_tag: value.e_tag, - checksum_crc32: value.checksum_crc32, - checksum_crc32_c: value.checksum_crc32_c, - checksum_sha1: value.checksum_sha1, - checksum_sha256: value.checksum_sha256, + e_tag: value.e_tag.clone(), + checksum_crc32: value.checksum_crc32.clone(), + checksum_crc32_c: value.checksum_crc32_c.clone(), + checksum_sha1: value.checksum_sha1.clone(), + checksum_sha256: value.checksum_sha256.clone(), missing_meta: value.missing_meta, - version_id: value.version_id, - cache_control: value.cache_control, - content_disposition: value.content_disposition, - content_encoding: value.content_encoding, - content_language: value.content_language, - content_range: value.content_range, - content_type: value.content_type, + version_id: value.version_id.clone(), + cache_control: value.cache_control.clone(), + content_disposition: value.content_disposition.clone(), + content_encoding: value.content_encoding.clone(), + content_language: value.content_language.clone(), + content_range: value.content_range.clone(), + content_type: value.content_type.clone(), #[allow(deprecated)] expires: value.expires, - expires_string: value.expires_string, - website_redirect_location: value.website_redirect_location, - 
server_side_encryption: value.server_side_encryption, - metadata: value.metadata, - sse_customer_algorithm: value.sse_customer_algorithm, - sse_customer_key_md5: value.sse_customer_key_md5, - ssekms_key_id: value.ssekms_key_id, + expires_string: value.expires_string.clone(), + website_redirect_location: value.website_redirect_location.clone(), + server_side_encryption: value.server_side_encryption.clone(), + metadata: value.metadata.clone(), + sse_customer_algorithm: value.sse_customer_algorithm.clone(), + sse_customer_key_md5: value.sse_customer_key_md5.clone(), + ssekms_key_id: value.ssekms_key_id.clone(), bucket_key_enabled: value.bucket_key_enabled, - storage_class: value.storage_class, - request_charged: value.request_charged, - replication_status: value.replication_status, + storage_class: value.storage_class.clone(), + replication_status: value.replication_status.clone(), parts_count: value.parts_count, tag_count: value.tag_count, - object_lock_mode: value.object_lock_mode, - object_lock_retain_until_date: value.object_lock_retain_until_date, - object_lock_legal_hold_status: value.object_lock_legal_hold_status, + object_lock_mode: value.object_lock_mode.clone(), + object_lock_retain_until_date: value.object_lock_retain_until_date.clone(), + object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), } } } -impl From for ObjectMetadata { - fn from(value: HeadObjectOutput) -> Self { +impl From<&HeadObjectOutput> for ObjectMetadata { + fn from(value: &HeadObjectOutput) -> Self { Self { - requet_id: value.request_id().map(|s| s.to_string()), - extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges, - expiration: value.expiration, - restore: value.restore, + accept_ranges: value.accept_ranges.clone(), + expiration: value.expiration.clone(), + restore: value.restore.clone(), last_modified: value.last_modified, content_length: value.content_length, - e_tag: 
value.e_tag, - checksum_crc32: value.checksum_crc32, - checksum_crc32_c: value.checksum_crc32_c, - checksum_sha1: value.checksum_sha1, - checksum_sha256: value.checksum_sha256, + e_tag: value.e_tag.clone(), + checksum_crc32: value.checksum_crc32.clone(), + checksum_crc32_c: value.checksum_crc32_c.clone(), + checksum_sha1: value.checksum_sha1.clone(), + checksum_sha256: value.checksum_sha256.clone(), missing_meta: value.missing_meta, - version_id: value.version_id, - cache_control: value.cache_control, - content_disposition: value.content_disposition, - content_encoding: value.content_encoding, - content_language: value.content_language, + version_id: value.version_id.clone(), + cache_control: value.cache_control.clone(), + content_disposition: value.content_disposition.clone(), + content_encoding: value.content_encoding.clone(), + content_language: value.content_language.clone(), content_range: None, - content_type: value.content_type, + content_type: value.content_type.clone(), #[allow(deprecated)] expires: value.expires, - expires_string: value.expires_string, - website_redirect_location: value.website_redirect_location, - server_side_encryption: value.server_side_encryption, - metadata: value.metadata, - sse_customer_algorithm: value.sse_customer_algorithm, - sse_customer_key_md5: value.sse_customer_key_md5, - ssekms_key_id: value.ssekms_key_id, + expires_string: value.expires_string.clone(), + website_redirect_location: value.website_redirect_location.clone(), + server_side_encryption: value.server_side_encryption.clone(), + metadata: value.metadata.clone(), + sse_customer_algorithm: value.sse_customer_algorithm.clone(), + sse_customer_key_md5: value.sse_customer_key_md5.clone(), + ssekms_key_id: value.ssekms_key_id.clone(), bucket_key_enabled: value.bucket_key_enabled, - storage_class: value.storage_class, - request_charged: value.request_charged, - replication_status: value.replication_status, + storage_class: value.storage_class.clone(), + 
replication_status: value.replication_status.clone(), parts_count: value.parts_count, tag_count: None, - object_lock_mode: value.object_lock_mode, - object_lock_retain_until_date: value.object_lock_retain_until_date, - object_lock_legal_hold_status: value.object_lock_legal_hold_status, + object_lock_mode: value.object_lock_mode.clone(), + object_lock_retain_until_date: value.object_lock_retain_until_date.clone(), + object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), } } } #[cfg(test)] mod tests { - use super::ObjectMetadata; - - #[test] - fn test_inferred_content_length() { - let meta = ObjectMetadata { - content_length: Some(4), - content_range: Some("should ignore".to_owned()), - ..Default::default() - }; - - assert_eq!(4, meta.content_length()); - - let meta = ObjectMetadata { - content_length: None, - content_range: Some("bytes 0-499/900".to_owned()), - ..Default::default() - }; - assert_eq!(500, meta.content_length()); - } + // use super::ObjectMetadata; + + // #[test] + // fn test_inferred_content_length() { + // let meta = ObjectMetadata { + // content_length: Some(4), + // content_range: Some("should ignore".to_owned()), + // ..Default::default() + // }; + + // assert_eq!(4, meta.content_length()); + + // let meta = ObjectMetadata { + // content_length: None, + // content_range: Some("bytes 0-499/900".to_owned()), + // ..Default::default() + // }; + // assert_eq!(500, meta.content_length()); + // } } diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 64fbad3..7064a3e 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -9,11 +9,11 @@ use crate::middleware::retry; use crate::operation::download::DownloadContext; use aws_smithy_types::body::SdkBody; use aws_smithy_types::byte_stream::ByteStream; -use tokio::task; use std::cmp; use std::mem; use 
std::ops::RangeInclusive; use tokio::sync::mpsc; +use tokio::task; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; From c9c855279ee3a87354ce3d6a33461742180f6bc7 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Fri, 8 Nov 2024 10:35:42 -0800 Subject: [PATCH 09/48] no async send --- .../src/operation/download.rs | 5 ++--- .../src/operation/download/builders.rs | 8 ++++---- .../src/operation/download/object_meta.rs | 19 +++++++++++++++++-- .../src/operation/download_objects/worker.rs | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 615c50d..9b47248 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -55,7 +55,7 @@ impl Download { /// "follow from" the current span, but be under their own root of the trace tree. /// Use this for `TransferManager.download().send()`, where the spawned tasks /// should NOT extend the life of the current `send()` span. 
- pub(crate) async fn orchestrate( + pub(crate) fn orchestrate( handle: Arc, input: crate::operation::download::DownloadInput, use_current_span_as_parent_for_tasks: bool, @@ -135,11 +135,10 @@ async fn send_discovery( ); if !discovery.remaining.is_empty() { - let remaining = discovery.remaining.clone(); distribute_work( &mut tasks, ctx.clone(), - remaining, + discovery.remaining, input, start_seq, comp_tx, diff --git a/aws-s3-transfer-manager/src/operation/download/builders.rs b/aws-s3-transfer-manager/src/operation/download/builders.rs index d74d9ec..2405680 100644 --- a/aws-s3-transfer-manager/src/operation/download/builders.rs +++ b/aws-s3-transfer-manager/src/operation/download/builders.rs @@ -26,9 +26,9 @@ impl DownloadFluentBuilder { bucket = self.inner.bucket.as_deref().unwrap_or_default(), key = self.inner.key.as_deref().unwrap_or_default(), ))] - pub async fn send(self) -> Result { + pub fn send(self) -> Result { let input = self.inner.build()?; - crate::operation::download::Download::orchestrate(self.handle, input, false).await + crate::operation::download::Download::orchestrate(self.handle, input, false) } ///

The bucket name containing the object.

@@ -524,12 +524,12 @@ impl DownloadFluentBuilder { impl crate::operation::download::input::DownloadInputBuilder { /// Initiate a download transfer for a single object with this input using the given client. - pub async fn send_with( + pub fn send_with( self, client: &crate::Client, ) -> Result { let mut fluent_builder = client.download(); fluent_builder.inner = self; - fluent_builder.send().await + fluent_builder.send() } } diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 6acb5b2..149da6e 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -5,6 +5,8 @@ use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::head_object::HeadObjectOutput; +use aws_sdk_s3::operation::RequestId; +use aws_sdk_s3::operation::RequestIdExt; // TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? // TODO(aws-sdk-rust#1159,docs): Document fields @@ -47,6 +49,13 @@ pub struct ObjectMetadata { pub object_lock_mode: Option, pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, pub object_lock_legal_hold_status: Option, + + // request_id if the client made a request to get object metadata, like HeadObject. + pub request_id: Option, + // extended_request_id if the client made a request to get object metadata, like HeadObject. + pub extended_request_id: Option, + // whether the request that client made only to object metadata, like HeadObject. 
+ pub request_charged: Option, } impl ObjectMetadata { @@ -103,8 +112,11 @@ impl From<&GetObjectOutput> for ObjectMetadata { parts_count: value.parts_count, tag_count: value.tag_count, object_lock_mode: value.object_lock_mode.clone(), - object_lock_retain_until_date: value.object_lock_retain_until_date.clone(), + object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), + request_id: None, + extended_request_id: None, + request_charged: None, } } } @@ -146,8 +158,11 @@ impl From<&HeadObjectOutput> for ObjectMetadata { parts_count: value.parts_count, tag_count: None, object_lock_mode: value.object_lock_mode.clone(), - object_lock_retain_until_date: value.object_lock_retain_until_date.clone(), + object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), + request_id: value.request_id().map(|s| s.to_string()), + extended_request_id: value.extended_request_id().map(|s| s.to_string()), + request_charged: value.request_charged.clone(), } } } diff --git a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs index b881585..492e869 100644 --- a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs +++ b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs @@ -143,7 +143,7 @@ async fn download_single_obj( let key_path = local_key_path(root_dir, key.as_str(), prefix, delim)?; let mut handle = - crate::operation::download::Download::orchestrate(ctx.handle.clone(), input, true).await?; + crate::operation::download::Download::orchestrate(ctx.handle.clone(), input, true)?; let mut body = mem::replace(&mut handle.body, DownloadOutput::empty()); let parent_dir = key_path.parent().expect("valid parent dir for key"); From 5636926318ae693e1ccb134734b1a6980c5ebf40 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: 
Fri, 8 Nov 2024 14:10:32 -0800 Subject: [PATCH 10/48] No need for oncecell, I have mut reference --- aws-s3-transfer-manager/examples/cp.rs | 5 ++-- .../src/operation/download.rs | 5 ++-- .../src/operation/download/handle.rs | 28 ++++++++++--------- .../tests/download_test.rs | 5 ---- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index 211b800..b82e39b 100644 --- a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -171,14 +171,15 @@ async fn do_download(args: Args) -> Result<(), BoxError> { // TODO(aws-sdk-rust#1159) - rewrite this less naively, // likely abstract this into performant utils for single file download. Higher level // TM will handle it's own thread pool for filesystem work - let mut handle = tm.download().bucket(bucket).key(key).send().await?; + let mut handle = tm.download().bucket(bucket).key(key).send()?; + let metadata = handle.object_meta().await?; write_body(handle.body_mut(), dest) .instrument(tracing::debug_span!("write-output")) .await?; let elapsed = start.elapsed(); - let obj_size_bytes = 0; // TODO: Fix handle.object_meta.total_size(); + let obj_size_bytes = metadata.total_size(); let throughput = Throughput::new(obj_size_bytes, elapsed); println!( diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 9b47248..cf459bf 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -32,7 +32,7 @@ use output::{AggregatedBytes, ChunkResponse, DownloadOutput}; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; -use tokio::sync::{mpsc, oneshot, Mutex, OnceCell}; +use tokio::sync::{mpsc, oneshot, Mutex}; use tokio::task::{self, JoinSet}; use chunk_meta::ChunkMetadata; use object_meta::ObjectMetadata; @@ -85,7 +85,7 @@ impl Download { tasks, 
discovery, object_meta_receiver: Some(meta_rx), - object_meta: OnceCell::new(), + object_meta: None, }) } } @@ -113,6 +113,7 @@ async fn send_discovery( } // acquire a permit for discovery + // TODO: Verify, if this fails we are not stuck. let permit = ctx.handle.scheduler.acquire_permit().await?; // make initial discovery about the object size, metadata, possibly first chunk diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 10a1967..5cad1b7 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use crate::error::{self, ErrorKind}; use tokio::{ - sync::{oneshot::Receiver, Mutex, OnceCell}, + sync::{oneshot::Receiver, Mutex}, task, }; @@ -20,7 +20,7 @@ use super::object_meta::ObjectMetadata; pub struct DownloadHandle { /// Object metadata. TODO: Is there a better way to do this than tokio oncecell? 
pub(crate) object_meta_receiver: Option>, - pub(crate) object_meta: OnceCell>, + pub(crate) object_meta: Option, /// The object content pub(crate) body: DownloadOutput, @@ -36,17 +36,19 @@ pub struct DownloadHandle { impl DownloadHandle { /// Object metadata - pub async fn object_meta(&mut self) -> &Result { - let meta = self - .object_meta - .get_or_init(|| async { - let meta = self.object_meta_receiver.take().unwrap(); - meta.await - .map_err(error::from_kind(ErrorKind::RuntimeError)) - }) - .await; - - meta + pub async fn object_meta(&mut self) -> Result { + if let Some(object_meta) = &self.object_meta { + Ok(object_meta.clone()) + } else { + let meta = self + .object_meta_receiver + .take() + .expect("metadata is not initialized yet"); + self.object_meta = Some(meta + .await + .map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable))?); + Ok(self.object_meta.clone().unwrap()) + } } /// Object content diff --git a/aws-s3-transfer-manager/tests/download_test.rs b/aws-s3-transfer-manager/tests/download_test.rs index c4c976d..a8003ae 100644 --- a/aws-s3-transfer-manager/tests/download_test.rs +++ b/aws-s3-transfer-manager/tests/download_test.rs @@ -130,7 +130,6 @@ async fn test_download_ranges() { .bucket("test-bucket") .key("test-object") .send() - .await .unwrap(); let body = drain(&mut handle).await.unwrap(); @@ -165,7 +164,6 @@ async fn test_body_not_consumed() { .bucket("test-bucket") .key("test-object") .send() - .await .unwrap(); handle.join().await.unwrap(); @@ -273,7 +271,6 @@ async fn test_retry_failed_chunk() { .bucket("test-bucket") .key("test-object") .send() - .await .unwrap(); let body = drain(&mut handle).await.unwrap(); @@ -328,7 +325,6 @@ async fn test_non_retryable_error() { .bucket("test-bucket") .key("test-object") .send() - .await .unwrap(); let _ = drain(&mut handle).await.unwrap_err(); @@ -388,7 +384,6 @@ async fn test_retry_max_attempts() { .bucket("test-bucket") .key("test-object") .send() - .await .unwrap(); let _ = drain(&mut 
handle).await.unwrap_err(); From 24074d15f58d6cca41fbd06d17d018f321760330 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Fri, 8 Nov 2024 14:48:30 -0800 Subject: [PATCH 11/48] interior mutability ftw --- aws-s3-transfer-manager/examples/cp.rs | 3 +- .../src/operation/download.rs | 6 ++-- .../src/operation/download/handle.rs | 29 ++++++++----------- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index b82e39b..e7cdd25 100644 --- a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -172,14 +172,13 @@ async fn do_download(args: Args) -> Result<(), BoxError> { // likely abstract this into performant utils for single file download. Higher level // TM will handle it's own thread pool for filesystem work let mut handle = tm.download().bucket(bucket).key(key).send()?; - let metadata = handle.object_meta().await?; write_body(handle.body_mut(), dest) .instrument(tracing::debug_span!("write-output")) .await?; let elapsed = start.elapsed(); - let obj_size_bytes = metadata.total_size(); + let obj_size_bytes = handle.object_meta().await?.total_size(); let throughput = Throughput::new(obj_size_bytes, elapsed); println!( diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index cf459bf..3f02b0d 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -32,7 +32,7 @@ use output::{AggregatedBytes, ChunkResponse, DownloadOutput}; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; -use tokio::sync::{mpsc, oneshot, Mutex}; +use tokio::sync::{mpsc, oneshot, Mutex, OnceCell}; use tokio::task::{self, JoinSet}; use chunk_meta::ChunkMetadata; use object_meta::ObjectMetadata; @@ -84,8 +84,8 @@ impl Download { body: DownloadOutput::new(comp_rx), tasks, discovery, - object_meta_receiver: 
Some(meta_rx), - object_meta: None, + object_meta_receiver: Mutex::new(Some(meta_rx)), + object_meta: OnceCell::new(), }) } } diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 5cad1b7..67394ad 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use crate::error::{self, ErrorKind}; use tokio::{ - sync::{oneshot::Receiver, Mutex}, + sync::{oneshot::Receiver, Mutex, OnceCell}, task, }; @@ -19,8 +19,8 @@ use super::object_meta::ObjectMetadata; #[non_exhaustive] pub struct DownloadHandle { /// Object metadata. TODO: Is there a better way to do this than tokio oncecell? - pub(crate) object_meta_receiver: Option>, - pub(crate) object_meta: Option, + pub(crate) object_meta_receiver: Mutex>>, + pub(crate) object_meta: OnceCell, /// The object content pub(crate) body: DownloadOutput, @@ -30,25 +30,20 @@ pub struct DownloadHandle { /// All child tasks spawned for this download pub(crate) tasks: Arc>>, - // /// The context used to drive an upload to completion - // pub(crate) ctx: DownloadContext, } impl DownloadHandle { /// Object metadata - pub async fn object_meta(&mut self) -> Result { - if let Some(object_meta) = &self.object_meta { - Ok(object_meta.clone()) - } else { - let meta = self - .object_meta_receiver - .take() - .expect("metadata is not initialized yet"); - self.object_meta = Some(meta - .await - .map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable))?); - Ok(self.object_meta.clone().unwrap()) + pub async fn object_meta(&self) -> Result<&ObjectMetadata, error::Error> { + if !self.object_meta.initialized() { + let mut object_meta_receiver = self.object_meta_receiver.lock().await; + let object_meta_receiver = object_meta_receiver.take().unwrap(); + let meta = object_meta_receiver.await.map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable))?; + // TODO: 
handle error + let _ = self.object_meta.set(meta); } + + return Ok(self.object_meta.get().unwrap()); } /// Object content From e14e80bfa95448ede2c7daaa04d00984264429b7 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Fri, 8 Nov 2024 14:58:29 -0800 Subject: [PATCH 12/48] rename output back to body --- aws-s3-transfer-manager/examples/cp.rs | 4 +-- .../src/operation/download.rs | 6 ++--- .../operation/download/{output.rs => body.rs} | 26 +++++++++---------- .../src/operation/download/handle.rs | 8 +++--- .../src/operation/download/object_meta.rs | 2 +- .../src/operation/download/service.rs | 4 +-- .../src/operation/download_objects/worker.rs | 4 +-- 7 files changed, 27 insertions(+), 27 deletions(-) rename aws-s3-transfer-manager/src/operation/download/{output.rs => body.rs} (93%) diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index e7cdd25..509426b 100644 --- a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -10,7 +10,7 @@ use std::time; use aws_s3_transfer_manager::io::InputStream; use aws_s3_transfer_manager::metrics::unit::ByteUnit; use aws_s3_transfer_manager::metrics::Throughput; -use aws_s3_transfer_manager::operation::download::output::DownloadOutput; +use aws_s3_transfer_manager::operation::download::body::Body; use aws_s3_transfer_manager::types::{ConcurrencySetting, PartSize}; use aws_sdk_s3::error::DisplayErrorContext; use bytes::Buf; @@ -263,7 +263,7 @@ async fn main() -> Result<(), BoxError> { Ok(()) } -async fn write_body(body: &mut DownloadOutput, mut dest: fs::File) -> Result<(), BoxError> { +async fn write_body(body: &mut Body, mut dest: fs::File) -> Result<(), BoxError> { while let Some(chunk) = body.next().await { let chunk = chunk.unwrap().data; tracing::trace!("recv'd chunk remaining={}", chunk.remaining()); diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 3f02b0d..1fe6960 100644 --- 
a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -12,7 +12,7 @@ pub use input::{DownloadInput, DownloadInputBuilder}; /// Operation builders pub mod builders; /// Abstractions for response bodies and consuming data streams. -pub mod output; +pub mod body; mod discovery; @@ -28,7 +28,7 @@ use crate::error; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; use discovery::discover_obj; -use output::{AggregatedBytes, ChunkResponse, DownloadOutput}; +use body::{AggregatedBytes, ChunkResponse, Body}; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; @@ -81,7 +81,7 @@ impl Download { )); Ok(DownloadHandle { - body: DownloadOutput::new(comp_rx), + body: Body::new(comp_rx), tasks, discovery, object_meta_receiver: Mutex::new(Some(meta_rx)), diff --git a/aws-s3-transfer-manager/src/operation/download/output.rs b/aws-s3-transfer-manager/src/operation/download/body.rs similarity index 93% rename from aws-s3-transfer-manager/src/operation/download/output.rs rename to aws-s3-transfer-manager/src/operation/download/body.rs index 3fcb0c1..53bb583 100644 --- a/aws-s3-transfer-manager/src/operation/download/output.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -85,8 +85,8 @@ impl Buf for AggregatedBytes { /// Wraps potentially multiple streams of binary data into a single coherent stream. /// The data on this stream is sequenced into the correct order. 
#[derive(Debug)] -pub struct DownloadOutput { - inner: UnorderedOutput, +pub struct Body { + inner: UnorderedBody, sequencer: Sequencer, } @@ -104,8 +104,8 @@ pub struct ChunkResponse { pub metadata: ChunkMetadata, } -impl DownloadOutput { - /// Create a new empty output +impl Body { + /// Create a new empty body pub fn empty() -> Self { Self::new_from_channel(None) } @@ -116,7 +116,7 @@ impl DownloadOutput { fn new_from_channel(chunks: Option) -> Self { Self { - inner: UnorderedOutput::new(chunks), + inner: UnorderedBody::new(chunks), sequencer: Sequencer::new(), } } @@ -124,7 +124,7 @@ impl DownloadOutput { /// Pull the next chunk of data off the stream. /// /// Returns [None] when there is no more data. - /// Chunks returned from a [DownloadOutput] are guaranteed to be sequenced + /// Chunks returned from a [Body] are guaranteed to be sequenced /// in the right order. pub async fn next(&mut self) -> Option> { // TODO(aws-sdk-rust#1159, design) - do we want ChunkResponse (or similar) rather than AggregatedBytes? Would @@ -221,11 +221,11 @@ impl PartialEq for SequencedChunk { /// A body that returns chunks in whatever order they are received. #[derive(Debug)] -pub(crate) struct UnorderedOutput { +pub(crate) struct UnorderedBody { chunks: Option>>, } -impl UnorderedOutput { +impl UnorderedBody { fn new(chunks: Option) -> Self { Self { chunks } } @@ -233,7 +233,7 @@ impl UnorderedOutput { /// Pull the next chunk of data off the stream. /// /// Returns [None] when there is no more data. - /// Chunks returned from an [UnorderedOutput] are not guaranteed to be sorted + /// Chunks returned from an [UnorderedBody] are not guaranteed to be sorted /// in the right order. Consumers are expected to sort the data themselves /// using the chunk sequence number (starting from zero). 
pub(crate) async fn next(&mut self) -> Option> { @@ -253,12 +253,12 @@ impl UnorderedOutput { #[cfg(test)] mod tests { - use crate::{error, operation::download::output::ChunkResponse}; + use crate::{error, operation::download::body::ChunkResponse}; use bytes::Bytes; use bytes_utils::SegmentedBuf; use tokio::sync::mpsc; - use super::{AggregatedBytes, DownloadOutput, Sequencer}; + use super::{AggregatedBytes, Body, Sequencer}; fn chunk_resp(seq: u64, data: AggregatedBytes) -> ChunkResponse { ChunkResponse { @@ -281,7 +281,7 @@ mod tests { #[tokio::test] async fn test_body_next() { let (tx, rx) = mpsc::channel(2); - let mut body = DownloadOutput::new(rx); + let mut body = Body::new(rx); tokio::spawn(async move { let seq = vec![2, 0, 1]; for i in seq { @@ -307,7 +307,7 @@ mod tests { #[tokio::test] async fn test_body_next_error() { let (tx, rx) = mpsc::channel(2); - let mut body = DownloadOutput::new(rx); + let mut body = Body::new(rx); tokio::spawn(async move { let data = Bytes::from("chunk 0".to_string()); let mut aggregated = SegmentedBuf::new(); diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 67394ad..e79e185 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -10,7 +10,7 @@ use tokio::{ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
* SPDX-License-Identifier: Apache-2.0 */ -use crate::operation::download::output::DownloadOutput; +use crate::operation::download::body::Body; use super::object_meta::ObjectMetadata; @@ -23,7 +23,7 @@ pub struct DownloadHandle { pub(crate) object_meta: OnceCell, /// The object content - pub(crate) body: DownloadOutput, + pub(crate) body: Body, /// Discovery task pub(crate) discovery: task::JoinHandle>, @@ -47,12 +47,12 @@ impl DownloadHandle { } /// Object content - pub fn body(&self) -> &DownloadOutput { + pub fn body(&self) -> &Body { &self.body } /// Mutable reference to the body - pub fn body_mut(&mut self) -> &mut DownloadOutput { + pub fn body_mut(&mut self) -> &mut Body { &mut self.body } diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 149da6e..1ef75d9 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -54,7 +54,7 @@ pub struct ObjectMetadata { pub request_id: Option, // extended_request_id if the client made a request to get object metadata, like HeadObject. pub extended_request_id: Option, - // whether the request that client made only to object metadata, like HeadObject. + // whether the request that client made only to get object metadata, like HeadObject. 
pub request_charged: Option, } diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 7064a3e..fa45334 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -17,8 +17,8 @@ use tokio::task; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; -use super::output::AggregatedBytes; -use super::output::ChunkResponse; +use super::body::AggregatedBytes; +use super::body::ChunkResponse; use super::{DownloadInput, DownloadInputBuilder}; /// Request/input type for our "chunk" service. diff --git a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs index 492e869..6711a2d 100644 --- a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs +++ b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs @@ -12,7 +12,7 @@ use tokio::fs; use tokio::io::AsyncWriteExt; use crate::error; -use crate::operation::download::output::DownloadOutput; +use crate::operation::download::body::Body; use crate::operation::download::{DownloadInput, DownloadInputBuilder}; use crate::types::{DownloadFilter, FailedDownloadTransfer, FailedTransferPolicy}; @@ -144,7 +144,7 @@ async fn download_single_obj( let key_path = local_key_path(root_dir, key.as_str(), prefix, delim)?; let mut handle = crate::operation::download::Download::orchestrate(ctx.handle.clone(), input, true)?; - let mut body = mem::replace(&mut handle.body, DownloadOutput::empty()); + let mut body = mem::replace(&mut handle.body, Body::empty()); let parent_dir = key_path.parent().expect("valid parent dir for key"); fs::create_dir_all(parent_dir).await?; From e3394177f2f157b92ab129343308083cf305c1c5 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Fri, 8 Nov 2024 16:04:39 -0800 Subject: [PATCH 13/48] chunk meta can't be constructed from 
HeadObject --- .../src/operation/download.rs | 6 ++---- .../src/operation/download/chunk_meta.rs | 19 ++----------------- .../src/operation/download/discovery.rs | 7 +++---- .../src/operation/download/handle.rs | 11 +++++------ .../src/operation/download/object_meta.rs | 2 ++ 5 files changed, 14 insertions(+), 31 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 1fe6960..a25b614 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -157,14 +157,13 @@ async fn send_discovery( /// This allows remaining work to start immediately (and concurrently) without /// waiting for the first chunk. fn handle_discovery_chunk( - //handle: &mut DownloadHandle, tasks: &mut task::JoinSet<()>, ctx: DownloadContext, initial_chunk: Option, completed: &mpsc::Sender>, permit: OwnedWorkPermit, parent_span_for_tasks: tracing::Span, - meta_data: ChunkMetadata, + metadata: Option, ) -> u64 { if let Some(stream) = initial_chunk { let seq = ctx.next_seq(); @@ -178,7 +177,7 @@ fn handle_discovery_chunk( .map(|aggregated| ChunkResponse { seq, data: aggregated, - metadata: meta_data, + metadata: metadata.expect("metadata is available"), }) .map_err(error::discovery_failed); @@ -193,7 +192,6 @@ fn handle_discovery_chunk( } }.instrument(tracing::debug_span!(parent: parent_span_for_tasks.clone(), "collect-body-from-discovery", seq))); } - // TODO: waahm7 handle head-object metadata. 
Discussion point ctx.current_seq() } diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 1f0a474..e9787b7 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -4,14 +4,15 @@ */ use aws_sdk_s3::operation::get_object::GetObjectOutput; -use aws_sdk_s3::operation::head_object::HeadObjectOutput; use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; // TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? // TODO(aws-sdk-rust#1159,docs): Document fields + /// Object metadata other than the body +/// SDK type #[derive(Debug, Clone, Default)] pub struct ChunkMetadata { pub request_id: Option, @@ -42,19 +43,3 @@ impl From for ChunkMetadata { } } -impl From for ChunkMetadata { - fn from(value: HeadObjectOutput) -> Self { - Self { - request_id: value.request_id().map(|s| s.to_string()), - extended_request_id: value.extended_request_id().map(|s| s.to_string()), - // Do not include Content-Length as it will not be accurate as chunk metadata - content_length: None, - content_range: None, - checksum_crc32: value.checksum_crc32, - checksum_crc32_c: value.checksum_crc32_c, - checksum_sha1: value.checksum_sha1, - checksum_sha256: value.checksum_sha256, - request_charged: value.request_charged, - } - } -} diff --git a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index 3bed1d2..11fca33 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -36,7 +36,7 @@ pub(super) struct ObjectDiscovery { pub(super) remaining: RangeInclusive, /// the discovered metadata - pub(super) chunk_meta: ChunkMetadata, + pub(super) chunk_meta: Option, pub(super) object_meta: ObjectMetadata, /// the first chunk of data if 
fetched during discovery @@ -122,7 +122,6 @@ async fn discover_obj_with_head( .await .map_err(error::discovery_failed)?; let object_meta: ObjectMetadata = (&resp).into(); - let chunk_meta: ChunkMetadata = resp.into(); let remaining = match byte_range { Some(range) => match range { @@ -135,7 +134,7 @@ async fn discover_obj_with_head( Ok(ObjectDiscovery { remaining, - chunk_meta, + chunk_meta: None, object_meta, initial_chunk: None, }) @@ -174,7 +173,7 @@ async fn discover_obj_with_get( Ok(ObjectDiscovery { remaining, - chunk_meta, + chunk_meta: Some(chunk_meta), object_meta, initial_chunk, }) diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index e79e185..5179018 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -35,15 +35,14 @@ pub struct DownloadHandle { impl DownloadHandle { /// Object metadata pub async fn object_meta(&self) -> Result<&ObjectMetadata, error::Error> { - if !self.object_meta.initialized() { + let meta = self.object_meta.get_or_try_init(|| async { let mut object_meta_receiver = self.object_meta_receiver.lock().await; let object_meta_receiver = object_meta_receiver.take().unwrap(); - let meta = object_meta_receiver.await.map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable))?; - // TODO: handle error - let _ = self.object_meta.set(meta); - } + object_meta_receiver.await.map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable)) + + }).await?; - return Ok(self.object_meta.get().unwrap()); + Ok(meta) } /// Object content diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 1ef75d9..e195260 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -15,12 +15,14 @@ use aws_sdk_s3::operation::RequestIdExt; 
#[derive(Debug, Clone, Default)] pub struct ObjectMetadata { pub delete_marker: Option, + // TODO: remove pub accept_ranges: Option, pub expiration: Option, pub restore: Option, pub last_modified: Option<::aws_smithy_types::DateTime>, pub(crate) content_length: Option, pub e_tag: Option, + // todo: wrong for first part pub checksum_crc32: Option, pub checksum_crc32_c: Option, pub checksum_sha1: Option, From 0de1530065f6537452f34b2c84c1dc4911c9650b Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 10:35:13 -0800 Subject: [PATCH 14/48] fix todos --- .../src/operation/download.rs | 3 +- .../src/operation/download/body.rs | 12 +++- .../src/operation/download/chunk_meta.rs | 71 ++++++++++++++++--- .../src/operation/download/discovery.rs | 2 +- .../src/operation/download/object_meta.rs | 71 +++++++------------ 5 files changed, 99 insertions(+), 60 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index a25b614..a705eb5 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -98,7 +98,6 @@ async fn send_discovery( input: DownloadInput, use_current_span_as_parent_for_tasks: bool, ) -> Result<(), crate::error::Error> { - let mut tasks = tasks.lock().await; // create span to serve as parent of spawned child tasks. let parent_span_for_tasks = tracing::debug_span!( @@ -118,11 +117,11 @@ async fn send_discovery( // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; - // TODO: waahm7 fix let _ = meta_tx.send(discovery.object_meta); let initial_chunk = discovery.initial_chunk.take(); + let mut tasks = tasks.lock().await; // spawn a task (if necessary) to handle the discovery chunk. 
This returns immediately so // that we can begin concurrently downloading any remaining chunks/parts ASAP let start_seq = handle_discovery_chunk( diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 53bb583..459ad19 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -16,7 +16,13 @@ use crate::error::ErrorKind; use super::chunk_meta::ChunkMetadata; #[derive(Debug, Clone)] -/// TODO: waahm7 doc + +/// Non-contiguous Binary Data Storage +/// +/// When data is read from the network, it is read in a sequence of chunks that are not in +/// contiguous memory. [`AggregatedBytes`](crate::byte_stream::AggregatedBytes) provides a view of +/// this data via [`impl Buf`](bytes::Buf) or it can be copied into contiguous storage with +/// [`.into_bytes()`](crate::byte_stream::AggregatedBytes::into_bytes). pub struct AggregatedBytes(SegmentedBuf); impl AggregatedBytes { @@ -43,8 +49,8 @@ impl AggregatedBytes { self.0.into_inner().into_iter().flatten().collect() } - /// TODO: waahm7 doc - pub async fn from_byte_stream(value: ByteStream) -> Result { + /// Make this buffer from a ByteStream + pub(crate) async fn from_byte_stream(value: ByteStream) -> Result { let mut value = value; let mut output = SegmentedBuf::new(); while let Some(buf) = value.next().await { diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index e9787b7..5ef5c08 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -7,38 +7,91 @@ use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; -// TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? 
-// TODO(aws-sdk-rust#1159,docs): Document fields - - -/// Object metadata other than the body -/// SDK type +/// request metadata other than the body that will be set from `GetObject` #[derive(Debug, Clone, Default)] pub struct ChunkMetadata { pub request_id: Option, pub extended_request_id: Option, + pub delete_marker: Option, + pub accept_ranges: Option, + pub expiration: Option, + pub restore: Option, + pub last_modified: Option<::aws_smithy_types::DateTime>, pub content_length: Option, - pub content_range: Option, + pub e_tag: Option, pub checksum_crc32: Option, pub checksum_crc32_c: Option, pub checksum_sha1: Option, pub checksum_sha256: Option, + pub missing_meta: Option, + pub version_id: Option, + pub cache_control: Option, + pub content_disposition: Option, + pub content_encoding: Option, + pub content_language: Option, + pub content_range: Option, + pub content_type: Option, + pub expires: Option<::aws_smithy_types::DateTime>, + pub expires_string: Option, + pub website_redirect_location: Option, + pub server_side_encryption: Option, + pub metadata: Option<::std::collections::HashMap>, + pub sse_customer_algorithm: Option, + pub sse_customer_key_md5: Option, + pub ssekms_key_id: Option, + pub bucket_key_enabled: Option, + pub storage_class: Option, pub request_charged: Option, + pub replication_status: Option, + pub parts_count: Option, + pub tag_count: Option, + pub object_lock_mode: Option, + pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, + pub object_lock_legal_hold_status: Option, } impl From for ChunkMetadata { fn from(value: GetObjectOutput) -> Self { Self { - // TODO: waahm7 should we just impl the traits? why traits? 
request_id: value.request_id().map(|s| s.to_string()), extended_request_id: value.extended_request_id().map(|s| s.to_string()), + delete_marker: value.delete_marker, + accept_ranges: value.accept_ranges, + expiration: value.expiration, + restore: value.restore, + last_modified: value.last_modified, content_length: value.content_length, - content_range: value.content_range, + e_tag: value.e_tag, checksum_crc32: value.checksum_crc32, checksum_crc32_c: value.checksum_crc32_c, checksum_sha1: value.checksum_sha1, checksum_sha256: value.checksum_sha256, + missing_meta: value.missing_meta, + version_id: value.version_id, + cache_control: value.cache_control, + content_disposition: value.content_disposition, + content_encoding: value.content_encoding, + content_language: value.content_language, + content_range: value.content_range, + content_type: value.content_type, + #[allow(deprecated)] + expires: value.expires, + expires_string: value.expires_string, + website_redirect_location: value.website_redirect_location, + server_side_encryption: value.server_side_encryption, + metadata: value.metadata, + sse_customer_algorithm: value.sse_customer_algorithm, + sse_customer_key_md5: value.sse_customer_key_md5, + ssekms_key_id: value.ssekms_key_id, + bucket_key_enabled: value.bucket_key_enabled, + storage_class: value.storage_class, request_charged: value.request_charged, + replication_status: value.replication_status, + parts_count: value.parts_count, + tag_count: value.tag_count, + object_lock_mode: value.object_lock_mode, + object_lock_retain_until_date: value.object_lock_retain_until_date, + object_lock_legal_hold_status: value.object_lock_legal_hold_status, } } } diff --git a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index 11fca33..8248bc1 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -121,7 +121,7 @@ 
async fn discover_obj_with_head( .send() .await .map_err(error::discovery_failed)?; - let object_meta: ObjectMetadata = (&resp).into(); + let object_meta: ObjectMetadata = resp.into(); let remaining = match byte_range { Some(range) => match range { diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index e195260..158bcf1 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -15,18 +15,11 @@ use aws_sdk_s3::operation::RequestIdExt; #[derive(Debug, Clone, Default)] pub struct ObjectMetadata { pub delete_marker: Option, - // TODO: remove - pub accept_ranges: Option, pub expiration: Option, pub restore: Option, pub last_modified: Option<::aws_smithy_types::DateTime>, pub(crate) content_length: Option, pub e_tag: Option, - // todo: wrong for first part - pub checksum_crc32: Option, - pub checksum_crc32_c: Option, - pub checksum_sha1: Option, - pub checksum_sha256: Option, pub missing_meta: Option, pub version_id: Option, pub cache_control: Option, @@ -47,7 +40,6 @@ pub struct ObjectMetadata { pub storage_class: Option, pub replication_status: Option, pub parts_count: Option, - pub tag_count: Option, pub object_lock_mode: Option, pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, pub object_lock_legal_hold_status: Option, @@ -81,16 +73,11 @@ impl From<&GetObjectOutput> for ObjectMetadata { fn from(value: &GetObjectOutput) -> Self { Self { delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges.clone(), expiration: value.expiration.clone(), restore: value.restore.clone(), last_modified: value.last_modified, content_length: value.content_length, e_tag: value.e_tag.clone(), - checksum_crc32: value.checksum_crc32.clone(), - checksum_crc32_c: value.checksum_crc32_c.clone(), - checksum_sha1: value.checksum_sha1.clone(), - checksum_sha256: 
value.checksum_sha256.clone(), missing_meta: value.missing_meta, version_id: value.version_id.clone(), cache_control: value.cache_control.clone(), @@ -112,7 +99,6 @@ impl From<&GetObjectOutput> for ObjectMetadata { storage_class: value.storage_class.clone(), replication_status: value.replication_status.clone(), parts_count: value.parts_count, - tag_count: value.tag_count, object_lock_mode: value.object_lock_mode.clone(), object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), @@ -123,48 +109,43 @@ impl From<&GetObjectOutput> for ObjectMetadata { } } -impl From<&HeadObjectOutput> for ObjectMetadata { - fn from(value: &HeadObjectOutput) -> Self { +impl From for ObjectMetadata { + fn from(value: HeadObjectOutput) -> Self { Self { + request_id: value.request_id().map(|s| s.to_string()), + extended_request_id: value.extended_request_id().map(|s| s.to_string()), + request_charged: value.request_charged, delete_marker: value.delete_marker, - accept_ranges: value.accept_ranges.clone(), - expiration: value.expiration.clone(), - restore: value.restore.clone(), + // accept_ranges: value.accept_ranges, + expiration: value.expiration, + restore: value.restore, last_modified: value.last_modified, content_length: value.content_length, - e_tag: value.e_tag.clone(), - checksum_crc32: value.checksum_crc32.clone(), - checksum_crc32_c: value.checksum_crc32_c.clone(), - checksum_sha1: value.checksum_sha1.clone(), - checksum_sha256: value.checksum_sha256.clone(), + e_tag: value.e_tag, missing_meta: value.missing_meta, - version_id: value.version_id.clone(), - cache_control: value.cache_control.clone(), - content_disposition: value.content_disposition.clone(), - content_encoding: value.content_encoding.clone(), - content_language: value.content_language.clone(), + version_id: value.version_id, + cache_control: value.cache_control, + content_disposition: value.content_disposition, + content_encoding: 
value.content_encoding, + content_language: value.content_language, content_range: None, - content_type: value.content_type.clone(), + content_type: value.content_type, #[allow(deprecated)] expires: value.expires, - expires_string: value.expires_string.clone(), - website_redirect_location: value.website_redirect_location.clone(), - server_side_encryption: value.server_side_encryption.clone(), - metadata: value.metadata.clone(), - sse_customer_algorithm: value.sse_customer_algorithm.clone(), - sse_customer_key_md5: value.sse_customer_key_md5.clone(), - ssekms_key_id: value.ssekms_key_id.clone(), + expires_string: value.expires_string, + website_redirect_location: value.website_redirect_location, + server_side_encryption: value.server_side_encryption, + metadata: value.metadata, + sse_customer_algorithm: value.sse_customer_algorithm, + sse_customer_key_md5: value.sse_customer_key_md5, + ssekms_key_id: value.ssekms_key_id, bucket_key_enabled: value.bucket_key_enabled, - storage_class: value.storage_class.clone(), - replication_status: value.replication_status.clone(), + storage_class: value.storage_class, + replication_status: value.replication_status, parts_count: value.parts_count, - tag_count: None, - object_lock_mode: value.object_lock_mode.clone(), + object_lock_mode: value.object_lock_mode, object_lock_retain_until_date: value.object_lock_retain_until_date, - object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), - request_id: value.request_id().map(|s| s.to_string()), - extended_request_id: value.extended_request_id().map(|s| s.to_string()), - request_charged: value.request_charged.clone(), + object_lock_legal_hold_status: value.object_lock_legal_hold_status, } } } From e824e230e0b7904f16ba32ed49d3110a0f35a008 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 10:43:06 -0800 Subject: [PATCH 15/48] unordered body is useful --- .../src/operation/download/body.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 
deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 459ad19..9a6a4c8 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -98,15 +98,16 @@ pub struct Body { type BodyChannel = mpsc::Receiver>; +/// Contains body and metadata for each GetObject call made. This will be delivered sequentially +/// in-order. #[derive(Debug, Clone)] -/// TODO: DOcs pub struct ChunkResponse { // TODO(aws-sdk-rust#1159, design) - consider PartialOrd for ChunkResponse and hiding `seq` as internal only detail // the seq number pub(crate) seq: u64, - /// data: chunk data + /// data: body of the object pub data: AggregatedBytes, - /// metadata + /// metadata: metadata returned by the S3. pub metadata: ChunkMetadata, } @@ -127,14 +128,20 @@ impl Body { } } + /// Convert this body into an unordered stream of chunks. + // TODO(aws-sdk-rust#1159) - revisit if we actually need/use unordered data stream. + // download_objects should utilize this so that it can write in parallel to files. + #[allow(dead_code)] + pub(crate) fn unordered(self) -> UnorderedBody { + self.inner + } + /// Pull the next chunk of data off the stream. /// /// Returns [None] when there is no more data. /// Chunks returned from a [Body] are guaranteed to be sequenced /// in the right order. pub async fn next(&mut self) -> Option> { - // TODO(aws-sdk-rust#1159, design) - do we want ChunkResponse (or similar) rather than AggregatedBytes? 
Would - // make additional retries of an individual chunk/part more feasible (though theoretically already exhausted retries) loop { if self.sequencer.is_ordered() { break; From abafe4e6a387a638e31e9be7cf4f7069b8cc2dc4 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 10:51:47 -0800 Subject: [PATCH 16/48] fix docs --- aws-s3-transfer-manager/src/operation/download/body.rs | 1 + .../src/operation/download/chunk_meta.rs | 1 + aws-s3-transfer-manager/src/operation/download/handle.rs | 3 +-- .../src/operation/download/object_meta.rs | 8 ++++---- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 9a6a4c8..ff8818d 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -101,6 +101,7 @@ type BodyChannel = mpsc::Receiver>; /// Contains body and metadata for each GetObject call made. This will be delivered sequentially /// in-order. 
#[derive(Debug, Clone)] +#[non_exhaustive] pub struct ChunkResponse { // TODO(aws-sdk-rust#1159, design) - consider PartialOrd for ChunkResponse and hiding `seq` as internal only detail // the seq number diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 5ef5c08..05dfded 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -8,6 +8,7 @@ use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; /// request metadata other than the body that will be set from `GetObject` +// TODO: Document fields #[derive(Debug, Clone, Default)] pub struct ChunkMetadata { pub request_id: Option, diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 5179018..fbc0d2f 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -28,7 +28,7 @@ pub struct DownloadHandle { /// Discovery task pub(crate) discovery: task::JoinHandle>, - /// All child tasks spawned for this download + /// All child tasks (ranged GetObject) spawned for this download pub(crate) tasks: Arc>>, } @@ -39,7 +39,6 @@ impl DownloadHandle { let mut object_meta_receiver = self.object_meta_receiver.lock().await; let object_meta_receiver = object_meta_receiver.take().unwrap(); object_meta_receiver.await.map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable)) - }).await?; Ok(meta) diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 158bcf1..54b50ff 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -11,7 +11,7 @@ use aws_sdk_s3::operation::RequestIdExt; // 
TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? // TODO(aws-sdk-rust#1159,docs): Document fields -/// Object metadata other than the body +/// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` #[derive(Debug, Clone, Default)] pub struct ObjectMetadata { pub delete_marker: Option, @@ -44,11 +44,11 @@ pub struct ObjectMetadata { pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, pub object_lock_legal_hold_status: Option, - // request_id if the client made a request to get object metadata, like HeadObject. + /// The request_id if the client made a request to retrieve object metadata, such as with HeadObject. pub request_id: Option, - // extended_request_id if the client made a request to get object metadata, like HeadObject. + /// The extended_request_id if the client made a request to retrieve object metadata, such as with HeadObject. pub extended_request_id: Option, - // whether the request that client made only to get object metadata, like HeadObject. + /// Indicates whether the request was charged for a request made only to retrieve object metadata, such as HeadObject. 
pub request_charged: Option, } From ea2ce9c5a98231efa94b15ac5c50914558d9cf92 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 10:54:50 -0800 Subject: [PATCH 17/48] nit --- aws-s3-transfer-manager/src/operation/download.rs | 14 ++++++-------- .../src/operation/download/body.rs | 2 +- .../src/operation/download/builders.rs | 5 +---- .../src/operation/download/chunk_meta.rs | 1 - .../src/operation/download/discovery.rs | 2 +- .../src/operation/download/handle.rs | 15 ++++++++++----- 6 files changed, 19 insertions(+), 20 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index a705eb5..5239684 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -9,10 +9,10 @@ use aws_sdk_s3::error::DisplayErrorContext; /// Request type for dowloading a single object from Amazon S3 pub use input::{DownloadInput, DownloadInputBuilder}; -/// Operation builders -pub mod builders; /// Abstractions for response bodies and consuming data streams. pub mod body; +/// Operation builders +pub mod builders; mod discovery; @@ -27,15 +27,15 @@ mod service; use crate::error; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; +use body::{AggregatedBytes, Body, ChunkResponse}; +use chunk_meta::ChunkMetadata; use discovery::discover_obj; -use body::{AggregatedBytes, ChunkResponse, Body}; +use object_meta::ObjectMetadata; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use tokio::sync::{mpsc, oneshot, Mutex, OnceCell}; use tokio::task::{self, JoinSet}; -use chunk_meta::ChunkMetadata; -use object_meta::ObjectMetadata; use super::TransferContext; @@ -98,7 +98,6 @@ async fn send_discovery( input: DownloadInput, use_current_span_as_parent_for_tasks: bool, ) -> Result<(), crate::error::Error> { - // create span to serve as parent of spawned child tasks. 
let parent_span_for_tasks = tracing::debug_span!( parent: if use_current_span_as_parent_for_tasks { tracing::Span::current().id() } else { None } , @@ -112,7 +111,7 @@ async fn send_discovery( } // acquire a permit for discovery - // TODO: Verify, if this fails we are not stuck. + // TODO: Verify, if this fails we are not stuck. let permit = ctx.handle.scheduler.acquire_permit().await?; // make initial discovery about the object size, metadata, possibly first chunk @@ -170,7 +169,6 @@ fn handle_discovery_chunk( // spawn a task to actually read the discovery chunk without waiting for it so we // can get started sooner on any remaining work (if any) tasks.spawn(async move { - let chunk = AggregatedBytes::from_byte_stream(stream) .await .map(|aggregated| ChunkResponse { diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index ff8818d..c557f17 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -49,7 +49,7 @@ impl AggregatedBytes { self.0.into_inner().into_iter().flatten().collect() } - /// Make this buffer from a ByteStream + /// Make this buffer from a ByteStream pub(crate) async fn from_byte_stream(value: ByteStream) -> Result { let mut value = value; let mut output = SegmentedBuf::new(); diff --git a/aws-s3-transfer-manager/src/operation/download/builders.rs b/aws-s3-transfer-manager/src/operation/download/builders.rs index 2405680..c62b997 100644 --- a/aws-s3-transfer-manager/src/operation/download/builders.rs +++ b/aws-s3-transfer-manager/src/operation/download/builders.rs @@ -524,10 +524,7 @@ impl DownloadFluentBuilder { impl crate::operation::download::input::DownloadInputBuilder { /// Initiate a download transfer for a single object with this input using the given client. 
- pub fn send_with( - self, - client: &crate::Client, - ) -> Result { + pub fn send_with(self, client: &crate::Client) -> Result { let mut fluent_builder = client.download(); fluent_builder.inner = self; fluent_builder.send() diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 05dfded..7493278 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -96,4 +96,3 @@ impl From for ChunkMetadata { } } } - diff --git a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index 8248bc1..06e83f3 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -113,7 +113,7 @@ async fn discover_obj_with_head( input: &DownloadInput, byte_range: Option, ) -> Result { - let resp = ctx + let resp = ctx .client() .head_object() .set_bucket(input.bucket().map(str::to_string)) diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index fbc0d2f..12f3876 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -35,11 +35,16 @@ pub struct DownloadHandle { impl DownloadHandle { /// Object metadata pub async fn object_meta(&self) -> Result<&ObjectMetadata, error::Error> { - let meta = self.object_meta.get_or_try_init(|| async { - let mut object_meta_receiver = self.object_meta_receiver.lock().await; - let object_meta_receiver = object_meta_receiver.take().unwrap(); - object_meta_receiver.await.map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable)) - }).await?; + let meta = self + .object_meta + .get_or_try_init(|| async { + let mut object_meta_receiver = self.object_meta_receiver.lock().await; + let 
object_meta_receiver = object_meta_receiver.take().unwrap(); + object_meta_receiver + .await + .map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable)) + }) + .await?; Ok(meta) } From f1cc638be5d25175b732087be37a44fc48700cb5 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 10:58:40 -0800 Subject: [PATCH 18/48] fix test --- .../src/operation/download/object_meta.rs | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 54b50ff..f2c080a 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -152,23 +152,22 @@ impl From for ObjectMetadata { #[cfg(test)] mod tests { - // use super::ObjectMetadata; + use super::ObjectMetadata; - // #[test] - // fn test_inferred_content_length() { - // let meta = ObjectMetadata { - // content_length: Some(4), - // content_range: Some("should ignore".to_owned()), - // ..Default::default() - // }; + #[test] + fn test_inferred_total_size() { + let meta = ObjectMetadata { + content_length: Some(15), + ..Default::default() + }; - // assert_eq!(4, meta.content_length()); + assert_eq!(15, meta.total_size()); - // let meta = ObjectMetadata { - // content_length: None, - // content_range: Some("bytes 0-499/900".to_owned()), - // ..Default::default() - // }; - // assert_eq!(500, meta.content_length()); - // } + let meta = ObjectMetadata { + content_range: Some("bytes 0-499/900".to_owned()), + content_length: Some(500), + ..Default::default() + }; + assert_eq!(900, meta.total_size()); + } } From db9bd427b6fff5b6a44d5860feeb27898b9ea5a8 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 12:38:19 -0800 Subject: [PATCH 19/48] fix tests --- aws-s3-transfer-manager/src/client.rs | 3 +- .../src/operation/download_objects/worker.rs | 1 + 
.../tests/download_test.rs | 31 ++++++++++++++++++- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/aws-s3-transfer-manager/src/client.rs b/aws-s3-transfer-manager/src/client.rs index 9f6ffd4..b7a82ce 100644 --- a/aws-s3-transfer-manager/src/client.rs +++ b/aws-s3-transfer-manager/src/client.rs @@ -136,8 +136,7 @@ impl Client { /// .download() /// .bucket("my-bucket") /// .key("my-key") - /// .send() - /// .await?; + /// .send()?; /// /// // process data off handle... /// diff --git a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs index 6711a2d..edb85b3 100644 --- a/aws-s3-transfer-manager/src/operation/download_objects/worker.rs +++ b/aws-s3-transfer-manager/src/operation/download_objects/worker.rs @@ -144,6 +144,7 @@ async fn download_single_obj( let key_path = local_key_path(root_dir, key.as_str(), prefix, delim)?; let mut handle = crate::operation::download::Download::orchestrate(ctx.handle.clone(), input, true)?; + let _ = handle.object_meta().await?; let mut body = mem::replace(&mut handle.body, Body::empty()); let parent_dir = key_path.parent().expect("valid parent dir for key"); diff --git a/aws-s3-transfer-manager/tests/download_test.rs b/aws-s3-transfer-manager/tests/download_test.rs index a8003ae..626526c 100644 --- a/aws-s3-transfer-manager/tests/download_test.rs +++ b/aws-s3-transfer-manager/tests/download_test.rs @@ -70,13 +70,18 @@ fn simple_object_connector(data: &Bytes, part_size: usize) -> StaticReplayClient .enumerate() .map(|(idx, chunk)| { let start = idx * part_size; - let end = part_size * (idx + 1) - 1; + let end = std::cmp::min(start+part_size, data.len()) - 1; + eprintln!("{},{}-{}", end-start+1, start,end); ReplayEvent::new( // NOTE: Rather than try to recreate all the expected requests we just put in placeholders and // make our own assertions against the captured requests. 
dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", end-start+1), + ) .header( "Content-Range", format!("bytes {start}-{end}/{}", data.len()), @@ -227,6 +232,10 @@ async fn test_retry_failed_chunk() { dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", part_size), + ) .header( "Content-Range", format!("bytes 0-{}/{}", part_size - 1, data.len()), @@ -239,6 +248,10 @@ async fn test_retry_failed_chunk() { dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", data.len() - part_size), + ) .header( "Content-Range", format!("bytes {}-{}/{}", part_size, data.len(), data.len()), @@ -255,6 +268,10 @@ async fn test_retry_failed_chunk() { dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", data.len() - part_size), + ) .header( "Content-Range", format!("bytes {}-{}/{}", part_size, data.len(), data.len()), @@ -301,6 +318,10 @@ async fn test_non_retryable_error() { dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", part_size), + ) .header( "Content-Range", format!("bytes 0-{}/{}", part_size - 1, data.len()), @@ -347,6 +368,10 @@ async fn test_retry_max_attempts() { dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", part_size), + ) .header( "Content-Range", format!("bytes {}-{}/{}", part_size, data.len(), data.len()), @@ -366,6 +391,10 @@ async fn test_retry_max_attempts() { dummy_expected_request(), http_02x::Response::builder() .status(200) + .header( + "Content-Length", + format!("{}", part_size), + ) .header( "Content-Range", format!("bytes 0-{}/{}", part_size - 1, data.len()), From ee590556981b84f6a19c958d1083c3d012be46b1 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 
12:39:15 -0800 Subject: [PATCH 20/48] cleanup --- aws-s3-transfer-manager/tests/download_test.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/aws-s3-transfer-manager/tests/download_test.rs b/aws-s3-transfer-manager/tests/download_test.rs index 626526c..f3ebed7 100644 --- a/aws-s3-transfer-manager/tests/download_test.rs +++ b/aws-s3-transfer-manager/tests/download_test.rs @@ -71,7 +71,6 @@ fn simple_object_connector(data: &Bytes, part_size: usize) -> StaticReplayClient .map(|(idx, chunk)| { let start = idx * part_size; let end = std::cmp::min(start+part_size, data.len()) - 1; - eprintln!("{},{}-{}", end-start+1, start,end); ReplayEvent::new( // NOTE: Rather than try to recreate all the expected requests we just put in placeholders and // make our own assertions against the captured requests. From 49a681a298fd30858410d66e2b54599d1c3831b8 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Sun, 10 Nov 2024 13:55:51 -0800 Subject: [PATCH 21/48] fmt --- .../tests/download_test.rs | 37 ++++--------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/aws-s3-transfer-manager/tests/download_test.rs b/aws-s3-transfer-manager/tests/download_test.rs index f3ebed7..8bcddb1 100644 --- a/aws-s3-transfer-manager/tests/download_test.rs +++ b/aws-s3-transfer-manager/tests/download_test.rs @@ -70,17 +70,14 @@ fn simple_object_connector(data: &Bytes, part_size: usize) -> StaticReplayClient .enumerate() .map(|(idx, chunk)| { let start = idx * part_size; - let end = std::cmp::min(start+part_size, data.len()) - 1; + let end = std::cmp::min(start + part_size, data.len()) - 1; ReplayEvent::new( // NOTE: Rather than try to recreate all the expected requests we just put in placeholders and // make our own assertions against the captured requests. 
dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", end-start+1), - ) + .header("Content-Length", format!("{}", end - start + 1)) .header( "Content-Range", format!("bytes {start}-{end}/{}", data.len()), @@ -231,10 +228,7 @@ async fn test_retry_failed_chunk() { dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", part_size), - ) + .header("Content-Length", format!("{}", part_size)) .header( "Content-Range", format!("bytes 0-{}/{}", part_size - 1, data.len()), @@ -247,10 +241,7 @@ async fn test_retry_failed_chunk() { dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", data.len() - part_size), - ) + .header("Content-Length", format!("{}", data.len() - part_size)) .header( "Content-Range", format!("bytes {}-{}/{}", part_size, data.len(), data.len()), @@ -267,10 +258,7 @@ async fn test_retry_failed_chunk() { dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", data.len() - part_size), - ) + .header("Content-Length", format!("{}", data.len() - part_size)) .header( "Content-Range", format!("bytes {}-{}/{}", part_size, data.len(), data.len()), @@ -317,10 +305,7 @@ async fn test_non_retryable_error() { dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", part_size), - ) + .header("Content-Length", format!("{}", part_size)) .header( "Content-Range", format!("bytes 0-{}/{}", part_size - 1, data.len()), @@ -367,10 +352,7 @@ async fn test_retry_max_attempts() { dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", part_size), - ) + .header("Content-Length", format!("{}", part_size)) .header( "Content-Range", format!("bytes {}-{}/{}", part_size, data.len(), data.len()), @@ -390,10 +372,7 @@ async fn 
test_retry_max_attempts() { dummy_expected_request(), http_02x::Response::builder() .status(200) - .header( - "Content-Length", - format!("{}", part_size), - ) + .header("Content-Length", format!("{}", part_size)) .header( "Content-Range", format!("bytes 0-{}/{}", part_size - 1, data.len()), From b3bf149f2b9ef878f0979d8efc0a4e8b0a28dc5d Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 11 Nov 2024 09:07:09 -0800 Subject: [PATCH 22/48] cleanup --- aws-s3-transfer-manager/src/operation/download.rs | 2 +- aws-s3-transfer-manager/src/operation/download/body.rs | 5 ++--- .../src/operation/download/object_meta.rs | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 5239684..4fdbfc0 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -174,7 +174,7 @@ fn handle_discovery_chunk( .map(|aggregated| ChunkResponse { seq, data: aggregated, - metadata: metadata.expect("metadata is available"), + metadata: metadata.expect("chunk metadata is available"), }) .map_err(error::discovery_failed); diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index c557f17..b566583 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -15,14 +15,13 @@ use crate::error::ErrorKind; use super::chunk_meta::ChunkMetadata; -#[derive(Debug, Clone)] - /// Non-contiguous Binary Data Storage /// /// When data is read from the network, it is read in a sequence of chunks that are not in /// contiguous memory. [`AggregatedBytes`](crate::byte_stream::AggregatedBytes) provides a view of /// this data via [`impl Buf`](bytes::Buf) or it can be copied into contiguous storage with /// [`.into_bytes()`](crate::byte_stream::AggregatedBytes::into_bytes). 
+#[derive(Debug, Clone)] pub struct AggregatedBytes(SegmentedBuf); impl AggregatedBytes { @@ -56,7 +55,7 @@ impl AggregatedBytes { while let Some(buf) = value.next().await { match buf { Ok(buf) => output.push(buf), - Err(err) => return Err(crate::error::from_kind(ErrorKind::IOError)(err)), + Err(err) => return Err(crate::error::from_kind(ErrorKind::ChunkFailed)(err)), }; } Ok(AggregatedBytes(output)) diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index f2c080a..17594a6 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -116,7 +116,6 @@ impl From for ObjectMetadata { extended_request_id: value.extended_request_id().map(|s| s.to_string()), request_charged: value.request_charged, delete_marker: value.delete_marker, - // accept_ranges: value.accept_ranges, expiration: value.expiration, restore: value.restore, last_modified: value.last_modified, From d386e1deeaa1bbba57d9d07de6be0967d2930805 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 11 Nov 2024 09:27:36 -0800 Subject: [PATCH 23/48] fix meta_receiver bug possibility --- aws-s3-transfer-manager/src/operation/download.rs | 1 - aws-s3-transfer-manager/src/operation/download/handle.rs | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 4fdbfc0..dd65de3 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -111,7 +111,6 @@ async fn send_discovery( } // acquire a permit for discovery - // TODO: Verify, if this fails we are not stuck. 
let permit = ctx.handle.scheduler.acquire_permit().await?; // make initial discovery about the object size, metadata, possibly first chunk diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 12f3876..6b0ab4c 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -39,7 +39,10 @@ impl DownloadHandle { .object_meta .get_or_try_init(|| async { let mut object_meta_receiver = self.object_meta_receiver.lock().await; - let object_meta_receiver = object_meta_receiver.take().unwrap(); + let object_meta_receiver = object_meta_receiver + .take() + .ok_or("meta_receiver is already taken") + .map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable))?; object_meta_receiver .await .map_err(error::from_kind(ErrorKind::ObjectNotDiscoverable)) From 67f9b531d69e4d9ddbbe069c2414a13ecf919af9 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 11 Nov 2024 09:29:26 -0800 Subject: [PATCH 24/48] fix naming --- aws-s3-transfer-manager/src/operation/download.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index dd65de3..27667e0 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -68,14 +68,14 @@ impl Download { let ctx = DownloadContext::new(handle); let concurrency = ctx.handle.num_workers(); let (comp_tx, comp_rx) = mpsc::channel(concurrency); - let (meta_tx, meta_rx) = oneshot::channel(); + let (object_meta_tx, object_meta_rx) = oneshot::channel(); let tasks = Arc::new(Mutex::new(JoinSet::new())); let discovery = tokio::spawn(send_discovery( tasks.clone(), ctx.clone(), comp_tx, - meta_tx, + object_meta_tx, input, use_current_span_as_parent_for_tasks, )); @@ -84,7 +84,7 @@ impl Download { body: Body::new(comp_rx), tasks, 
discovery, - object_meta_receiver: Mutex::new(Some(meta_rx)), + object_meta_receiver: Mutex::new(Some(object_meta_rx)), object_meta: OnceCell::new(), }) } @@ -94,7 +94,7 @@ async fn send_discovery( tasks: Arc>>, ctx: DownloadContext, comp_tx: mpsc::Sender>, - meta_tx: oneshot::Sender, + object_meta_tx: oneshot::Sender, input: DownloadInput, use_current_span_as_parent_for_tasks: bool, ) -> Result<(), crate::error::Error> { @@ -115,7 +115,7 @@ async fn send_discovery( // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; - let _ = meta_tx.send(discovery.object_meta); + let _ = object_meta_tx.send(discovery.object_meta); let initial_chunk = discovery.initial_chunk.take(); From e92aab65fba51d294d6c05175225d5943bd150fc Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 11 Nov 2024 10:54:49 -0800 Subject: [PATCH 25/48] fix comment --- aws-s3-transfer-manager/src/operation/download/handle.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index 6b0ab4c..e1a8c3c 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -18,8 +18,9 @@ use super::object_meta::ObjectMetadata; #[derive(Debug)] #[non_exhaustive] pub struct DownloadHandle { - /// Object metadata. TODO: Is there a better way to do this than tokio oncecell? + /// Object metadata receiver. pub(crate) object_meta_receiver: Mutex>>, + /// Object metadata. 
pub(crate) object_meta: OnceCell, /// The object content From e812a9d9bbb2a3529e235bb648adfeb5900b81b8 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 11 Nov 2024 11:10:46 -0800 Subject: [PATCH 26/48] allow buf --- aws-s3-transfer-manager/external-types.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/aws-s3-transfer-manager/external-types.toml b/aws-s3-transfer-manager/external-types.toml index 25f0f12..cd30fa0 100644 --- a/aws-s3-transfer-manager/external-types.toml +++ b/aws-s3-transfer-manager/external-types.toml @@ -5,4 +5,5 @@ allowed_external_types = [ "aws_smithy_types::*", "tokio::runtime::task::error::JoinError", "bytes::bytes::Bytes", + "bytes::bytes::Buf", ] From 07d7e9a92ba2269af29171560bf54cce3a1b2e61 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 11 Nov 2024 11:16:42 -0800 Subject: [PATCH 27/48] fix type --- aws-s3-transfer-manager/external-types.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-s3-transfer-manager/external-types.toml b/aws-s3-transfer-manager/external-types.toml index cd30fa0..b06fe1d 100644 --- a/aws-s3-transfer-manager/external-types.toml +++ b/aws-s3-transfer-manager/external-types.toml @@ -5,5 +5,5 @@ allowed_external_types = [ "aws_smithy_types::*", "tokio::runtime::task::error::JoinError", "bytes::bytes::Bytes", - "bytes::bytes::Buf", + "bytes::buf::buf_impl::Buf", ] From e8da53caf63fca1abe13f047f906b918365da52f Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 13:37:50 -0800 Subject: [PATCH 28/48] rename to initiate --- aws-s3-transfer-manager/examples/cp.rs | 2 +- aws-s3-transfer-manager/src/client.rs | 2 +- .../src/operation/download/builders.rs | 4 ++-- .../src/operation/upload/builders.rs | 1 + aws-s3-transfer-manager/tests/download_test.rs | 10 +++++----- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index 509426b..7978417 100644 --- 
a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -171,7 +171,7 @@ async fn do_download(args: Args) -> Result<(), BoxError> { // TODO(aws-sdk-rust#1159) - rewrite this less naively, // likely abstract this into performant utils for single file download. Higher level // TM will handle it's own thread pool for filesystem work - let mut handle = tm.download().bucket(bucket).key(key).send()?; + let mut handle = tm.download().bucket(bucket).key(key).initiate()?; write_body(handle.body_mut(), dest) .instrument(tracing::debug_span!("write-output")) diff --git a/aws-s3-transfer-manager/src/client.rs b/aws-s3-transfer-manager/src/client.rs index b7a82ce..8d7354f 100644 --- a/aws-s3-transfer-manager/src/client.rs +++ b/aws-s3-transfer-manager/src/client.rs @@ -136,7 +136,7 @@ impl Client { /// .download() /// .bucket("my-bucket") /// .key("my-key") - /// .send()?; + /// .initiate()?; /// /// // process data off handle... /// diff --git a/aws-s3-transfer-manager/src/operation/download/builders.rs b/aws-s3-transfer-manager/src/operation/download/builders.rs index c62b997..f673778 100644 --- a/aws-s3-transfer-manager/src/operation/download/builders.rs +++ b/aws-s3-transfer-manager/src/operation/download/builders.rs @@ -26,7 +26,7 @@ impl DownloadFluentBuilder { bucket = self.inner.bucket.as_deref().unwrap_or_default(), key = self.inner.key.as_deref().unwrap_or_default(), ))] - pub fn send(self) -> Result { + pub fn initiate(self) -> Result { let input = self.inner.build()?; crate::operation::download::Download::orchestrate(self.handle, input, false) } @@ -527,6 +527,6 @@ impl crate::operation::download::input::DownloadInputBuilder { pub fn send_with(self, client: &crate::Client) -> Result { let mut fluent_builder = client.download(); fluent_builder.inner = self; - fluent_builder.send() + fluent_builder.initiate() } } diff --git a/aws-s3-transfer-manager/src/operation/upload/builders.rs 
b/aws-s3-transfer-manager/src/operation/upload/builders.rs index b69f1d3..907e4de 100644 --- a/aws-s3-transfer-manager/src/operation/upload/builders.rs +++ b/aws-s3-transfer-manager/src/operation/upload/builders.rs @@ -29,6 +29,7 @@ impl UploadFluentBuilder { bucket = self.inner.bucket.as_deref().unwrap_or_default(), key = self.inner.key.as_deref().unwrap_or_default(), ))] + // TODO: Make it consistent with download by renaming it to initiate and making it synchronous pub async fn send(self) -> Result { let input = self.inner.build()?; crate::operation::upload::Upload::orchestrate(self.handle, input).await diff --git a/aws-s3-transfer-manager/tests/download_test.rs b/aws-s3-transfer-manager/tests/download_test.rs index 8bcddb1..db69353 100644 --- a/aws-s3-transfer-manager/tests/download_test.rs +++ b/aws-s3-transfer-manager/tests/download_test.rs @@ -130,7 +130,7 @@ async fn test_download_ranges() { .download() .bucket("test-bucket") .key("test-object") - .send() + .initiate() .unwrap(); let body = drain(&mut handle).await.unwrap(); @@ -164,7 +164,7 @@ async fn test_body_not_consumed() { .download() .bucket("test-bucket") .key("test-object") - .send() + .initiate() .unwrap(); handle.join().await.unwrap(); @@ -274,7 +274,7 @@ async fn test_retry_failed_chunk() { .download() .bucket("test-bucket") .key("test-object") - .send() + .initiate() .unwrap(); let body = drain(&mut handle).await.unwrap(); @@ -329,7 +329,7 @@ async fn test_non_retryable_error() { .download() .bucket("test-bucket") .key("test-object") - .send() + .initiate() .unwrap(); let _ = drain(&mut handle).await.unwrap_err(); @@ -390,7 +390,7 @@ async fn test_retry_max_attempts() { .download() .bucket("test-bucket") .key("test-object") - .send() + .initiate() .unwrap(); let _ = drain(&mut handle).await.unwrap_err(); From 8b19402a74079ee4908dd271f58bd16dd248b446 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 13:44:24 -0800 Subject: [PATCH 29/48] Move aggregated bytes to io --- 
.../src/io/aggregated_bytes.rs | 84 +++++++++++++++++++ aws-s3-transfer-manager/src/io/mod.rs | 2 + .../src/operation/download.rs | 3 +- .../src/operation/download/body.rs | 78 +---------------- .../src/operation/download/service.rs | 2 +- 5 files changed, 91 insertions(+), 78 deletions(-) create mode 100644 aws-s3-transfer-manager/src/io/aggregated_bytes.rs diff --git a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs new file mode 100644 index 0000000..2a6a0fd --- /dev/null +++ b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs @@ -0,0 +1,84 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +use std::io::IoSlice; + +use aws_sdk_s3::primitives::ByteStream; +use bytes::{Buf, Bytes}; +use bytes_utils::SegmentedBuf; + +use crate::error::ErrorKind; + +/// +/// Non-contiguous Binary Data Storage +/// +/// When data is read from the network, it is read in a sequence of chunks that are not in +/// contiguous memory. [`AggregatedBytes`](crate::byte_stream::AggregatedBytes) provides a view of +/// this data via [`impl Buf`](bytes::Buf) or it can be copied into contiguous storage with +/// [`.into_bytes()`](crate::byte_stream::AggregatedBytes::into_bytes). +#[derive(Debug, Clone)] +pub struct AggregatedBytes(pub(crate) SegmentedBuf); + +impl AggregatedBytes { + /// Convert this buffer into [`Bytes`]. + /// + /// # Why does this consume `self`? + /// Technically, [`copy_to_bytes`](bytes::Buf::copy_to_bytes) can be called without ownership of self. However, since this + /// mutates the underlying buffer such that no data is remaining, it is more misuse resistant to + /// prevent the caller from attempting to reread the buffer. + /// + /// If the caller only holds a mutable reference, they may use [`copy_to_bytes`](bytes::Buf::copy_to_bytes) + /// directly on `AggregatedBytes`. 
+ pub fn into_bytes(mut self) -> Bytes { + self.0.copy_to_bytes(self.0.remaining()) + } + + /// Convert this buffer into an [`Iterator`] of underlying non-contiguous segments of [`Bytes`] + pub fn into_segments(self) -> impl Iterator { + self.0.into_inner().into_iter() + } + + /// Convert this buffer into a `Vec` + pub fn to_vec(self) -> Vec { + self.0.into_inner().into_iter().flatten().collect() + } + + /// Make this buffer from a ByteStream + pub(crate) async fn from_byte_stream(value: ByteStream) -> Result { + let mut value = value; + let mut output = SegmentedBuf::new(); + while let Some(buf) = value.next().await { + match buf { + Ok(buf) => output.push(buf), + Err(err) => return Err(crate::error::from_kind(ErrorKind::ChunkFailed)(err)), + }; + } + Ok(AggregatedBytes(output)) + } +} + +impl Buf for AggregatedBytes { + // Forward all methods that SegmentedBuf has custom implementations of. + fn remaining(&self) -> usize { + self.0.remaining() + } + + fn chunk(&self) -> &[u8] { + self.0.chunk() + } + + fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize { + self.0.chunks_vectored(dst) + } + + fn advance(&mut self, cnt: usize) { + self.0.advance(cnt) + } + + fn copy_to_bytes(&mut self, len: usize) -> Bytes { + self.0.copy_to_bytes(len) + } +} + diff --git a/aws-s3-transfer-manager/src/io/mod.rs b/aws-s3-transfer-manager/src/io/mod.rs index ebe9a60..ddb5759 100644 --- a/aws-s3-transfer-manager/src/io/mod.rs +++ b/aws-s3-transfer-manager/src/io/mod.rs @@ -6,6 +6,8 @@ pub(crate) mod part_reader; mod path_body; mod stream; +/// Download Body Type +pub mod aggregated_bytes; /// Error types related to I/O abstractions pub mod error; diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 27667e0..502a8d5 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -25,9 +25,10 @@ mod object_meta; mod service; use crate::error; +use 
crate::io::aggregated_bytes::AggregatedBytes; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; -use body::{AggregatedBytes, Body, ChunkResponse}; +use body::{Body, ChunkResponse}; use chunk_meta::ChunkMetadata; use discovery::discover_obj; use object_meta::ObjectMetadata; diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index b566583..6c44df1 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -2,89 +2,15 @@ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 */ -use aws_smithy_types::byte_stream::ByteStream; -use bytes::{Buf, Bytes}; -use bytes_utils::SegmentedBuf; + use std::cmp; use std::cmp::Ordering; use std::collections::BinaryHeap; -use std::io::IoSlice; use tokio::sync::mpsc; -use crate::error::ErrorKind; - +use crate::io::aggregated_bytes::AggregatedBytes; use super::chunk_meta::ChunkMetadata; -/// Non-contiguous Binary Data Storage -/// -/// When data is read from the network, it is read in a sequence of chunks that are not in -/// contiguous memory. [`AggregatedBytes`](crate::byte_stream::AggregatedBytes) provides a view of -/// this data via [`impl Buf`](bytes::Buf) or it can be copied into contiguous storage with -/// [`.into_bytes()`](crate::byte_stream::AggregatedBytes::into_bytes). -#[derive(Debug, Clone)] -pub struct AggregatedBytes(SegmentedBuf); - -impl AggregatedBytes { - /// Convert this buffer into [`Bytes`]. - /// - /// # Why does this consume `self`? - /// Technically, [`copy_to_bytes`](bytes::Buf::copy_to_bytes) can be called without ownership of self. However, since this - /// mutates the underlying buffer such that no data is remaining, it is more misuse resistant to - /// prevent the caller from attempting to reread the buffer. 
- /// - /// If the caller only holds a mutable reference, they may use [`copy_to_bytes`](bytes::Buf::copy_to_bytes) - /// directly on `AggregatedBytes`. - pub fn into_bytes(mut self) -> Bytes { - self.0.copy_to_bytes(self.0.remaining()) - } - - /// Convert this buffer into an [`Iterator`] of underlying non-contiguous segments of [`Bytes`] - pub fn into_segments(self) -> impl Iterator { - self.0.into_inner().into_iter() - } - - /// Convert this buffer into a `Vec` - pub fn to_vec(self) -> Vec { - self.0.into_inner().into_iter().flatten().collect() - } - - /// Make this buffer from a ByteStream - pub(crate) async fn from_byte_stream(value: ByteStream) -> Result { - let mut value = value; - let mut output = SegmentedBuf::new(); - while let Some(buf) = value.next().await { - match buf { - Ok(buf) => output.push(buf), - Err(err) => return Err(crate::error::from_kind(ErrorKind::ChunkFailed)(err)), - }; - } - Ok(AggregatedBytes(output)) - } -} - -impl Buf for AggregatedBytes { - // Forward all methods that SegmentedBuf has custom implementations of. - fn remaining(&self) -> usize { - self.0.remaining() - } - - fn chunk(&self) -> &[u8] { - self.0.chunk() - } - - fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize { - self.0.chunks_vectored(dst) - } - - fn advance(&mut self, cnt: usize) { - self.0.advance(cnt) - } - - fn copy_to_bytes(&mut self, len: usize) -> Bytes { - self.0.copy_to_bytes(len) - } -} - /// Stream of binary data representing an Amazon S3 Object's contents. /// /// Wraps potentially multiple streams of binary data into a single coherent stream. 
diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index fa45334..20a748e 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -4,6 +4,7 @@ */ use crate::error; use crate::http::header; +use crate::io::aggregated_bytes::AggregatedBytes; use crate::middleware::limit::concurrency::ConcurrencyLimitLayer; use crate::middleware::retry; use crate::operation::download::DownloadContext; @@ -17,7 +18,6 @@ use tokio::task; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; -use super::body::AggregatedBytes; use super::body::ChunkResponse; use super::{DownloadInput, DownloadInputBuilder}; From 631592053bbd2465d047c30a510532d262ef3351 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 13:53:15 -0800 Subject: [PATCH 30/48] traits for request ids --- .../src/operation/download/chunk_meta.rs | 20 ++++++++++--- .../src/operation/download/object_meta.rs | 29 +++++++++++++------ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 7493278..d114a2e 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -11,8 +11,8 @@ use aws_sdk_s3::operation::RequestIdExt; // TODO: Document fields #[derive(Debug, Clone, Default)] pub struct ChunkMetadata { - pub request_id: Option, - pub extended_request_id: Option, + _request_id: Option, + _extended_request_id: Option, pub delete_marker: Option, pub accept_ranges: Option, pub expiration: Option, @@ -54,8 +54,8 @@ pub struct ChunkMetadata { impl From for ChunkMetadata { fn from(value: GetObjectOutput) -> Self { Self { - request_id: value.request_id().map(|s| s.to_string()), - extended_request_id: 
value.extended_request_id().map(|s| s.to_string()), + _request_id: value.request_id().map(|s| s.to_string()), + _extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, accept_ranges: value.accept_ranges, expiration: value.expiration, @@ -96,3 +96,15 @@ impl From for ChunkMetadata { } } } + + +impl RequestIdExt for ChunkMetadata { + fn extended_request_id(&self) -> Option<&str> { + self._extended_request_id.as_deref() + } +} +impl RequestId for ChunkMetadata { + fn request_id(&self) -> Option<&str> { + self._request_id.as_deref() + } +} diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 17594a6..127c1fe 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -14,6 +14,10 @@ use aws_sdk_s3::operation::RequestIdExt; /// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` #[derive(Debug, Clone, Default)] pub struct ObjectMetadata { + /// The request_id if the client made a request to retrieve object metadata, such as with HeadObject. + _request_id: Option, + /// The extended_request_id if the client made a request to retrieve object metadata, such as with HeadObject. + _extended_request_id: Option, pub delete_marker: Option, pub expiration: Option, pub restore: Option, @@ -43,11 +47,6 @@ pub struct ObjectMetadata { pub object_lock_mode: Option, pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, pub object_lock_legal_hold_status: Option, - - /// The request_id if the client made a request to retrieve object metadata, such as with HeadObject. - pub request_id: Option, - /// The extended_request_id if the client made a request to retrieve object metadata, such as with HeadObject. 
- pub extended_request_id: Option, /// Indicates whether the request was charged for a request made only to retrieve object metadata, such as HeadObject. pub request_charged: Option, } @@ -102,8 +101,8 @@ impl From<&GetObjectOutput> for ObjectMetadata { object_lock_mode: value.object_lock_mode.clone(), object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), - request_id: None, - extended_request_id: None, + _request_id: None, + _extended_request_id: None, request_charged: None, } } @@ -112,8 +111,8 @@ impl From<&GetObjectOutput> for ObjectMetadata { impl From for ObjectMetadata { fn from(value: HeadObjectOutput) -> Self { Self { - request_id: value.request_id().map(|s| s.to_string()), - extended_request_id: value.extended_request_id().map(|s| s.to_string()), + _request_id: value.request_id().map(|s| s.to_string()), + _extended_request_id: value.extended_request_id().map(|s| s.to_string()), request_charged: value.request_charged, delete_marker: value.delete_marker, expiration: value.expiration, @@ -149,6 +148,18 @@ impl From for ObjectMetadata { } } +impl RequestIdExt for ObjectMetadata { + fn extended_request_id(&self) -> Option<&str> { + self._extended_request_id.as_deref() + } +} +impl RequestId for ObjectMetadata { + fn request_id(&self) -> Option<&str> { + self._request_id.as_deref() + } +} + + #[cfg(test)] mod tests { use super::ObjectMetadata; From 43d1b436048093325c6f80512584c11ae72aed18 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 13:59:36 -0800 Subject: [PATCH 31/48] rename total size to content length --- aws-s3-transfer-manager/examples/cp.rs | 2 +- .../src/operation/download/discovery.rs | 8 ++++---- .../src/operation/download/object_meta.rs | 17 +++++++---------- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/aws-s3-transfer-manager/examples/cp.rs b/aws-s3-transfer-manager/examples/cp.rs index 7978417..fe5d524 100644 --- 
a/aws-s3-transfer-manager/examples/cp.rs +++ b/aws-s3-transfer-manager/examples/cp.rs @@ -178,7 +178,7 @@ async fn do_download(args: Args) -> Result<(), BoxError> { .await?; let elapsed = start.elapsed(); - let obj_size_bytes = handle.object_meta().await?.total_size(); + let obj_size_bytes = handle.object_meta().await?.content_length(); let throughput = Throughput::new(obj_size_bytes, elapsed); println!( diff --git a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index 06e83f3..d18c459 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -126,10 +126,10 @@ async fn discover_obj_with_head( let remaining = match byte_range { Some(range) => match range { ByteRange::Inclusive(start, end) => start..=end, - ByteRange::AllFrom(start) => start..=object_meta.total_size(), - ByteRange::Last(n) => (object_meta.total_size() - n + 1)..=object_meta.total_size(), + ByteRange::AllFrom(start) => start..=object_meta.content_length(), + ByteRange::Last(n) => (object_meta.content_length() - n + 1)..=object_meta.content_length(), }, - None => 0..=object_meta.total_size(), + None => 0..=object_meta.content_length(), }; Ok(ObjectDiscovery { @@ -163,7 +163,7 @@ async fn discover_obj_with_get( let remaining = match range { Some(range) => (*range.start() + content_len)..=*range.end(), - None => content_len..=object_meta.total_size() - 1, + None => content_len..=object_meta.content_length() - 1, }; let initial_chunk = match content_len == 0 { diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 127c1fe..ea1fa8a 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -14,9 +14,7 @@ use aws_sdk_s3::operation::RequestIdExt; /// Object metadata other than 
the body that can be set from either `GetObject` or `HeadObject` #[derive(Debug, Clone, Default)] pub struct ObjectMetadata { - /// The request_id if the client made a request to retrieve object metadata, such as with HeadObject. _request_id: Option, - /// The extended_request_id if the client made a request to retrieve object metadata, such as with HeadObject. _extended_request_id: Option, pub delete_marker: Option, pub expiration: Option, @@ -47,13 +45,12 @@ pub struct ObjectMetadata { pub object_lock_mode: Option, pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, pub object_lock_legal_hold_status: Option, - /// Indicates whether the request was charged for a request made only to retrieve object metadata, such as HeadObject. pub request_charged: Option, } impl ObjectMetadata { /// The total object size - pub fn total_size(&self) -> u64 { + pub fn content_length(&self) -> u64 { match (self.content_length, self.content_range.as_ref()) { (_, Some(range)) => { let total = range.split_once('/').map(|x| x.1).expect("content range total"); @@ -71,6 +68,8 @@ impl ObjectMetadata { impl From<&GetObjectOutput> for ObjectMetadata { fn from(value: &GetObjectOutput) -> Self { Self { + _request_id: value.request_id().map(|s| s.to_string()), + _extended_request_id: value.extended_request_id().map(|s| s.to_string()), delete_marker: value.delete_marker, expiration: value.expiration.clone(), restore: value.restore.clone(), @@ -101,9 +100,7 @@ impl From<&GetObjectOutput> for ObjectMetadata { object_lock_mode: value.object_lock_mode.clone(), object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status.clone(), - _request_id: None, - _extended_request_id: None, - request_charged: None, + request_charged: value.request_charged.clone(), } } } @@ -113,7 +110,6 @@ impl From for ObjectMetadata { Self { _request_id: value.request_id().map(|s| s.to_string()), _extended_request_id: 
value.extended_request_id().map(|s| s.to_string()), - request_charged: value.request_charged, delete_marker: value.delete_marker, expiration: value.expiration, restore: value.restore, @@ -144,6 +140,7 @@ impl From for ObjectMetadata { object_lock_mode: value.object_lock_mode, object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status, + request_charged: value.request_charged, } } } @@ -171,13 +168,13 @@ mod tests { ..Default::default() }; - assert_eq!(15, meta.total_size()); + assert_eq!(15, meta.content_length()); let meta = ObjectMetadata { content_range: Some("bytes 0-499/900".to_owned()), content_length: Some(500), ..Default::default() }; - assert_eq!(900, meta.total_size()); + assert_eq!(900, meta.content_length()); } } From aa22e50767ba5970548cbedee247b9eb352b92e3 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 14:24:39 -0800 Subject: [PATCH 32/48] Add docs for ObjectMetadata --- .../src/operation/download/object_meta.rs | 76 ++++++++++++++++--- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index ea1fa8a..06ef809 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -8,48 +8,106 @@ use aws_sdk_s3::operation::head_object::HeadObjectOutput; use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; -// TODO(aws-sdk-rust#1159,design): how many of these fields should we expose? -// TODO(aws-sdk-rust#1159,docs): Document fields - /// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` +/// In the case of GetObject, some data will be duplicated as part of the first chunk. 
#[derive(Debug, Clone, Default)] pub struct ObjectMetadata { _request_id: Option, _extended_request_id: Option, + ///

Indicates whether the object retrieved was (true) or was not (false) a Delete Marker. If false, this response header does not appear in the response.

+ ///
    + ///
  • + ///

    If the current version of the object is a delete marker, Amazon S3 behaves as if the object was deleted and includes x-amz-delete-marker: true in the response.

  • + ///
  • + ///

    If the specified version in the request is a delete marker, the response returns a 405 Method Not Allowed error and the Last-Modified: timestamp response header.

  • + ///
+ ///
pub delete_marker: Option, + ///

If the object expiration is configured (see PutBucketLifecycleConfiguration ), the response includes this header. It includes the expiry-date and rule-id key-value pairs providing object expiration information. The value of the rule-id is URL-encoded.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub expiration: Option, + ///

Provides information about object restoration action and expiration time of the restored object copy.

+ ///

This functionality is not supported for directory buckets. Only the S3 Express One Zone storage class is supported by directory buckets to store objects.

+ ///
pub restore: Option, + ///

Date and time when the object was last modified.

+ ///

General purpose buckets - When you specify a versionId of the object in your request, if the specified version in the request is a delete marker, the response returns a 405 Method Not Allowed error and the Last-Modified: timestamp response header.

pub last_modified: Option<::aws_smithy_types::DateTime>, pub(crate) content_length: Option, + ///

An entity tag (ETag) is an opaque identifier assigned by a web server to a specific version of a resource found at a URL.

pub e_tag: Option, + ///

This is set to the number of metadata entries not returned in the headers that are prefixed with x-amz-meta-. This can happen if you create metadata using an API like SOAP that supports more flexible metadata than the REST API. For example, using SOAP, you can create metadata whose values are not legal HTTP headers.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub missing_meta: Option, + ///

Version ID of the object.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub version_id: Option, + ///

Specifies caching behavior along the request/reply chain.

pub cache_control: Option, + ///

Specifies presentational information for the object.

pub content_disposition: Option, + ///

Indicates what content encodings have been applied to the object and thus what decoding mechanisms must be applied to obtain the media-type referenced by the Content-Type header field.

pub content_encoding: Option, + ///

The language the content is in.

pub content_language: Option, pub(crate) content_range: Option, + ///

A standard MIME type describing the format of the object data.

pub content_type: Option, - pub expires: Option<::aws_smithy_types::DateTime>, - pub expires_string: Option, + ///

If the bucket is configured as a website, redirects requests for this object to another object in the same bucket or to an external URL. Amazon S3 stores the value of this header in the object metadata.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub website_redirect_location: Option, + ///

The server-side encryption algorithm used when you store this object in Amazon S3.

pub server_side_encryption: Option, + ///

A map of metadata to store with the object in S3.

pub metadata: Option<::std::collections::HashMap>, + ///

If server-side encryption with a customer-provided encryption key was requested, the response will include this header to confirm the encryption algorithm that's used.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub sse_customer_algorithm: Option, + ///

If server-side encryption with a customer-provided encryption key was requested, the response will include this header to provide the round-trip message integrity verification of the customer-provided encryption key.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub sse_customer_key_md5: Option, + ///

If present, indicates the ID of the KMS key that was used for object encryption.

pub ssekms_key_id: Option, + ///

Indicates whether the object uses an S3 Bucket Key for server-side encryption with Key Management Service (KMS) keys (SSE-KMS).

pub bucket_key_enabled: Option, + ///

Provides storage class information of the object. Amazon S3 returns this header for all objects except for S3 Standard storage class objects.

+ ///

Directory buckets - Only the S3 Express One Zone storage class is supported by directory buckets to store objects.

+ ///
pub storage_class: Option, + ///

If present, indicates that the requester was successfully charged for the request.

+ ///

This functionality is not supported for directory buckets.

+ ///
+ pub request_charged: Option, + ///

Amazon S3 can return this if your request involves a bucket that is either a source or destination in a replication rule.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub replication_status: Option, + ///

The count of parts this object has. This value is only returned if you specify partNumber in your request and the object was uploaded as a multipart upload.

pub parts_count: Option, + ///

The Object Lock mode that's currently in place for this object.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub object_lock_mode: Option, + ///

The date and time when this object's Object Lock will expire.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, + ///

Indicates whether this object has an active legal hold. This field is only returned if you have permission to view an object's legal hold status.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub object_lock_legal_hold_status: Option, - pub request_charged: Option, + ///

The date and time at which the object is no longer cacheable.

+ pub expires_string: Option, } impl ObjectMetadata { - /// The total object size + ///

Size of the object in bytes.

pub fn content_length(&self) -> u64 { match (self.content_length, self.content_range.as_ref()) { (_, Some(range)) => { @@ -84,8 +142,6 @@ impl From<&GetObjectOutput> for ObjectMetadata { content_language: value.content_language.clone(), content_range: value.content_range.clone(), content_type: value.content_type.clone(), - #[allow(deprecated)] - expires: value.expires, expires_string: value.expires_string.clone(), website_redirect_location: value.website_redirect_location.clone(), server_side_encryption: value.server_side_encryption.clone(), @@ -124,8 +180,6 @@ impl From for ObjectMetadata { content_language: value.content_language, content_range: None, content_type: value.content_type, - #[allow(deprecated)] - expires: value.expires, expires_string: value.expires_string, website_redirect_location: value.website_redirect_location, server_side_encryption: value.server_side_encryption, From 0fe2fac76b9a50eb8aec55542a446920bb8bd47b Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 14:37:41 -0800 Subject: [PATCH 33/48] Source docs from generated code --- .../src/operation/download/chunk_meta.rs | 87 +++++++++++++++++-- .../src/operation/download/object_meta.rs | 1 + 2 files changed, 79 insertions(+), 9 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index d114a2e..efd002a 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -7,48 +7,119 @@ use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; -/// request metadata other than the body that will be set from `GetObject` -// TODO: Document fields +/// Request metadata, other than the body, that will be set from the `GetObject` request. 
#[derive(Debug, Clone, Default)] +#[non_exhaustive] pub struct ChunkMetadata { - _request_id: Option, - _extended_request_id: Option, + ///

Indicates whether the object retrieved was (true) or was not (false) a Delete Marker. If false, this response header does not appear in the response.

+ ///
+ /// - If the current version of the object is a delete marker, Amazon S3 behaves as if the object was deleted and includes x-amz-delete-marker: true in the response.
+ ///
+ /// - If the specified version in the request is a delete marker, the response returns a 405 Method Not Allowed error and the Last-Modified: timestamp response header.
+ ///
pub delete_marker: Option, + ///

Indicates that a range of bytes was specified in the request.

pub accept_ranges: Option, + ///

If the object expiration is configured (see PutBucketLifecycleConfiguration), the response includes this header. It includes the expiry-date and rule-id key-value pairs providing object expiration information. The value of the rule-id is URL-encoded.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub expiration: Option, + ///

Provides information about object restoration action and expiration time of the restored object copy.

+ ///

This functionality is not supported for directory buckets. Only the S3 Express One Zone storage class is supported by directory buckets to store objects.

+ ///
pub restore: Option, + ///

Date and time when the object was last modified.

+ ///

General purpose buckets - When you specify a versionId of the object in your request, if the specified version in the request is a delete marker, the response returns a 405 Method Not Allowed error and the Last-Modified: timestamp response header.

pub last_modified: Option<::aws_smithy_types::DateTime>, + ///

Size of the body in bytes.

pub content_length: Option, + ///

An entity tag (ETag) is an opaque identifier assigned by a web server to a specific version of a resource found at a URL.

pub e_tag: Option, + ///

The base64-encoded, 32-bit CRC-32 checksum of the object. This will only be present if it was uploaded with the object. For more information, see Checking object integrity in the Amazon S3 User Guide.

pub checksum_crc32: Option, + ///

The base64-encoded, 32-bit CRC-32C checksum of the object. This will only be present if it was uploaded with the object. For more information, see Checking object integrity in the Amazon S3 User Guide.

pub checksum_crc32_c: Option, + ///

The base64-encoded, 160-bit SHA-1 digest of the object. This will only be present if it was uploaded with the object. For more information, see Checking object integrity in the Amazon S3 User Guide.

pub checksum_sha1: Option, + ///

The base64-encoded, 256-bit SHA-256 digest of the object. This will only be present if it was uploaded with the object. For more information, see Checking object integrity in the Amazon S3 User Guide.

pub checksum_sha256: Option, + ///

This is set to the number of metadata entries not returned in the headers that are prefixed with x-amz-meta-. This can happen if you create metadata using an API like SOAP that supports more flexible metadata than the REST API. For example, using SOAP, you can create metadata whose values are not legal HTTP headers.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub missing_meta: Option, + ///

Version ID of the object.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub version_id: Option, + ///

Specifies caching behavior along the request/reply chain.

pub cache_control: Option, + ///

Specifies presentational information for the object.

pub content_disposition: Option, + ///

Indicates what content encodings have been applied to the object and thus what decoding mechanisms must be applied to obtain the media-type referenced by the Content-Type header field.

pub content_encoding: Option, + ///

The language the content is in.

pub content_language: Option, + ///

The portion of the object returned in the response.

pub content_range: Option, + ///

A standard MIME type describing the format of the object data.

pub content_type: Option, - pub expires: Option<::aws_smithy_types::DateTime>, - pub expires_string: Option, + ///

If the bucket is configured as a website, redirects requests for this object to another object in the same bucket or to an external URL. Amazon S3 stores the value of this header in the object metadata.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub website_redirect_location: Option, + ///

The server-side encryption algorithm used when you store this object in Amazon S3.

pub server_side_encryption: Option, + ///

A map of metadata to store with the object in S3.

pub metadata: Option<::std::collections::HashMap>, + ///

If server-side encryption with a customer-provided encryption key was requested, the response will include this header to confirm the encryption algorithm that's used.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub sse_customer_algorithm: Option, + ///

If server-side encryption with a customer-provided encryption key was requested, the response will include this header to provide the round-trip message integrity verification of the customer-provided encryption key.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub sse_customer_key_md5: Option, + ///

If present, indicates the ID of the KMS key that was used for object encryption.

pub ssekms_key_id: Option, + ///

Indicates whether the object uses an S3 Bucket Key for server-side encryption with Key Management Service (KMS) keys (SSE-KMS).

pub bucket_key_enabled: Option, + ///

Provides storage class information of the object. Amazon S3 returns this header for all objects except for S3 Standard storage class objects.

+ ///

Directory buckets - Only the S3 Express One Zone storage class is supported by directory buckets to store objects.

+ ///
pub storage_class: Option, + ///

If present, indicates that the requester was successfully charged for the request.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub request_charged: Option, + ///

Amazon S3 can return this if your request involves a bucket that is either a source or destination in a replication rule.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub replication_status: Option, + ///

The count of parts this object has. This value is only returned if you specify partNumber in your request and the object was uploaded as a multipart upload.

pub parts_count: Option, + ///

The number of tags, if any, on the object, when you have the relevant permission to read object tags.

+ ///

You can use GetObjectTagging to retrieve the tag set associated with an object.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub tag_count: Option, + ///

The Object Lock mode that's currently in place for this object.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub object_lock_mode: Option, + ///

The date and time when this object's Object Lock will expire.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub object_lock_retain_until_date: Option<::aws_smithy_types::DateTime>, + ///

Indicates whether this object has an active legal hold. This field is only returned if you have permission to view an object's legal hold status.

+ ///

This functionality is not supported for directory buckets.

+ ///
pub object_lock_legal_hold_status: Option, + ///

The date and time at which the object is no longer cacheable.

+ pub expires_string: Option, + _request_id: Option, + _extended_request_id: Option, } impl From for ChunkMetadata { @@ -75,9 +146,6 @@ impl From for ChunkMetadata { content_language: value.content_language, content_range: value.content_range, content_type: value.content_type, - #[allow(deprecated)] - expires: value.expires, - expires_string: value.expires_string, website_redirect_location: value.website_redirect_location, server_side_encryption: value.server_side_encryption, metadata: value.metadata, @@ -93,6 +161,7 @@ impl From for ChunkMetadata { object_lock_mode: value.object_lock_mode, object_lock_retain_until_date: value.object_lock_retain_until_date, object_lock_legal_hold_status: value.object_lock_legal_hold_status, + expires_string: value.expires_string, } } } diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 06ef809..7bb24ba 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -11,6 +11,7 @@ use aws_sdk_s3::operation::RequestIdExt; /// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` /// In the case of GetObject, some data will be duplicated as part of the first chunk. 
#[derive(Debug, Clone, Default)] +#[non_exhaustive] pub struct ObjectMetadata { _request_id: Option, _extended_request_id: Option, From b04c4d4ec6ba5f7f5e8ce11c3ffd823637fe989d Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 14:42:18 -0800 Subject: [PATCH 34/48] add comments --- aws-s3-transfer-manager/src/io/aggregated_bytes.rs | 1 - aws-s3-transfer-manager/src/operation/download/handle.rs | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs index 2a6a0fd..1ba309d 100644 --- a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs +++ b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs @@ -11,7 +11,6 @@ use bytes_utils::SegmentedBuf; use crate::error::ErrorKind; -/// /// Non-contiguous Binary Data Storage /// /// When data is read from the network, it is read in a sequence of chunks that are not in diff --git a/aws-s3-transfer-manager/src/operation/download/handle.rs b/aws-s3-transfer-manager/src/operation/download/handle.rs index e1a8c3c..63689f3 100644 --- a/aws-s3-transfer-manager/src/operation/download/handle.rs +++ b/aws-s3-transfer-manager/src/operation/download/handle.rs @@ -69,6 +69,8 @@ impl DownloadHandle { self.body.close(); self.discovery.await??; + // It's safe to grab the lock here because discovery is already complete, and we will never + // lock tasks again after discovery to spawn more tasks. 
let mut tasks = self.tasks.lock().await; while let Some(join_result) = tasks.join_next().await { join_result?; From bb837fc0f85a07a89f574d6620901d91271181bd Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 14:45:43 -0800 Subject: [PATCH 35/48] add todo --- aws-s3-transfer-manager/src/operation/download/body.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 6c44df1..404cbd6 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -37,6 +37,8 @@ pub struct ChunkResponse { pub metadata: ChunkMetadata, } +// TODO: Do we want to expose something to yield multiple chunks in a single call, like +// recv_many/collect, etc.? impl Body { /// Create a new empty body pub fn empty() -> Self { From 6481cd95b582b9b6884c196492930d759e570000 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 14:47:51 -0800 Subject: [PATCH 36/48] fix comments --- aws-s3-transfer-manager/src/operation/download/body.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 404cbd6..a01e03d 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -31,9 +31,10 @@ pub struct ChunkResponse { // TODO(aws-sdk-rust#1159, design) - consider PartialOrd for ChunkResponse and hiding `seq` as internal only detail // the seq number pub(crate) seq: u64, - /// data: body of the object + /// The content associated with this particular ranged GetObject request. pub data: AggregatedBytes, - /// metadata: metadata returned by the S3. + /// The metadata associated with this particular ranged Get request. This contains all the + /// metadata returned by the S3 GetObject call. 
pub metadata: ChunkMetadata, } From 947501ab82d293f3f1c6e8eb8683aa5a892b0bc5 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 15:32:54 -0800 Subject: [PATCH 37/48] fmt --- aws-s3-transfer-manager/src/io/aggregated_bytes.rs | 1 - aws-s3-transfer-manager/src/io/mod.rs | 4 ++-- aws-s3-transfer-manager/src/operation/download/body.rs | 2 +- aws-s3-transfer-manager/src/operation/download/chunk_meta.rs | 1 - aws-s3-transfer-manager/src/operation/download/discovery.rs | 4 +++- aws-s3-transfer-manager/src/operation/download/object_meta.rs | 1 - 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs index 1ba309d..c8d6293 100644 --- a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs +++ b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs @@ -80,4 +80,3 @@ impl Buf for AggregatedBytes { self.0.copy_to_bytes(len) } } - diff --git a/aws-s3-transfer-manager/src/io/mod.rs b/aws-s3-transfer-manager/src/io/mod.rs index ddb5759..185a660 100644 --- a/aws-s3-transfer-manager/src/io/mod.rs +++ b/aws-s3-transfer-manager/src/io/mod.rs @@ -3,11 +3,11 @@ * SPDX-License-Identifier: Apache-2.0 */ +/// Download Body Type +pub mod aggregated_bytes; pub(crate) mod part_reader; mod path_body; mod stream; -/// Download Body Type -pub mod aggregated_bytes; /// Error types related to I/O abstractions pub mod error; diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index a01e03d..cba1646 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -8,8 +8,8 @@ use std::cmp::Ordering; use std::collections::BinaryHeap; use tokio::sync::mpsc; -use crate::io::aggregated_bytes::AggregatedBytes; use super::chunk_meta::ChunkMetadata; +use crate::io::aggregated_bytes::AggregatedBytes; /// Stream of binary data representing an 
Amazon S3 Object's contents. /// diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index efd002a..66b0375 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -166,7 +166,6 @@ impl From for ChunkMetadata { } } - impl RequestIdExt for ChunkMetadata { fn extended_request_id(&self) -> Option<&str> { self._extended_request_id.as_deref() diff --git a/aws-s3-transfer-manager/src/operation/download/discovery.rs b/aws-s3-transfer-manager/src/operation/download/discovery.rs index d18c459..ed72136 100644 --- a/aws-s3-transfer-manager/src/operation/download/discovery.rs +++ b/aws-s3-transfer-manager/src/operation/download/discovery.rs @@ -127,7 +127,9 @@ async fn discover_obj_with_head( Some(range) => match range { ByteRange::Inclusive(start, end) => start..=end, ByteRange::AllFrom(start) => start..=object_meta.content_length(), - ByteRange::Last(n) => (object_meta.content_length() - n + 1)..=object_meta.content_length(), + ByteRange::Last(n) => { + (object_meta.content_length() - n + 1)..=object_meta.content_length() + } }, None => 0..=object_meta.content_length(), }; diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index 7bb24ba..bd9516b 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -211,7 +211,6 @@ impl RequestId for ObjectMetadata { } } - #[cfg(test)] mod tests { use super::ObjectMetadata; From 60cbcdc0c34be5e061ca6892d68e33cef037198b Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 15:45:13 -0800 Subject: [PATCH 38/48] update comment --- aws-s3-transfer-manager/src/operation/download/body.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index cba1646..20d618e 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -39,7 +39,8 @@ pub struct ChunkResponse { } // TODO: Do we want to expose something to yield multiple chunks in a single call, like -// recv_many/collect, etc.? +// recv_many/collect, etc.? We can benchmark to see if we get a significant performance boost once +// we have a better scheduler in place. impl Body { /// Create a new empty body pub fn empty() -> Self { From ba32f032fc1297987df7555f5f583f4be32de3e2 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Tue, 19 Nov 2024 16:49:05 -0800 Subject: [PATCH 39/48] add manual debug implementation --- .../src/operation/download/body.rs | 4 +- .../src/operation/download/chunk_meta.rs | 54 ++++++++++++++++++- .../src/operation/download/object_meta.rs | 46 +++++++++++++++- 3 files changed, 99 insertions(+), 5 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index 20d618e..b020c86 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -33,8 +33,8 @@ pub struct ChunkResponse { pub(crate) seq: u64, /// The content associated with this particular ranged GetObject request. pub data: AggregatedBytes, - /// The metadata associated with this particular ranged Get request. This contains all the - /// metadata returned by the S3 GetObject call. + /// The metadata associated with this particular ranged GetObject request. This contains all the + /// metadata returned by the S3 GetObject operation. 
pub metadata: ChunkMetadata, } diff --git a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs index 66b0375..96ee464 100644 --- a/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/chunk_meta.rs @@ -7,8 +7,8 @@ use aws_sdk_s3::operation::get_object::GetObjectOutput; use aws_sdk_s3::operation::RequestId; use aws_sdk_s3::operation::RequestIdExt; -/// Request metadata, other than the body, that will be set from the `GetObject` request. -#[derive(Debug, Clone, Default)] +/// Chunk Metadata, other than the body, that will be set from the `GetObject` request. +#[derive(Clone, Default)] #[non_exhaustive] pub struct ChunkMetadata { ///

Indicates whether the object retrieved was (true) or was not (false) a Delete Marker. If false, this response header does not appear in the response.

@@ -176,3 +176,53 @@ impl RequestId for ChunkMetadata { self._request_id.as_deref() } } + +impl ::std::fmt::Debug for ChunkMetadata { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + let mut formatter = f.debug_struct("ChunkMetadata"); + formatter.field("delete_marker", &self.delete_marker); + formatter.field("accept_ranges", &self.accept_ranges); + formatter.field("expiration", &self.expiration); + formatter.field("restore", &self.restore); + formatter.field("last_modified", &self.last_modified); + formatter.field("content_length", &self.content_length); + formatter.field("e_tag", &self.e_tag); + formatter.field("checksum_crc32", &self.checksum_crc32); + formatter.field("checksum_crc32_c", &self.checksum_crc32_c); + formatter.field("checksum_sha1", &self.checksum_sha1); + formatter.field("checksum_sha256", &self.checksum_sha256); + formatter.field("missing_meta", &self.missing_meta); + formatter.field("version_id", &self.version_id); + formatter.field("cache_control", &self.cache_control); + formatter.field("content_disposition", &self.content_disposition); + formatter.field("content_encoding", &self.content_encoding); + formatter.field("content_language", &self.content_language); + formatter.field("content_range", &self.content_range); + formatter.field("content_type", &self.content_type); + formatter.field("website_redirect_location", &self.website_redirect_location); + formatter.field("server_side_encryption", &self.server_side_encryption); + formatter.field("metadata", &self.metadata); + formatter.field("sse_customer_algorithm", &self.sse_customer_algorithm); + formatter.field("sse_customer_key_md5", &self.sse_customer_key_md5); + formatter.field("ssekms_key_id", &"*** Sensitive Data Redacted ***"); + formatter.field("bucket_key_enabled", &self.bucket_key_enabled); + formatter.field("storage_class", &self.storage_class); + formatter.field("request_charged", &self.request_charged); + formatter.field("replication_status", 
&self.replication_status); + formatter.field("parts_count", &self.parts_count); + formatter.field("tag_count", &self.tag_count); + formatter.field("object_lock_mode", &self.object_lock_mode); + formatter.field( + "object_lock_retain_until_date", + &self.object_lock_retain_until_date, + ); + formatter.field( + "object_lock_legal_hold_status", + &self.object_lock_legal_hold_status, + ); + formatter.field("expires_string", &self.expires_string); + formatter.field("_extended_request_id", &self._extended_request_id); + formatter.field("_request_id", &self._request_id); + formatter.finish() + } +} diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index bd9516b..f33b0c0 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -10,7 +10,7 @@ use aws_sdk_s3::operation::RequestIdExt; /// Object metadata other than the body that can be set from either `GetObject` or `HeadObject` /// In the case of GetObject, some data will be duplicated as part of the first chunk. 
-#[derive(Debug, Clone, Default)] +#[derive(Clone, Default)] #[non_exhaustive] pub struct ObjectMetadata { _request_id: Option, @@ -211,6 +211,50 @@ impl RequestId for ObjectMetadata { } } +impl ::std::fmt::Debug for ObjectMetadata { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + let mut formatter = f.debug_struct("ObjectMetadata"); + formatter.field("delete_marker", &self.delete_marker); + formatter.field("expiration", &self.expiration); + formatter.field("restore", &self.restore); + formatter.field("last_modified", &self.last_modified); + formatter.field("content_length", &self.content_length); + formatter.field("e_tag", &self.e_tag); + formatter.field("missing_meta", &self.missing_meta); + formatter.field("version_id", &self.version_id); + formatter.field("cache_control", &self.cache_control); + formatter.field("content_disposition", &self.content_disposition); + formatter.field("content_encoding", &self.content_encoding); + formatter.field("content_language", &self.content_language); + formatter.field("content_range", &self.content_range); + formatter.field("content_type", &self.content_type); + formatter.field("website_redirect_location", &self.website_redirect_location); + formatter.field("server_side_encryption", &self.server_side_encryption); + formatter.field("metadata", &self.metadata); + formatter.field("sse_customer_algorithm", &self.sse_customer_algorithm); + formatter.field("sse_customer_key_md5", &self.sse_customer_key_md5); + formatter.field("ssekms_key_id", &"*** Sensitive Data Redacted ***"); + formatter.field("bucket_key_enabled", &self.bucket_key_enabled); + formatter.field("storage_class", &self.storage_class); + formatter.field("request_charged", &self.request_charged); + formatter.field("replication_status", &self.replication_status); + formatter.field("parts_count", &self.parts_count); + formatter.field("object_lock_mode", &self.object_lock_mode); + formatter.field( + "object_lock_retain_until_date", + 
&self.object_lock_retain_until_date, + ); + formatter.field( + "object_lock_legal_hold_status", + &self.object_lock_legal_hold_status, + ); + formatter.field("expires_string", &self.expires_string); + formatter.field("_extended_request_id", &self._extended_request_id); + formatter.field("_request_id", &self._request_id); + formatter.finish() + } +} + #[cfg(test)] mod tests { use super::ObjectMetadata; From bb43ed2c101339a85323f1733ae87aa193070666 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Wed, 20 Nov 2024 10:26:03 -0800 Subject: [PATCH 40/48] rename ChunkResponse -> ChunkOutput --- .../src/operation/download.rs | 8 +++---- .../src/operation/download/body.rs | 24 +++++++++---------- .../src/operation/download/service.rs | 12 +++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 502a8d5..45b2fa1 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -28,7 +28,7 @@ use crate::error; use crate::io::aggregated_bytes::AggregatedBytes; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; -use body::{Body, ChunkResponse}; +use body::{Body, ChunkOutput}; use chunk_meta::ChunkMetadata; use discovery::discover_obj; use object_meta::ObjectMetadata; @@ -94,7 +94,7 @@ impl Download { async fn send_discovery( tasks: Arc>>, ctx: DownloadContext, - comp_tx: mpsc::Sender>, + comp_tx: mpsc::Sender>, object_meta_tx: oneshot::Sender, input: DownloadInput, use_current_span_as_parent_for_tasks: bool, @@ -158,7 +158,7 @@ fn handle_discovery_chunk( tasks: &mut task::JoinSet<()>, ctx: DownloadContext, initial_chunk: Option, - completed: &mpsc::Sender>, + completed: &mpsc::Sender>, permit: OwnedWorkPermit, parent_span_for_tasks: tracing::Span, metadata: Option, @@ -171,7 +171,7 @@ fn handle_discovery_chunk( tasks.spawn(async move { let chunk = 
AggregatedBytes::from_byte_stream(stream) .await - .map(|aggregated| ChunkResponse { + .map(|aggregated| ChunkOutput { seq, data: aggregated, metadata: metadata.expect("chunk metadata is available"), diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index b020c86..cb52d01 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -21,13 +21,13 @@ pub struct Body { sequencer: Sequencer, } -type BodyChannel = mpsc::Receiver>; +type BodyChannel = mpsc::Receiver>; /// Contains body and metadata for each GetObject call made. This will be delivered sequentially /// in-order. #[derive(Debug, Clone)] #[non_exhaustive] -pub struct ChunkResponse { +pub struct ChunkOutput { // TODO(aws-sdk-rust#1159, design) - consider PartialOrd for ChunkResponse and hiding `seq` as internal only detail // the seq number pub(crate) seq: u64, @@ -71,7 +71,7 @@ impl Body { /// Returns [None] when there is no more data. /// Chunks returned from a [Body] are guaranteed to be sequenced /// in the right order. 
- pub async fn next(&mut self) -> Option> { + pub async fn next(&mut self) -> Option> { loop { if self.sequencer.is_ordered() { break; @@ -114,11 +114,11 @@ impl Sequencer { } } - fn push(&mut self, chunk: ChunkResponse) { + fn push(&mut self, chunk: ChunkOutput) { self.chunks.push(cmp::Reverse(SequencedChunk(chunk))) } - fn pop(&mut self) -> Option { + fn pop(&mut self) -> Option { self.chunks.pop().map(|c| c.0 .0) } @@ -131,7 +131,7 @@ impl Sequencer { next.unwrap().seq == self.next_seq } - fn peek(&self) -> Option<&ChunkResponse> { + fn peek(&self) -> Option<&ChunkOutput> { self.chunks.peek().map(|c| &c.0 .0) } @@ -141,7 +141,7 @@ impl Sequencer { } #[derive(Debug)] -struct SequencedChunk(ChunkResponse); +struct SequencedChunk(ChunkOutput); impl Ord for SequencedChunk { fn cmp(&self, other: &Self) -> Ordering { @@ -165,7 +165,7 @@ impl PartialEq for SequencedChunk { /// A body that returns chunks in whatever order they are received. #[derive(Debug)] pub(crate) struct UnorderedBody { - chunks: Option>>, + chunks: Option>>, } impl UnorderedBody { @@ -179,7 +179,7 @@ impl UnorderedBody { /// Chunks returned from an [UnorderedBody] are not guaranteed to be sorted /// in the right order. Consumers are expected to sort the data themselves /// using the chunk sequence number (starting from zero). 
- pub(crate) async fn next(&mut self) -> Option> { + pub(crate) async fn next(&mut self) -> Option> { match self.chunks.as_mut() { None => None, Some(ch) => ch.recv().await, @@ -196,15 +196,15 @@ impl UnorderedBody { #[cfg(test)] mod tests { - use crate::{error, operation::download::body::ChunkResponse}; + use crate::{error, operation::download::body::ChunkOutput}; use bytes::Bytes; use bytes_utils::SegmentedBuf; use tokio::sync::mpsc; use super::{AggregatedBytes, Body, Sequencer}; - fn chunk_resp(seq: u64, data: AggregatedBytes) -> ChunkResponse { - ChunkResponse { + fn chunk_resp(seq: u64, data: AggregatedBytes) -> ChunkOutput { + ChunkOutput { seq, data, metadata: Default::default(), diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index 20a748e..de9bcfc 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -18,7 +18,7 @@ use tokio::task; use tower::{service_fn, Service, ServiceBuilder, ServiceExt}; use tracing::Instrument; -use super::body::ChunkResponse; +use super::body::ChunkOutput; use super::{DownloadInput, DownloadInputBuilder}; /// Request/input type for our "chunk" service. 
@@ -45,7 +45,7 @@ fn next_chunk( /// handler (service fn) for a single chunk async fn download_chunk_handler( request: DownloadChunkRequest, -) -> Result { +) -> Result { let seq: u64 = request.ctx.next_seq(); // the rest of the work is in its own fn, so we can log `seq` in the tracing span @@ -57,7 +57,7 @@ async fn download_chunk_handler( async fn download_specific_chunk( request: DownloadChunkRequest, seq: u64, -) -> Result { +) -> Result { let ctx = request.ctx; let part_size = ctx.handle.download_part_size_bytes(); let input = next_chunk( @@ -83,7 +83,7 @@ async fn download_specific_chunk( )) .await?; - Ok(ChunkResponse { + Ok(ChunkOutput { seq, data: body, metadata: resp.into(), @@ -93,7 +93,7 @@ async fn download_specific_chunk( /// Create a new tower::Service for downloading individual chunks of an object from S3 pub(super) fn chunk_service( ctx: &DownloadContext, -) -> impl Service +) -> impl Service + Clone + Send { let svc = service_fn(download_chunk_handler); @@ -120,7 +120,7 @@ pub(super) fn distribute_work( remaining: RangeInclusive, input: DownloadInput, start_seq: u64, - comp_tx: mpsc::Sender>, + comp_tx: mpsc::Sender>, parent_span_for_tasks: tracing::Span, ) { let svc = chunk_service(&ctx); From dcd6013b8619299122dfa737f3ff4ca25c874012 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Wed, 20 Nov 2024 10:46:32 -0800 Subject: [PATCH 41/48] better docs --- aws-s3-transfer-manager/src/config.rs | 2 +- aws-s3-transfer-manager/src/io/aggregated_bytes.rs | 4 ++-- aws-s3-transfer-manager/src/operation/download.rs | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/aws-s3-transfer-manager/src/config.rs b/aws-s3-transfer-manager/src/config.rs index c6b137e..a728959 100644 --- a/aws-s3-transfer-manager/src/config.rs +++ b/aws-s3-transfer-manager/src/config.rs @@ -128,7 +128,7 @@ impl Builder { self } - /// Consumes the builder and constructs a [`Config`](crate::config::Config) + /// Consumes the builder and constructs a [`Config`] 
pub fn build(self) -> Config { Config { multipart_threshold: self.multipart_threshold_part_size, diff --git a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs index c8d6293..e470657 100644 --- a/aws-s3-transfer-manager/src/io/aggregated_bytes.rs +++ b/aws-s3-transfer-manager/src/io/aggregated_bytes.rs @@ -14,9 +14,9 @@ use crate::error::ErrorKind; /// Non-contiguous Binary Data Storage /// /// When data is read from the network, it is read in a sequence of chunks that are not in -/// contiguous memory. [`AggregatedBytes`](crate::byte_stream::AggregatedBytes) provides a view of +/// contiguous memory. [`AggregatedBytes`] provides a view of /// this data via [`impl Buf`](bytes::Buf) or it can be copied into contiguous storage with -/// [`.into_bytes()`](crate::byte_stream::AggregatedBytes::into_bytes). +/// [`.into_bytes()`](crate::io::aggregated_bytes::AggregatedBytes::into_bytes). #[derive(Debug, Clone)] pub struct AggregatedBytes(pub(crate) SegmentedBuf); diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 45b2fa1..249a84f 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -20,8 +20,10 @@ mod handle; pub use handle::DownloadHandle; use tracing::Instrument; -mod chunk_meta; -mod object_meta; +/// Provides metadata for each chunk during an object download. +pub mod chunk_meta; +/// Provides metadata for a single S3 object during download. 
+pub mod object_meta; mod service; use crate::error; From 6ec3a3f1ad7ce82b11c44cc0b6a89e6c4b9ab6bc Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 09:41:54 -0800 Subject: [PATCH 42/48] use pub use for aggregated bytes --- aws-s3-transfer-manager/src/io/mod.rs | 3 ++- aws-s3-transfer-manager/src/operation/download.rs | 2 +- aws-s3-transfer-manager/src/operation/download/body.rs | 3 ++- aws-s3-transfer-manager/src/operation/download/service.rs | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/aws-s3-transfer-manager/src/io/mod.rs b/aws-s3-transfer-manager/src/io/mod.rs index 185a660..63ca5e1 100644 --- a/aws-s3-transfer-manager/src/io/mod.rs +++ b/aws-s3-transfer-manager/src/io/mod.rs @@ -4,7 +4,7 @@ */ /// Download Body Type -pub mod aggregated_bytes; +mod aggregated_bytes; pub(crate) mod part_reader; mod path_body; mod stream; @@ -17,3 +17,4 @@ mod size_hint; pub use self::path_body::PathBodyBuilder; pub use self::size_hint::SizeHint; pub use self::stream::InputStream; +pub use self::aggregated_bytes::AggregatedBytes; diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 249a84f..12a3fe6 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -27,7 +27,7 @@ pub mod object_meta; mod service; use crate::error; -use crate::io::aggregated_bytes::AggregatedBytes; +use crate::io::AggregatedBytes; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; use body::{Body, ChunkOutput}; diff --git a/aws-s3-transfer-manager/src/operation/download/body.rs b/aws-s3-transfer-manager/src/operation/download/body.rs index cb52d01..e5348c9 100644 --- a/aws-s3-transfer-manager/src/operation/download/body.rs +++ b/aws-s3-transfer-manager/src/operation/download/body.rs @@ -8,8 +8,9 @@ use std::cmp::Ordering; use std::collections::BinaryHeap; use tokio::sync::mpsc; +use 
crate::io::AggregatedBytes; + use super::chunk_meta::ChunkMetadata; -use crate::io::aggregated_bytes::AggregatedBytes; /// Stream of binary data representing an Amazon S3 Object's contents. /// diff --git a/aws-s3-transfer-manager/src/operation/download/service.rs b/aws-s3-transfer-manager/src/operation/download/service.rs index de9bcfc..ef20736 100644 --- a/aws-s3-transfer-manager/src/operation/download/service.rs +++ b/aws-s3-transfer-manager/src/operation/download/service.rs @@ -4,7 +4,7 @@ */ use crate::error; use crate::http::header; -use crate::io::aggregated_bytes::AggregatedBytes; +use crate::io::AggregatedBytes; use crate::middleware::limit::concurrency::ConcurrencyLimitLayer; use crate::middleware::retry; use crate::operation::download::DownloadContext; From bf99d44d5991c1846aa6921cd49ba54b8b9db70e Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 10:03:38 -0800 Subject: [PATCH 43/48] update docs --- aws-s3-transfer-manager/src/operation/download.rs | 5 +++-- .../src/operation/download/object_meta.rs | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 12a3fe6..88ca604 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -56,8 +56,8 @@ impl Download { /// /// If `use_current_span_as_parent_for_tasks` is false, spawned tasks will /// "follow from" the current span, but be under their own root of the trace tree. - /// Use this for `TransferManager.download().send()`, where the spawned tasks - /// should NOT extend the life of the current `send()` span. + /// Use this for `TransferManager.download().initiate()`, where the spawned tasks + /// should NOT extend the life of the current `initiate()` span. 
pub(crate) fn orchestrate( handle: Arc, input: crate::operation::download::DownloadInput, @@ -118,6 +118,7 @@ async fn send_discovery( // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; + // This will only fail if the handle has already been dropped. let _ = object_meta_tx.send(discovery.object_meta); let initial_chunk = discovery.initial_chunk.take(); diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index f33b0c0..beac8f7 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -124,6 +124,9 @@ impl ObjectMetadata { } } +// The `GetObjectOutput` is used to create both `object_meta` and `chunk_meta`. +// We take a reference here instead of ownership because `GetObjectOutput` is not cloneable +// due to containing the body. impl From<&GetObjectOutput> for ObjectMetadata { fn from(value: &GetObjectOutput) -> Self { Self { From 49cd55d1b15720f4ab7a58b787b3d63b96b0f0af Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 10:39:14 -0800 Subject: [PATCH 44/48] use pub use instead of pub mod --- aws-s3-transfer-manager/src/operation/download.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 88ca604..9899ccc 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -21,9 +21,12 @@ pub use handle::DownloadHandle; use tracing::Instrument; /// Provides metadata for each chunk during an object download. -pub mod chunk_meta; +mod chunk_meta; +pub use chunk_meta::ChunkMetadata; /// Provides metadata for a single S3 object during download. 
-pub mod object_meta; +mod object_meta; +pub use object_meta::ObjectMetadata; + mod service; use crate::error; @@ -31,9 +34,7 @@ use crate::io::AggregatedBytes; use crate::runtime::scheduler::OwnedWorkPermit; use aws_smithy_types::byte_stream::ByteStream; use body::{Body, ChunkOutput}; -use chunk_meta::ChunkMetadata; use discovery::discover_obj; -use object_meta::ObjectMetadata; use service::distribute_work; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; From 566a50e9f87e0487028c3e621bbb28c279a87a55 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 10:39:26 -0800 Subject: [PATCH 45/48] fmt --- aws-s3-transfer-manager/src/io/mod.rs | 7 +++---- aws-s3-transfer-manager/src/operation/download.rs | 2 +- .../src/operation/download/object_meta.rs | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/aws-s3-transfer-manager/src/io/mod.rs b/aws-s3-transfer-manager/src/io/mod.rs index d2e08f8..7edcca6 100644 --- a/aws-s3-transfer-manager/src/io/mod.rs +++ b/aws-s3-transfer-manager/src/io/mod.rs @@ -3,11 +3,10 @@ * SPDX-License-Identifier: Apache-2.0 */ - -/// Download Body Type -mod aggregated_bytes; /// Adapters for other IO library traits to map to `InputStream` pub mod adapters; +/// Download Body Type +mod aggregated_bytes; mod buffer; pub(crate) mod part_reader; mod path_body; @@ -18,11 +17,11 @@ pub mod error; mod size_hint; // re-exports +pub use self::aggregated_bytes::AggregatedBytes; pub(crate) use self::buffer::Buffer; pub use self::path_body::PathBodyBuilder; pub use self::size_hint::SizeHint; pub use self::stream::InputStream; -pub use self::aggregated_bytes::AggregatedBytes; pub use self::stream::PartData; pub use self::stream::PartStream; pub use self::stream::StreamContext; diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 9899ccc..77c8ac8 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ 
b/aws-s3-transfer-manager/src/operation/download.rs @@ -119,7 +119,7 @@ async fn send_discovery( // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; - // This will only fail if the handle has already been dropped. + // This will only fail if the handle has already been dropped. let _ = object_meta_tx.send(discovery.object_meta); let initial_chunk = discovery.initial_chunk.take(); diff --git a/aws-s3-transfer-manager/src/operation/download/object_meta.rs b/aws-s3-transfer-manager/src/operation/download/object_meta.rs index beac8f7..ff421e8 100644 --- a/aws-s3-transfer-manager/src/operation/download/object_meta.rs +++ b/aws-s3-transfer-manager/src/operation/download/object_meta.rs @@ -124,8 +124,8 @@ impl ObjectMetadata { } } -// The `GetObjectOutput` is used to create both `object_meta` and `chunk_meta`. -// We take a reference here instead of ownership because `GetObjectOutput` is not cloneable +// The `GetObjectOutput` is used to create both `object_meta` and `chunk_meta`. +// We take a reference here instead of ownership because `GetObjectOutput` is not cloneable // due to containing the body. 
impl From<&GetObjectOutput> for ObjectMetadata { fn from(value: &GetObjectOutput) -> Self { From 06fc9cbb68cbd0f99fad9705d7c2a98bb916a57c Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 10:44:24 -0800 Subject: [PATCH 46/48] external types --- aws-s3-transfer-manager/external-types.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aws-s3-transfer-manager/external-types.toml b/aws-s3-transfer-manager/external-types.toml index b42fd32..e372912 100644 --- a/aws-s3-transfer-manager/external-types.toml +++ b/aws-s3-transfer-manager/external-types.toml @@ -7,4 +7,6 @@ allowed_external_types = [ "tokio::io::async_read::AsyncRead", "bytes::bytes::Bytes", "bytes::buf::buf_impl::Buf", + "aws_types::request_id::RequestId", + "aws_types::request_id::RequestIdExt" ] From 0246b9ca6c18da7bbb466c51fc4c5e6d3e3abd4a Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 13:17:30 -0800 Subject: [PATCH 47/48] Add fixme --- aws-s3-transfer-manager/src/operation/download.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index 77c8ac8..f04387b 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -119,9 +119,9 @@ async fn send_discovery( // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; - // This will only fail if the handle has already been dropped. + // FIXME - This will fail if the handle is dropped at this point. We should handle + // the cancellation gracefully here. 
let _ = object_meta_tx.send(discovery.object_meta); - let initial_chunk = discovery.initial_chunk.take(); let mut tasks = tasks.lock().await; From 6376db143422a560564141ad57cb5775b71f7ffc Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Khan Date: Mon, 25 Nov 2024 13:22:01 -0800 Subject: [PATCH 48/48] fmt --- aws-s3-transfer-manager/src/operation/download.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-s3-transfer-manager/src/operation/download.rs b/aws-s3-transfer-manager/src/operation/download.rs index f04387b..df03fc3 100644 --- a/aws-s3-transfer-manager/src/operation/download.rs +++ b/aws-s3-transfer-manager/src/operation/download.rs @@ -119,7 +119,7 @@ async fn send_discovery( // make initial discovery about the object size, metadata, possibly first chunk let mut discovery = discover_obj(&ctx, &input).await?; - // FIXME - This will fail if the handle is dropped at this point. We should handle + // FIXME - This will fail if the handle is dropped at this point. We should handle // the cancellation gracefully here. let _ = object_meta_tx.send(discovery.object_meta); let initial_chunk = discovery.initial_chunk.take();