diff --git a/src/memory/device/device_box.rs b/src/memory/device/device_box.rs index 3b7fd4b..2748c06 100644 --- a/src/memory/device/device_box.rs +++ b/src/memory/device/device_box.rs @@ -2,8 +2,8 @@ use crate::error::{CudaResult, DropResult, ToResult}; use crate::memory::device::AsyncCopyDestination; use crate::memory::device::CopyDestination; use crate::memory::malloc::{cuda_free, cuda_malloc}; -use crate::memory::DeviceCopy; use crate::memory::DevicePointer; +use crate::memory::{DeviceCopy, LockedBox}; use crate::stream::Stream; use std::fmt::{self, Pointer}; use std::mem; @@ -345,6 +345,35 @@ impl AsyncCopyDestination> for DeviceBox { Ok(()) } } +impl AsyncCopyDestination> for DeviceBox { + unsafe fn async_copy_from(&mut self, val: &LockedBox, stream: &Stream) -> CudaResult<()> { + let size = mem::size_of::(); + if size != 0 { + cuda_driver_sys::cuMemcpyHtoDAsync_v2( + self.ptr.as_raw_mut() as u64, + val.as_ptr() as *const c_void, + size, + stream.as_inner(), + ) + .to_result()? + } + Ok(()) + } + + unsafe fn async_copy_to(&self, val: &mut LockedBox, stream: &Stream) -> CudaResult<()> { + let size = mem::size_of::(); + if size != 0 { + cuda_driver_sys::cuMemcpyDtoHAsync_v2( + val.as_mut_ptr() as *mut c_void, + self.ptr.as_raw() as u64, + size, + stream.as_inner(), + ) + .to_result()? + } + Ok(()) + } +} #[cfg(test)] mod test_device_box { diff --git a/src/memory/locked.rs b/src/memory/locked.rs index c3e3676..23ed490 100644 --- a/src/memory/locked.rs +++ b/src/memory/locked.rs @@ -1,11 +1,305 @@ use super::DeviceCopy; use crate::error::*; use crate::memory::malloc::{cuda_free_locked, cuda_malloc_locked}; +use core::fmt; +use std::borrow::{Borrow, BorrowMut}; +use std::cmp::Ordering; +use std::fmt::{Display, Pointer}; +use std::hash::{Hash, Hasher}; use std::mem; -use std::ops; +use std::ops::{Deref, DerefMut}; use std::ptr; use std::slice; +/// A pointer type for heap-allocation in CUDA page-locked memory. 
+/// +/// See the [`module-level documentation`](../memory/index.html) for more information on page-locked memory. +#[derive(Debug)] +pub struct LockedBox { + pub(crate) ptr: *mut T, +} +impl LockedBox { + /// Allocate page-locked memory and place val into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Errors + /// + /// If a CUDA error occurs, return the error. + /// + /// # Examples + /// + /// ``` + /// # let _context = rustacuda::quick_init().unwrap(); + /// use rustacuda::memory::*; + /// let five = LockedBox::new(5).unwrap(); + /// ``` + pub fn new(val: T) -> CudaResult { + let locked_box = unsafe { LockedBox::uninitialized()? }; + unsafe { core::ptr::write(locked_box.ptr, val) }; + Ok(locked_box) + } +} +impl LockedBox { + /// Allocate page-locked memory, but do not initialize it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Safety + /// + /// Since the backing memory is not initialized, this function is not safe. The caller must + /// ensure that the backing memory is set to a valid value before it is read, else undefined + /// behavior may occur. + /// + /// # Examples + /// + /// ``` + /// # let _context = rustacuda::quick_init().unwrap(); + /// use rustacuda::memory::*; + /// let mut five = unsafe { LockedBox::uninitialized().unwrap() }; + /// *five = 5u64; + /// ``` + pub unsafe fn uninitialized() -> CudaResult { + if mem::size_of::() == 0 { + Ok(LockedBox { + ptr: core::ptr::null_mut(), + }) + } else { + let ptr = cuda_malloc_locked(1)?; + Ok(LockedBox { ptr }) + } + } + + /// Constructs a LockedBox from a **page-locked** raw pointer. + /// + /// After calling this function, the raw pointer and the memory it points to are owned by the + /// LockedBox. The LockedBox destructor will free the allocated memory, but will not call the destructor + /// of `T`. This function may accept any pointer produced by the `cuMemAllocHost` CUDA API + /// call. 
+/// +/// # Safety +/// +/// This function is unsafe because improper use may lead to memory problems. For example, a +/// double free may occur if this function is called twice on the same pointer, or a segfault +/// may occur if the pointer is not one returned by the appropriate API call. +/// +/// Additionally, this function has the additional requirement that the pointer must be page-locked. +/// +/// # Examples +/// +/// ``` +/// # let _context = rustacuda::quick_init().unwrap(); +/// use rustacuda::memory::*; +/// let x = LockedBox::new(5).unwrap(); +/// let ptr = LockedBox::into_raw(x); +/// let x = unsafe { LockedBox::from_raw(ptr) }; +/// ``` +pub unsafe fn from_raw(ptr: *mut T) -> Self { + LockedBox { ptr } + } + + /// Consumes the LockedBox, returning a pointer to the underlying data. + /// + /// After calling this function, the caller is responsible for the memory previously managed by + /// the LockedBox. In particular, the caller should properly destroy T and deallocate the memory. + /// The easiest way to do so is to create a new LockedBox using the `LockedBox::from_raw` function. + /// + /// Note: This is an associated function, which means that you have to call it as + /// `LockedBox::into_raw(b)` instead of `b.into_raw()`. This is so that there is no conflict with + /// a method on the inner type. + /// + /// # Examples + /// + /// ``` + /// # let _context = rustacuda::quick_init().unwrap(); + /// use rustacuda::memory::*; + /// let x = LockedBox::new(5).unwrap(); + /// let ptr = LockedBox::into_raw(x); + /// # unsafe { LockedBox::from_raw(ptr) }; + /// ``` + #[allow(clippy::wrong_self_convention)] + pub fn into_raw(mut b: LockedBox) -> *mut T { + let ptr = mem::replace(&mut b.ptr, core::ptr::null_mut()); + mem::forget(b); + ptr + } + + /// Consumes and leaks the LockedBox, returning a mutable reference, &'a mut T. Note that the type T + /// must outlive the chosen lifetime 'a. 
If the type has only static references, or none at all, + /// this may be chosen to be 'static. + /// + /// This is mainly useful for data that lives for the remainder of the program's life. Dropping + /// the returned reference will cause a memory leak. If this is not acceptable, the reference + /// should be wrapped with the LockedBox::from_raw function to produce a new LockedBox. This LockedBox can then + /// be dropped, which will properly destroy T and release the allocated memory. + /// + /// Note: This is an associated function, which means that you have to call it as + /// `LockedBox::leak(b)` instead of `b.leak()`. This is so that there is no conflict with + /// a method on the inner type. + pub fn leak<'a>(b: LockedBox) -> &'a mut T + where + T: 'a, + { + unsafe { &mut *LockedBox::into_raw(b) } + } + + /// Returns the contained pointer without consuming the box. + /// + /// # Examples + /// + /// ``` + /// # let _context = rustacuda::quick_init().unwrap(); + /// use rustacuda::memory::*; + /// let mut x = LockedBox::new(5).unwrap(); + /// let ptr = x.as_ptr(); + /// println!("{:p}", ptr); + /// ``` + pub fn as_ptr(&self) -> *const T { + self.ptr + } + + /// Returns the contained mutable pointer without consuming the box. + /// + /// # Examples + /// + /// ``` + /// # let _context = rustacuda::quick_init().unwrap(); + /// use rustacuda::memory::*; + /// let mut x = LockedBox::new(5).unwrap(); + /// let ptr = x.as_mut_ptr(); + /// println!("{:p}", ptr); + /// ``` + pub fn as_mut_ptr(&mut self) -> *mut T { + self.ptr + } + + /// Destroy a `LockedBox`, returning an error if one occurs. + /// + /// Deallocating locked memory can return errors from previous asynchronous work. This function + /// destroys the given box and returns the error and the un-destroyed box on failure. 
+ /// + /// # Example + /// + /// ``` + /// # let _context = rustacuda::quick_init().unwrap(); + /// use rustacuda::memory::*; + /// let x = LockedBox::new(5).unwrap(); + /// match LockedBox::drop(x) { + /// Ok(()) => println!("Successfully destroyed"), + /// Err((e, locked_box)) => { + /// println!("Failed to destroy box: {:?}", e); + /// // Do something with locked_box + /// }, + /// } + /// ``` + pub fn drop(mut locked_box: LockedBox) -> DropResult> { + if locked_box.ptr.is_null() { + return Ok(()); + } + + let ptr = mem::replace(&mut locked_box.ptr, core::ptr::null_mut()); + unsafe { + match cuda_free_locked(ptr) { + Ok(()) => { + mem::forget(locked_box); + Ok(()) + } + Err(e) => Err((e, LockedBox { ptr })), + } + } + } +} +impl Drop for LockedBox { + fn drop(&mut self) { + if self.ptr.is_null() { + return; + } + + let ptr = mem::replace(&mut self.ptr, core::ptr::null_mut()); + // No choice but to panic if this fails. + unsafe { + cuda_free_locked(ptr).expect("Failed to deallocate CUDA memory."); + } + } +} +impl crate::private::Sealed for LockedBox {} + +impl Borrow for LockedBox { + fn borrow(&self) -> &T { + &**self + } +} +impl BorrowMut for LockedBox { + fn borrow_mut(&mut self) -> &mut T { + &mut **self + } +} +impl AsRef for LockedBox { + fn as_ref(&self) -> &T { + &**self + } +} +impl AsMut for LockedBox { + fn as_mut(&mut self) -> &mut T { + &mut **self + } +} +impl Deref for LockedBox { + type Target = T; + + fn deref(&self) -> &T { + unsafe { &*self.ptr } + } +} +impl DerefMut for LockedBox { + fn deref_mut(&mut self) -> &mut T { + unsafe { &mut *self.ptr } + } +} +impl Display for LockedBox { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} +impl Pointer for LockedBox { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Pointer::fmt(&self.ptr, f) + } +} +impl PartialEq for LockedBox { + fn eq(&self, other: &LockedBox) -> bool { + PartialEq::eq(&**self, &**other) + } +} +impl Eq for 
LockedBox {} +impl PartialOrd for LockedBox { + fn partial_cmp(&self, other: &LockedBox) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } + fn lt(&self, other: &LockedBox) -> bool { + PartialOrd::lt(&**self, &**other) + } + fn le(&self, other: &LockedBox) -> bool { + PartialOrd::le(&**self, &**other) + } + fn ge(&self, other: &LockedBox) -> bool { + PartialOrd::ge(&**self, &**other) + } + fn gt(&self, other: &LockedBox) -> bool { + PartialOrd::gt(&**self, &**other) + } +} +impl Ord for LockedBox { + fn cmp(&self, other: &LockedBox) -> Ordering { + Ord::cmp(&**self, &**other) + } +} +impl Hash for LockedBox { + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + /// Fixed-size host-side buffer in page-locked memory. /// /// See the [`module-level documentation`](../memory/index.html) for more details on page-locked @@ -233,7 +527,7 @@ impl AsMut<[T]> for LockedBuffer { self } } -impl ops::Deref for LockedBuffer { +impl Deref for LockedBuffer { type Target = [T]; fn deref(&self) -> &[T] { @@ -243,7 +537,7 @@ impl ops::Deref for LockedBuffer { } } } -impl ops::DerefMut for LockedBuffer { +impl DerefMut for LockedBuffer { fn deref_mut(&mut self) -> &mut [T] { unsafe { let ptr = self.buf;