Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LockedBox<T> and implement AsyncCopyDestination for DeviceBox<T> #70

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion src/memory/device/device_box.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use crate::error::{CudaResult, DropResult, ToResult};
use crate::memory::device::AsyncCopyDestination;
use crate::memory::device::CopyDestination;
use crate::memory::malloc::{cuda_free, cuda_malloc};
use crate::memory::DeviceCopy;
use crate::memory::DevicePointer;
use crate::memory::{DeviceCopy, LockedBox};
use crate::stream::Stream;
use std::fmt::{self, Pointer};
use std::mem;
Expand Down Expand Up @@ -345,6 +345,35 @@ impl<T: DeviceCopy> AsyncCopyDestination<DeviceBox<T>> for DeviceBox<T> {
Ok(())
}
}
impl<T: DeviceCopy> AsyncCopyDestination<LockedBox<T>> for DeviceBox<T> {
    /// Asynchronously copy the page-locked host value into this device box.
    ///
    /// # Safety
    ///
    /// The copy is enqueued on `stream` and may still be in flight when this
    /// returns; the caller must keep `val` alive and unmodified, and must not
    /// read `self` on the device, until the stream has been synchronized.
    unsafe fn async_copy_from(&mut self, val: &LockedBox<T>, stream: &Stream) -> CudaResult<()> {
        let size = mem::size_of::<T>();
        // Zero-sized types carry no data (and `LockedBox` stores a null
        // pointer for them), so skip the driver call entirely.
        if size != 0 {
            // Host -> device async copy; the device pointer is passed as the
            // driver's 64-bit CUdeviceptr handle.
            cuda_driver_sys::cuMemcpyHtoDAsync_v2(
                self.ptr.as_raw_mut() as u64,
                val.as_ptr() as *const c_void,
                size,
                stream.as_inner(),
            )
            .to_result()?
        }
        Ok(())
    }

    /// Asynchronously copy this device box into the page-locked host value.
    ///
    /// # Safety
    ///
    /// Same contract as `async_copy_from`: neither side may be touched until
    /// the stream has been synchronized.
    unsafe fn async_copy_to(&self, val: &mut LockedBox<T>, stream: &Stream) -> CudaResult<()> {
        let size = mem::size_of::<T>();
        // See `async_copy_from`: nothing to do for zero-sized `T`.
        if size != 0 {
            // Device -> host async copy.
            cuda_driver_sys::cuMemcpyDtoHAsync_v2(
                val.as_mut_ptr() as *mut c_void,
                self.ptr.as_raw() as u64,
                size,
                stream.as_inner(),
            )
            .to_result()?
        }
        Ok(())
    }
}

#[cfg(test)]
mod test_device_box {
Expand Down
300 changes: 297 additions & 3 deletions src/memory/locked.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,305 @@
use super::DeviceCopy;
use crate::error::*;
use crate::memory::malloc::{cuda_free_locked, cuda_malloc_locked};
use core::fmt;
use std::borrow::{Borrow, BorrowMut};
use std::cmp::Ordering;
use std::fmt::{Display, Pointer};
use std::hash::{Hash, Hasher};
use std::mem;
use std::ops;
use std::ops::{Deref, DerefMut};
use std::ptr;
use std::slice;

/// A pointer type for heap-allocation in CUDA page-locked memory.
///
/// See the [`module-level documentation`](../memory/index.html) for more information on page-locked memory.
#[derive(Debug)]
pub struct LockedBox<T> {
    // Raw pointer to the page-locked host allocation. Null when `T` is
    // zero-sized or after the box has been consumed by `into_raw` /
    // `LockedBox::drop` — both `Drop` and `LockedBox::drop` treat null as
    // "nothing to free".
    pub(crate) ptr: *mut T,
}
impl<T: Copy> LockedBox<T> {
/// Allocate page-locked memory and place val into it.
///
/// This doesn't actually allocate if `T` is zero-sized.
///
/// # Errors
///
/// If a CUDA error occurs, return the error.
///
/// # Examples
///
/// ```
/// # let _context = rustacuda::quick_init().unwrap();
/// use rustacuda::memory::*;
/// let five = LockedBox::new(5).unwrap();
/// ```
pub fn new(val: T) -> CudaResult<Self> {
let locked_box = unsafe { LockedBox::uninitialized()? };
unsafe { core::ptr::write(locked_box.ptr, val) };
Ok(locked_box)
}
}
impl<T> LockedBox<T> {
    /// Allocate page-locked memory, but do not initialize it.
    ///
    /// This doesn't actually allocate if `T` is zero-sized; in that case the
    /// box holds a null pointer.
    ///
    /// # Safety
    ///
    /// Since the backing memory is not initialized, this function is not safe. The caller must
    /// ensure that the backing memory is set to a valid value before it is read, else undefined
    /// behavior may occur.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let mut five = unsafe { LockedBox::uninitialized().unwrap() };
    /// *five = 5u64;
    /// ```
    pub unsafe fn uninitialized() -> CudaResult<Self> {
        if mem::size_of::<T>() == 0 {
            // Zero-sized types need no storage; the null pointer doubles as
            // the "nothing to free" marker checked by `Drop` and
            // `LockedBox::drop`.
            Ok(LockedBox {
                ptr: core::ptr::null_mut(),
            })
        } else {
            // Allocate page-locked host storage for exactly one `T`.
            let ptr = cuda_malloc_locked(1)?;
            Ok(LockedBox { ptr })
        }
    }

    /// Constructs a LockedBox from a **page-locked** raw pointer.
    ///
    /// After calling this function, the raw pointer and the memory it points to is owned by the
    /// LockedBox. The LockedBox destructor will free the allocated memory, but will not call the destructor
    /// of `T`. This function may accept any pointer produced by the page-locked host allocation
    /// CUDA API calls (`cuMemAllocHost`/`cuMemHostAlloc`, e.g. via `cuda_malloc_locked`).
    ///
    /// # Safety
    ///
    /// This function is unsafe because improper use may lead to memory problems. For example, a
    /// double free may occur if this function is called twice on the same pointer, or a segfault
    /// may occur if the pointer is not one returned by the appropriate API call.
    ///
    /// Additionally, this function has the additional requirement that the pointer must be page-locked.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let x = LockedBox::new(5).unwrap();
    /// let ptr = LockedBox::into_raw(x);
    /// let x = unsafe { LockedBox::from_raw(ptr) };
    /// ```
    pub unsafe fn from_raw(ptr: *mut T) -> Self {
        LockedBox { ptr }
    }

    /// Consumes the LockedBox, returning a pointer to the underlying data.
    ///
    /// After calling this function, the caller is responsible for the memory previously managed by
    /// the LockedBox. In particular, the caller should properly destroy T and deallocate the memory.
    /// The easiest way to do so is to create a new LockedBox using the `LockedBox::from_raw` function.
    ///
    /// Note: This is an associated function, which means that you have to call it as
    /// `LockedBox::into_raw(b)` instead of `b.into_raw()`. This is so that there is no conflict with
    /// a method on the inner type.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let x = LockedBox::new(5).unwrap();
    /// let ptr = LockedBox::into_raw(x);
    /// # unsafe { LockedBox::from_raw(ptr) };
    /// ```
    #[allow(clippy::wrong_self_convention)]
    pub fn into_raw(mut b: LockedBox<T>) -> *mut T {
        // Null out the pointer before forgetting `b` so that even if `forget`
        // were skipped the destructor could not double-free.
        let ptr = mem::replace(&mut b.ptr, core::ptr::null_mut());
        mem::forget(b);
        ptr
    }

    /// Consumes and leaks the LockedBox, returning a mutable reference, &'a mut T. Note that the type T
    /// must outlive the chosen lifetime 'a. If the type has only static references, or none at all,
    /// this may be chosen to be 'static.
    ///
    /// This is mainly useful for data that lives for the remainder of the program's life. Dropping
    /// the returned reference will cause a memory leak. If this is not acceptable, the reference
    /// should be wrapped with the LockedBox::from_raw function to produce a new LockedBox. This LockedBox can then
    /// be dropped, which will properly destroy T and release the allocated memory.
    ///
    /// Note: This is an associated function, which means that you have to call it as
    /// `LockedBox::leak(b)` instead of `b.leak()`. This is so that there is no conflict with
    /// a method on the inner type.
    pub fn leak<'a>(b: LockedBox<T>) -> &'a mut T
    where
        T: 'a,
    {
        unsafe { &mut *LockedBox::into_raw(b) }
    }

    /// Returns the contained pointer without consuming the box.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let mut x = LockedBox::new(5).unwrap();
    /// let ptr = x.as_ptr();
    /// println!("{:p}", ptr);
    /// ```
    pub fn as_ptr(&self) -> *const T {
        self.ptr
    }

    /// Returns the contained mutable pointer without consuming the box.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let mut x = LockedBox::new(5).unwrap();
    /// let ptr = x.as_mut_ptr();
    /// println!("{:p}", ptr);
    /// ```
    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr
    }

    /// Destroy a `LockedBox`, returning an error.
    ///
    /// Deallocating locked memory can return errors from previous asynchronous work. This function
    /// destroys the given box and returns the error and the un-destroyed box on failure.
    ///
    /// # Example
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let x = LockedBox::new(5).unwrap();
    /// match LockedBox::drop(x) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((e, locked_box)) => {
    ///         println!("Failed to destroy box: {:?}", e);
    ///         // Do something with locked_box
    ///     },
    /// }
    /// ```
    pub fn drop(mut locked_box: LockedBox<T>) -> DropResult<LockedBox<T>> {
        // Null means a zero-sized `T` (or an already-consumed box): nothing
        // was allocated, so there is nothing to free. Letting `locked_box`
        // fall out of scope here is fine — the `Drop` impl also skips null.
        if locked_box.ptr.is_null() {
            return Ok(());
        }

        let ptr = mem::replace(&mut locked_box.ptr, core::ptr::null_mut());
        unsafe {
            match cuda_free_locked(ptr) {
                Ok(()) => {
                    // The allocation is gone; forget the (now-null) box so the
                    // destructor does not run again.
                    mem::forget(locked_box);
                    Ok(())
                }
                // On failure, hand back a reconstructed box so the caller can
                // retry or inspect it; `locked_box` itself is null and inert.
                Err(e) => Err((e, LockedBox { ptr })),
            }
        }
    }
}
impl<T> Drop for LockedBox<T> {
    /// Frees the page-locked allocation when the box goes out of scope.
    fn drop(&mut self) {
        // A null pointer marks either a zero-sized `T` or a box already
        // consumed by `into_raw`/`LockedBox::drop` — nothing to free then.
        if !self.ptr.is_null() {
            let ptr = mem::replace(&mut self.ptr, core::ptr::null_mut());
            // No choice but to panic if deallocation fails here.
            unsafe {
                cuda_free_locked(ptr).expect("Failed to deallocate CUDA memory.");
            }
        }
    }
}
// Mark `LockedBox` as a permitted implementor of this crate's sealed traits
// (the copy-destination traits bound on `private::Sealed`), while keeping
// downstream crates from implementing them.
impl<T> crate::private::Sealed for LockedBox<T> {}

impl<T: DeviceCopy> Borrow<T> for LockedBox<T> {
fn borrow(&self) -> &T {
&**self
}
}
impl<T: DeviceCopy> BorrowMut<T> for LockedBox<T> {
fn borrow_mut(&mut self) -> &mut T {
&mut **self
}
}
impl<T: DeviceCopy> AsRef<T> for LockedBox<T> {
fn as_ref(&self) -> &T {
&**self
}
}
impl<T: DeviceCopy> AsMut<T> for LockedBox<T> {
fn as_mut(&mut self) -> &mut T {
&mut **self
}
}
impl<T: DeviceCopy> Deref for LockedBox<T> {
type Target = T;

fn deref(&self) -> &T {
unsafe { &*self.ptr }
}
}
impl<T: DeviceCopy> DerefMut for LockedBox<T> {
fn deref_mut(&mut self) -> &mut T {
unsafe { &mut *self.ptr }
}
}
impl<T: Display + DeviceCopy> Display for LockedBox<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&**self, f)
}
}
impl<T: DeviceCopy> Pointer for LockedBox<T> {
    /// Formats the raw page-locked pointer value.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        Pointer::fmt(&self.ptr, f)
    }
}
impl<T: DeviceCopy + PartialEq> PartialEq for LockedBox<T> {
    /// Compares the pointed-to values, not the pointer addresses.
    fn eq(&self, other: &LockedBox<T>) -> bool {
        **self == **other
    }
}
// Equality is total whenever the inner type's is.
impl<T: DeviceCopy + Eq> Eq for LockedBox<T> {}
impl<T: DeviceCopy + PartialOrd> PartialOrd for LockedBox<T> {
    /// Orders by the pointed-to values; every comparison operator is
    /// forwarded so behavior matches the inner type exactly.
    fn partial_cmp(&self, other: &LockedBox<T>) -> Option<Ordering> {
        (**self).partial_cmp(&**other)
    }
    fn lt(&self, other: &LockedBox<T>) -> bool {
        **self < **other
    }
    fn le(&self, other: &LockedBox<T>) -> bool {
        **self <= **other
    }
    fn ge(&self, other: &LockedBox<T>) -> bool {
        **self >= **other
    }
    fn gt(&self, other: &LockedBox<T>) -> bool {
        **self > **other
    }
}
impl<T: DeviceCopy + Ord> Ord for LockedBox<T> {
    /// Total ordering by the pointed-to values.
    fn cmp(&self, other: &LockedBox<T>) -> Ordering {
        (**self).cmp(&**other)
    }
}
impl<T: DeviceCopy + Hash> Hash for LockedBox<T> {
    /// Hashes the pointed-to value so the hash agrees with `PartialEq`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&**self, state);
    }
}

/// Fixed-size host-side buffer in page-locked memory.
///
/// See the [`module-level documentation`](../memory/index.html) for more details on page-locked
Expand Down Expand Up @@ -233,7 +527,7 @@ impl<T: DeviceCopy> AsMut<[T]> for LockedBuffer<T> {
self
}
}
impl<T: DeviceCopy> ops::Deref for LockedBuffer<T> {
impl<T: DeviceCopy> Deref for LockedBuffer<T> {
type Target = [T];

fn deref(&self) -> &[T] {
Expand All @@ -243,7 +537,7 @@ impl<T: DeviceCopy> ops::Deref for LockedBuffer<T> {
}
}
}
impl<T: DeviceCopy> ops::DerefMut for LockedBuffer<T> {
impl<T: DeviceCopy> DerefMut for LockedBuffer<T> {
fn deref_mut(&mut self) -> &mut [T] {
unsafe {
let ptr = self.buf;
Expand Down