From 33ce29673bed2139a0cffd10ef61716fbfcde183 Mon Sep 17 00:00:00 2001 From: Clemens Lutz Date: Mon, 4 Mar 2019 20:37:38 +0100 Subject: [PATCH] Add CUDA Event --- src/event.rs | 411 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 412 insertions(+) create mode 100644 src/event.rs diff --git a/src/event.rs b/src/event.rs new file mode 100644 index 0000000..476845e --- /dev/null +++ b/src/event.rs @@ -0,0 +1,411 @@ +//! Events to track status and dependencies, and measure duration of work +//! submitted to a CUDA stream. +//! +//! In CUDA, most work is performed asynchronously. Events help to manage tasks +//! scheduled on an asynchronous stream. This includes waiting for a task (or +//! multiple tasks) to complete, and measuring the time duration it takes to +//! complete a task. Events can also be used to sequence tasks on multiple +//! streams within the same context by specifying dependent tasks (not supported +//! yet by RustaCUDA). +//! +//! Events may be reused multiple times. + +use crate::cuda_sys::cuda::{ + cuEventCreate, cuEventDestroy_v2, cuEventElapsedTime, cuEventQuery, cuEventRecord, + cuEventSynchronize, CUevent, +}; +use crate::error::{CudaError, CudaResult, DropResult, ToResult}; +use crate::stream::Stream; + +use std::mem; +use std::ptr; + +bitflags! { + /// Bit flags for configuring a CUDA Event. + /// + /// Nvidia claims that setting `DISABLE_TIMING` and `BLOCKING_SYNC` provides + /// the best performance for `query()` and `stream.wait_event()`. + pub struct EventFlags: u32 { + /// The default event creation flag. + const DEFAULT = 0x0; + + /// Specify that the created event should busy-wait on blocking + /// function calls. + const BLOCKING_SYNC = 0x1; + + /// Specify that the created event does not need to record timing data. + const DISABLE_TIMING = 0x2; + + /// Specify that the created event may be used as an interprocess event. + /// (not supported yet by RustaCUDA). This flag requires + /// `DISABLE_TIMING` to be set as well. + const INTERPROCESS = 0x4; + } +} + +/// Status enum that represents the current status of an event. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum EventStatus { + /// Ready indicates that all work captured by the event has been completed. + /// + /// NVidia states that for Unified Memory, `EventStatus::Ready` is + /// equivalent to having called `Event::synchronize`. + Ready, + + /// `EventStatus::NotReady` indicates that the work captured by the event is still + /// incomplete. + NotReady, +} + +/// An event to track work submitted to a stream. +/// +/// See the module-level documentation for more information. +#[derive(Debug)] +pub struct Event(CUevent); + +impl Event { + /// Create a new event with the specified flags. + /// + /// # Example: + /// + /// ``` + /// # use rustacuda::quick_init; + /// # use std::error::Error; + /// # fn main() -> Result<(), Box> { + /// # let _context = quick_init()?; + /// use rustacuda::event::{Event, EventFlags}; + /// + /// // With default settings + /// let event = Event::new(EventFlags::DEFAULT)?; + /// # Ok(()) + /// # } + /// ``` + pub fn new(flags: EventFlags) -> CudaResult { + unsafe { + let mut event: CUevent = mem::zeroed(); + cuEventCreate(&mut event, flags.bits()).to_result()?; + Ok(Event(event)) + } + } + + /// Record the state of a stream at the time of the call. + /// + /// This function is used together with `query`, `synchronize`, and + /// `elapsed_time_f32`. See the respective function for more information. + /// + /// If the event is created with `EventFlags::BLOCKING_SYNC`, then `record` + /// blocks until the event has actually been recorded. + /// + /// # Errors + /// + /// If the event and stream are not from the same context, an error is + /// returned. + /// + /// # Example + /// + /// ``` + /// # use rustacuda::quick_init; + /// # use rustacuda::stream::{Stream, StreamFlags}; + /// # use std::error::Error; + /// # fn main() -> Result<(), Box> { + /// # let _context = quick_init()?; + /// use rustacuda::event::{Event, EventFlags}; + /// + /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + /// let event = Event::new(EventFlags::DEFAULT)?; + /// + /// // do some work ... + /// + /// event.record(&stream)?; + /// # Ok(()) + /// } + /// ``` + pub fn record(&self, stream: &Stream) -> CudaResult<()> { + unsafe { + cuEventRecord(self.0, stream.as_inner()).to_result()?; + Ok(()) + } + } + + /// Return the current status of the event. + /// + /// The status captures the contents of the stream underlying the event. If + /// the stream is empty, `query` will return `EventStatus::Ready`. + /// + /// # Example + /// + /// ``` + /// # use rustacuda::quick_init; + /// # use rustacuda::stream::{Stream, StreamFlags}; + /// # use std::error::Error; + /// # fn main() -> Result<(), Box> { + /// # let _context = quick_init()?; + /// use rustacuda::event::{Event, EventFlags, EventStatus}; + /// + /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + /// let event = Event::new(EventFlags::DEFAULT)?; + /// + /// // do some work ... + /// + /// // record an event + /// event.record(&stream)?; + /// + /// // ... wait some time ... + /// # event.synchronize()?; + /// + /// // query if the work is finished + /// let status = event.query()?; + /// assert_eq!(status, EventStatus::Ready); + /// # Ok(()) + /// } + /// ``` + pub fn query(&self) -> CudaResult { + let result = unsafe { cuEventQuery(self.0).to_result() }; + + match result { + Ok(()) => Ok(EventStatus::Ready), + Err(CudaError::NotReady) => Ok(EventStatus::NotReady), + Err(other) => Err(other), + } + } + + /// Wait for an event to complete. + /// + /// Blocks thread execution until all work submitted before the event was + /// recorded has completed. `EventFlags::BLOCKING_SYNC` controls the mode of + /// blocking. If the flag is set on event creation, the thread will sleep. + /// Otherwise, the thread will busy-wait. + /// + /// # Example + /// + /// ``` + /// # use rustacuda::quick_init; + /// # use rustacuda::stream::{Stream, StreamFlags}; + /// # use std::error::Error; + /// # fn main() -> Result<(), Box> { + /// # let _context = quick_init()?; + /// use rustacuda::event::{Event, EventFlags}; + /// + /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + /// let event = Event::new(EventFlags::DEFAULT)?; + /// + /// // do some work ... + /// + /// // record an event + /// event.record(&stream)?; + /// + /// // wait until the work is finished + /// event.synchronize()?; + /// # Ok(()) + /// } + /// ``` + pub fn synchronize(&self) -> CudaResult<()> { + unsafe { + cuEventSynchronize(self.0).to_result()?; + Ok(()) + } + } + + /// Return the duration between two events. + /// + /// The duration is computed in milliseconds with a resulution of + /// approximatly 0.5 microseconds. As `record` takes place asynchronously, + /// other work can be scheduled in between the events, thus yieling an + /// unexpectedly long duration. + /// + /// # Errors + /// + /// `CudaError::NotReady` is returned if either event is not yet complete. + /// + /// `CudaError::InvalidHandle` is returned if + /// - the two events are not from the same context, or if + /// - `record` has not been called on either event, or if + /// - the `DISABLE_TIMING` flag is set on either event. + /// + /// # Example + /// + /// ``` + /// # use rustacuda::quick_init; + /// # use rustacuda::stream::{Stream, StreamFlags}; + /// # use rustacuda::launch; + /// # use rustacuda::module::Module; + /// # use rustacuda::memory::DeviceBox; + /// # use std::error::Error; + /// # use std::ffi::CString; + /// # fn main() -> Result<(), Box> { + /// # let _context = quick_init()?; + /// # let module_data = CString::new(include_str!("../resources/add.ptx"))?; + /// # let module = Module::load_from_string(&module_data)?; + /// # let mut x = DeviceBox::new(&10.0f32)?; + /// # let mut y = DeviceBox::new(&20.0f32)?; + /// # let mut result = DeviceBox::new(&0.0f32)?; + /// use rustacuda::event::{Event, EventFlags}; + /// + /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + /// let start_event = Event::new(EventFlags::DEFAULT)?; + /// let stop_event = Event::new(EventFlags::DEFAULT)?; + /// + /// // start recording time + /// start_event.record(&stream)?; + /// + /// // do some work ... + /// # unsafe { + /// # launch!(module.sum<<<1, 1, 0, stream>>>( + /// # x.as_device_ptr(), + /// # y.as_device_ptr(), + /// # result.as_device_ptr(), + /// # 1 // Length + /// # ))?; + /// # } + /// + /// // stop recording time + /// stop_event.record(&stream)?; + /// + /// // wait for the work to complete + /// stop_event.synchronize()?; + /// + /// // compute the time elapsed between the start and stop events + /// let time = stop_event.elapsed_time_f32(&start_event)?; + /// + /// # assert!(time > 0.0); + /// # Ok(()) + /// } + /// ``` + pub fn elapsed_time_f32(&self, start: &Self) -> CudaResult { + unsafe { + let mut millis: f32 = 0.0; + cuEventElapsedTime(&mut millis, start.0, self.0).to_result()?; + Ok(millis) + } + } + + /// Destroy an `Event` returning an error. + /// + /// Destroying an event can return errors from previous asynchronous work. + /// This function destroys the given event and returns the error and the + /// un-destroyed event on failure. + /// + /// # Example: + /// + /// ``` + /// # use rustacuda::*; + /// # use std::error::Error; + /// # fn main() -> Result<(), Box> { + /// # let _context = quick_init()?; + /// use rustacuda::event::{Event, EventFlags}; + /// + /// let event = Event::new(EventFlags::DEFAULT)?; + /// match Event::drop(event) { + /// Ok(()) => println!("Successfully destroyed"), + /// Err((cuda_error, event)) => { + /// println!("Failed to destroy event: {:?}", cuda_error); + /// // Do something with event + /// }, + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn drop(mut event: Event) -> DropResult { + if event.0.is_null() { + return Ok(()); + } + + unsafe { + let inner = mem::replace(&mut event.0, ptr::null_mut()); + match cuEventDestroy_v2(inner).to_result() { + Ok(()) => { + mem::forget(event); + Ok(()) + } + Err(e) => Err((e, Event(inner))), + } + } + } +} + +impl Drop for Event { + fn drop(&mut self) { + unsafe { cuEventDestroy_v2(self.0) } + .to_result() + .expect("Failed to destroy CUDA event"); + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::quick_init; + use crate::stream::StreamFlags; + use std::error::Error; + + #[test] + fn test_new_with_flags() -> Result<(), Box> { + let _context = quick_init()?; + let _event = Event::new(EventFlags::BLOCKING_SYNC | EventFlags::DISABLE_TIMING)?; + Ok(()) + } + + #[test] + fn test_record_with_wrong_context() -> Result<(), Box> { + let _context = quick_init()?; + let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let _new_context = quick_init()?; + let event = Event::new(EventFlags::DEFAULT)?; + let result = event.record(&stream); + assert_eq!(result, Err(CudaError::InvalidHandle)); + Ok(()) + } + + #[test] + fn test_elapsed_time_f32_with_wrong_context() -> Result<(), Box> { + let _context = quick_init()?; + let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let fst_event = Event::new(EventFlags::DEFAULT)?; + fst_event.record(&fst_stream)?; + + let _context = quick_init()?; + let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let snd_event = Event::new(EventFlags::DEFAULT)?; + snd_event.record(&snd_stream)?; + + fst_event.synchronize()?; + snd_event.synchronize()?; + let result = snd_event.elapsed_time_f32(&fst_event); + assert_eq!(result, Err(CudaError::InvalidHandle)); + Ok(()) + } + + #[test] + fn test_elapsed_time_f32_with_different_streams() -> Result<(), Box> { + let _context = quick_init()?; + let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let fst_event = Event::new(EventFlags::DEFAULT)?; + fst_event.record(&fst_stream)?; + + let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let snd_event = Event::new(EventFlags::DEFAULT)?; + snd_event.record(&snd_stream)?; + + fst_event.synchronize()?; + snd_event.synchronize()?; + let _result = snd_event.elapsed_time_f32(&fst_event)?; + Ok(()) + } + + #[test] + fn test_elapsed_time_f32_with_disable_timing() -> Result<(), Box> { + let _context = quick_init()?; + let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + + let start_event = Event::new(EventFlags::DISABLE_TIMING)?; + start_event.record(&stream)?; + + let stop_event = Event::new(EventFlags::DEFAULT)?; + stop_event.record(&stream)?; + + stop_event.synchronize()?; + let result = stop_event.elapsed_time_f32(&start_event); + assert_eq!(result, Err(CudaError::InvalidHandle)); + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 47ab986..fb51bbd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -164,6 +164,7 @@ pub use rustacuda_derive::*; pub mod context; pub mod device; pub mod error; +pub mod event; pub mod function; pub mod memory; pub mod module;