diff --git a/examples/memcpy_bench/Cargo.toml b/examples/memcpy_bench/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/examples/memcpy_bench/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "psp-memcpy-bench"
+version = "0.1.0"
+authors = ["Paul Sajna"]
+edition = "2021"
+
+[dependencies]
+psp = { path = "../../psp" }
+
+[profile.release]
+debug = true
diff --git a/examples/memcpy_bench/src/main.rs b/examples/memcpy_bench/src/main.rs
new file mode 100644
--- /dev/null
+++ b/examples/memcpy_bench/src/main.rs
@@ -0,0 +1,178 @@
+//! Benchmark of the memcpy implementations available on the PSP.
+//!
+//! For every entry of `CASES` the CPU `memcpy`, `sceKernelMemcpy` and
+//! `sceDmacMemcpy` are timed with `psp::benchmark`, and the timings are written
+//! to `host0:/results.txt`.
+
+#![no_std]
+#![no_main]
+
+extern crate alloc;
+
+use alloc::alloc::{alloc, dealloc, handle_alloc_error, Layout};
+use alloc::format;
+use core::ffi::c_void;
+use core::time::Duration;
+use psp::sys::{self, IoOpenFlags, SceUid};
+
+psp::module!("sample_module", 1, 1);
+
+/// Alignment, in bytes, requested for both benchmark buffers.
+const BUFFER_ALIGN: usize = 16;
+
+/// Byte value the source buffer is filled with.
+const PATTERN: u8 = 0xAA;
+
+/// `(buffer size in bytes, copies per benchmark run)` for every benchmark case.
+const CASES: [(usize, usize); 11] = [
+    (32, 16),
+    (64, 8),
+    (512, 1),
+    (1024, 1),
+    (2048, 1),
+    (16384, 1),
+    (32768, 1),
+    (65536, 1),
+    (131072, 1),
+    (524288, 1),
+    (1048576, 1),
+];
+
+fn psp_main() {
+    psp::enable_home_button();
+
+    // SAFETY: the path is a NUL-terminated byte string that outlives the call.
+    let fd = unsafe {
+        sys::sceIoOpen(
+            b"host0:/results.txt\0".as_ptr(),
+            // TRUNC so output of an earlier, longer run does not linger.
+            IoOpenFlags::CREAT | IoOpenFlags::TRUNC | IoOpenFlags::RD_WR,
+            0o777,
+        )
+    };
+    if fd.0 < 0 {
+        psp::dprintln!("could not open host0:/results.txt (error {:#x})", fd.0);
+        return;
+    }
+
+    for &(size, iterations) in CASES.iter() {
+        run_case(fd, size, iterations);
+    }
+
+    // SAFETY: `fd` came from a successful `sceIoOpen` and is closed only here.
+    unsafe { sys::sceIoClose(fd) };
+}
+
+/// Benchmarks every copy routine on one buffer size and writes the timings to
+/// `fd`.
+///
+/// Each benchmark run performs `iterations` copies of `size` bytes.
+fn run_case(fd: SceUid, size: usize, iterations: usize) {
+    let layout = Layout::from_size_align(size, BUFFER_ALIGN).expect("invalid buffer layout");
+
+    // SAFETY: every size in `CASES` is non-zero, so `layout` is not zero-sized.
+    let (src, dst) = unsafe { (alloc(layout), alloc(layout)) };
+    if src.is_null() || dst.is_null() {
+        handle_alloc_error(layout);
+    }
+
+    // SAFETY: `src` is a live allocation of `size` bytes.
+    unsafe { sys::sceKernelMemset(src.cast::<u32>(), u32::from(PATTERN), size) };
+
+    // SAFETY (all three benchmarks): `src` and `dst` are distinct live
+    // allocations of `size` bytes each, so every copy stays in bounds and the
+    // buffers never overlap.
+    let cpu_dur = unsafe {
+        time_copy(dst, size, || {
+            for _ in 0..iterations {
+                memcpy(dst, src as *const u8, size);
+            }
+        })
+    };
+    let kernel_dur = unsafe {
+        time_copy(dst, size, || {
+            for _ in 0..iterations {
+                sys::sceKernelMemcpy(dst.cast::<u32>(), src.cast::<u32>(), size);
+            }
+        })
+    };
+    let dmac_dur = unsafe {
+        time_copy(dst, size, || {
+            for _ in 0..iterations {
+                sys::sceDmacMemcpy(dst.cast::<u32>(), src.cast::<u32>(), size);
+            }
+        })
+    };
+    // The VFPU copy is not benchmarked: `sceVfpuMemcpy` is still commented out
+    // in `psp/src/sys/vfpu.rs`, so its timing is reported as zero.
+    let vfpu_dur = Duration::new(0, 0);
+
+    // SAFETY: both pointers came from `alloc(layout)` and are not used again.
+    unsafe {
+        dealloc(src, layout);
+        dealloc(dst, layout);
+    }
+
+    let report = format!(
+        "size: {} bytes\n\
+         iterations: {}\n\
+         cpu: {} microseconds\n\
+         kernel: {} microseconds\n\
+         dmac: {} microseconds\n\
+         vfpu: {} microseconds\n\n",
+        size,
+        iterations,
+        cpu_dur.as_micros(),
+        kernel_dur.as_micros(),
+        dmac_dur.as_micros(),
+        vfpu_dur.as_micros()
+    );
+    write_to_fd(fd, &report);
+}
+
+/// Clears `dst`, times `copy` with `psp::benchmark`, then checks that all
+/// `size` bytes of `dst` hold `PATTERN`.
+///
+/// Clearing first keeps the output of a previously benchmarked routine from
+/// satisfying the check.
+///
+/// # Safety
+///
+/// `dst` must be valid for reads and writes of `size` bytes for the whole call.
+///
+/// # Panics
+///
+/// Panics if any byte of `dst` differs from `PATTERN` after the benchmark.
+unsafe fn time_copy<F: Fn()>(dst: *mut u8, size: usize, copy: F) -> Duration {
+    sys::sceKernelMemset(dst.cast::<u32>(), 0x00, size);
+    // Write back and invalidate the data cache so that a copy engine working
+    // on main memory sees the filled source and no stale cleared destination
+    // lines remain cached. NOTE(review): confirm whether `sceDmacMemcpy`
+    // already does this itself.
+    sys::sceKernelDcacheWritebackInvalidateAll();
+
+    // The same run count is used for every routine.
+    let elapsed = psp::benchmark(copy, 10);
+
+    let copied = core::slice::from_raw_parts(dst as *const u8, size);
+    assert!(
+        copied.iter().all(|&byte| byte == PATTERN),
+        "copy of {} bytes left unexpected data in the destination",
+        size
+    );
+    elapsed
+}
+
+/// Writes `msg` to `fd`, reporting a failed or short write on the debug screen.
+fn write_to_fd(fd: SceUid, msg: &str) {
+    // SAFETY: `msg` is valid for reads of `msg.len()` bytes during the call.
+    let written = unsafe { sys::sceIoWrite(fd, msg.as_ptr() as *const c_void, msg.len()) };
+    if usize::try_from(written).ok() != Some(msg.len()) {
+        psp::dprintln!("short or failed write to the results file: {}", written);
+    }
+}
+
+extern "C" {
+    /// C `memcpy`, resolved at link time; benchmarked as the plain CPU copy.
+    fn memcpy(dst: *mut u8, src: *const u8, num: usize) -> *mut u8;
+}
diff --git a/psp/src/sys/dmac.rs b/psp/src/sys/dmac.rs
new file mode 100644
--- /dev/null
+++ b/psp/src/sys/dmac.rs
@@ -0,0 +1,9 @@
+psp_extern! {
+    #![name = "sceDmac"]
+    #![flags = 0x4001]
+    #![version = (0x00, 0x11)]
+
+    #[psp(0x617F3FE6)]
+    /// Copy `size` bytes from `src` to `dst` using the DMA controller.
+    pub fn sceDmacMemcpy(dst: *mut u32, src: *const u32, size: usize) -> i32;
+}
diff --git a/psp/src/sys/kernel/mod.rs b/psp/src/sys/kernel/mod.rs
--- a/psp/src/sys/kernel/mod.rs
+++ b/psp/src/sys/kernel/mod.rs
@@ -645,8 +645,8 @@ psp_extern! {
 
 psp_extern! {
     #![name = "Kernel_Library"]
-    #![flags = 0x0001]
-    #![version = (0x00, 0x00)]
+    #![flags = 0x0011]
+    #![version = (0x00, 0x01)]
 
     #[psp(0x092968F4)]
     /// Suspend all interrupts.
@@ -691,6 +691,14 @@ psp_extern! {
     ///
     /// 1 if interrupts are currently enabled.
     pub fn sceKernelIsCpuIntrEnable() -> i32;
+
+    #[psp(0x1839852A)]
+    /// Copy `num` bytes from `src` to `dst`.
+    pub fn sceKernelMemcpy(dst: *mut u32, src: *const u32, num: usize) -> *mut u32;
+
+    #[psp(0xA089ECA4)]
+    /// Fill `num` bytes starting at `dst` with the byte value `val`.
+    pub fn sceKernelMemset(dst: *mut u32, val: u32, num: usize) -> *mut u32;
 }
 
 #[repr(C)]
diff --git a/psp/src/sys/mod.rs b/psp/src/sys/mod.rs
index 9eb65656..66a94e23 100644
--- a/psp/src/sys/mod.rs
+++ b/psp/src/sys/mod.rs
@@ -107,6 +107,12 @@ pub use font::*;
 mod psmf;
 pub use psmf::*;
 
+mod dmac;
+pub use dmac::*;
+
+mod vfpu;
+pub use vfpu::*;
+
 // These are not found (likely because this was tested in user mode on a PSP-2000).
 // pub mod sircs;
 // pub mod codec;
diff --git a/psp/src/sys/vfpu.rs b/psp/src/sys/vfpu.rs
new file mode 100644
--- /dev/null
+++ b/psp/src/sys/vfpu.rs
@@ -0,0 +1,75 @@
+// Draft of a VFPU-accelerated memcpy, kept commented out: nothing is exported
+// from this module yet.
+//
+// Known problems to fix before enabling it:
+// - the alignment test only looks at `src`, although its comment says both
+//   pointers are 16-byte aligned;
+// - the second `vfpu_asm!` invocation lacks a trailing `;`;
+// - the 32-bit loop advances `dst32`/`src32` only, so the byte tail loop still
+//   starts from the stale `dst8`/`src8`.
+
+//#[no_mangle]
+//pub unsafe extern "C" fn sceVfpuMemcpy(
+    //dst: *mut u8,
+    //src: *const u8,
+    //size: usize,
+//) -> *mut u8 {
+    //if size == 0 {
+        //return dst
+    //}
+
+    //let mut size = size;
+    //let mut dst8 = dst;
+    //let mut src8 = src;
+
+    //if ((src8 as u32)&0xF) == 0 //Both src and dst are 16byte aligned
+    //{
+        //while size > 63 {
+            //vfpu_asm!(
+                //lv.q C000, 0(a1);
+                //lv.q C010, 16(a1);
+                //lv.q C020, 32(a1);
+                //lv.q C030, 48(a1);
+                //sv.q C000, 0(a0);
+                //sv.q C010, 16(a0);
+                //sv.q C020, 32(a0);
+                //sv.q C030, 48(a0);
+                //: : "{4}"(dst8), "{5}"(src8), "{6}"(size) : "memory" : "volatile"
+            //);
+            //dst8 = dst8.add(64);
+            //src8 = src8.add(64);
+            //size = size.saturating_sub(64);
+        //}
+
+        //while size > 15 {
+            //vfpu_asm!(
+                //lv.q C000, 0(a1);
+                //sv.q C000, 0(a0);
+                //: : "{4}"(dst8), "{5}"(src8), "{6}"(size) : "memory" : "volatile"
+            //)
+            //dst8 = dst8.add(16);
+            //src8 = src8.add(16);
+            //size = size.saturating_sub(16);
+        //}
+
+        //let mut dst32 = dst8 as *mut u32;
+        //let mut src32 = src8 as *const u32;
+
+        //while size > 3 {
+            //*dst32 = *src32;
+            //dst32 = dst32.add(1);
+            //src32 = src32.add(1);
+            //size = size.saturating_sub(4);
+        //}
+
+        //while size > 0 {
+            //*dst8 = *src8;
+            //dst8 = dst8.add(1);
+            //src8 = src8.add(1);
+            //size = size.saturating_sub(1);
+        //}
+        //dst
+    //} else {
+        //panic!("Unaligned vfpu memcpy");
+    //}
+//}