Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

alloc: mem{cpy, set, move} improvements #75

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions examples/memcpy_bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Manifest for the memcpy benchmark example.
[package]
name = "psp-memcpy-bench"
version = "0.1.0"
authors = ["Paul Sajna <[email protected]>"]
edition = "2021"

[dependencies]
# Uses the in-repo `psp` crate rather than a published version.
psp = { path = "../../psp" }

[profile.release]
# Emit debug info in release builds as well.
debug=true
124 changes: 124 additions & 0 deletions examples/memcpy_bench/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#![no_std]
#![no_main]

extern crate alloc;
use alloc::alloc::Layout;
use alloc::format;
use core::time::Duration;
use core::ffi::c_void;
use psp::sys::SceUid;

psp::module!("sample_module", 1, 1);

/// Benchmarks several memcpy implementations and writes the timings to
/// `host0:/results.txt`.
///
/// For every `(size, iterations)` case, two 16-byte-aligned buffers are
/// allocated, the source is filled with `0xAA`, and each implementation
/// (C `memcpy`, `sceKernelMemcpy`, `sceDmacMemcpy`) is timed via
/// `psp::benchmark`. Before each timed run the destination is cleared, and
/// afterwards its first byte is checked, so a copy that did nothing is caught.
///
/// # Panics
///
/// Panics if a copy leaves the first destination byte different from `0xAA`.
/// Allocation failure is routed to the allocation error handler.
fn psp_main() {
    psp::enable_home_button();

    // Enable the VFPU
    //unsafe {
    //use psp::sys::{self, ThreadAttributes};
    //sys::sceKernelChangeCurrentThreadAttr(0, ThreadAttributes::VFPU);
    //}

    // (buffer size in bytes, copies performed per benchmarked closure call)
    const CASES: [(usize, usize); 11] = [
        (32, 16),
        (64, 8),
        (512, 1),
        (1024, 1),
        (2048, 1),
        (16384, 1),
        (32768, 1),
        (65536, 1),
        (131072, 1),
        (524288, 1),
        (1048576, 1),
    ];
    // Byte pattern written to the source buffer and expected in the destination.
    const FILL: u8 = 0xAA;

    // NOTE(review): the result of sceIoOpen is not checked; if the open fails,
    // the writes below are presumably rejected by the kernel — confirm.
    let fd = unsafe {
        psp::sys::sceIoOpen(
            b"host0:/results.txt\0".as_ptr(),
            psp::sys::IoOpenFlags::CREAT | psp::sys::IoOpenFlags::RD_WR,
            0o777,
        )
    };

    for &(size, iterations) in CASES.iter() {
        let layout = Layout::from_size_align(size, 16)
            .expect("benchmark sizes are non-zero and 16 is a valid alignment");

        // SAFETY: `layout` has a non-zero size.
        let src = unsafe { alloc::alloc::alloc(layout) };
        // SAFETY: as above.
        let dst = unsafe { alloc::alloc::alloc(layout) };
        if src.is_null() || dst.is_null() {
            alloc::alloc::handle_alloc_error(layout);
        }

        // The kernel/DMA bindings take word pointers; these alias the same
        // buffers as `src`/`dst`.
        let src_words = src.cast::<u32>();
        let dst_words = dst.cast::<u32>();

        // SAFETY: `src` points to `size` writable bytes allocated above.
        unsafe { psp::sys::sceKernelMemset(src_words, u32::from(FILL), size) };

        // --- C memcpy -------------------------------------------------------
        // SAFETY: `dst` points to `size` writable bytes allocated above.
        unsafe { psp::sys::sceKernelMemset(dst_words, 0x00, size) };
        let cpu_dur = psp::benchmark(|| {
            for _ in 0..iterations {
                // SAFETY: both buffers are `size` bytes long and do not overlap.
                unsafe { memcpy(dst, src as *const u8, size); }
            }
        }, 10);
        // Compare a single byte: the buffers are filled bytewise, so a whole
        // `u32` would read as 0xAAAA_AAAA and never equal 0xAA.
        assert_eq!(unsafe { *dst }, FILL);

        // --- sceKernelMemcpy ------------------------------------------------
        // SAFETY: `dst` points to `size` writable bytes allocated above.
        unsafe { psp::sys::sceKernelMemset(dst_words, 0x00, size) };
        let kernel_dur = psp::benchmark(|| {
            for _ in 0..iterations {
                // SAFETY: both buffers are `size` bytes long and do not overlap.
                unsafe { psp::sys::sceKernelMemcpy(dst_words, src_words, size); }
            }
        }, 10);
        assert_eq!(unsafe { *dst }, FILL);

        // --- sceDmacMemcpy --------------------------------------------------
        // NOTE(review): this check assumes sceDmacMemcpy keeps the data cache
        // coherent for `dst`; confirm against the DMAC documentation.
        // SAFETY: `dst` points to `size` writable bytes allocated above.
        unsafe { psp::sys::sceKernelMemset(dst_words, 0x00, size) };
        let dmac_dur = psp::benchmark(|| {
            for _ in 0..iterations {
                // SAFETY: both buffers are `size` bytes long and do not overlap.
                unsafe { psp::sys::sceDmacMemcpy(dst_words, src_words, size); }
            }
        }, 10);
        assert_eq!(unsafe { *dst }, FILL);

        // The VFPU variant is currently disabled, so report a zero duration.
        let vfpu_dur = Duration::new(0, 0);

        //unsafe { psp::sys::sceKernelMemset(dst_words, 0x00, size) };
        //let vfpu_dur = psp::benchmark(|| {
        //for _ in 0..iterations {
        //unsafe { psp::sys::sceVfpuMemcpy(dst, src as *const u8, size); }
        //}
        //}, 10);
        //assert_eq!(unsafe { *dst }, FILL);

        // SAFETY: both pointers came from `alloc` with this exact `layout`
        // and are not used after this point.
        unsafe {
            alloc::alloc::dealloc(src, layout);
            alloc::alloc::dealloc(dst, layout);
        }

        let output = format!(
            "size: {} bytes\n\
             iterations: {}\n\
             cpu: {} microseconds\n\
             kernel: {} microseconds\n\
             dmac: {} microseconds\n\
             vfpu: {} microseconds\n\n",
            size,
            iterations,
            cpu_dur.as_micros(),
            kernel_dur.as_micros(),
            dmac_dur.as_micros(),
            vfpu_dur.as_micros()
        );
        write_to_fd(fd, output);
    }

    unsafe { psp::sys::sceIoClose(fd) };
}

/// Writes the bytes of `msg` to the file descriptor `fd` with a single
/// `sceIoWrite` call. The value returned by `sceIoWrite` is discarded.
fn write_to_fd(fd: SceUid, msg: alloc::string::String) {
    let payload = msg.as_bytes();
    let data = payload.as_ptr().cast::<c_void>();
    let len = payload.len();

    // SAFETY: `data` points to `len` readable bytes owned by `msg`, which
    // stays alive until the end of this function.
    unsafe {
        psp::sys::sceIoWrite(fd, data, len);
    }
}

// Foreign declaration of the `memcpy` symbol; it is resolved at link time and
// is the routine timed as the "cpu" case in `psp_main`.
extern "C" {
    /// C-ABI `memcpy`: takes a destination pointer, a source pointer and a
    /// byte count, and returns a `*mut u8`.
    ///
    /// NOTE(review): presumably follows the standard C contract (copies `num`
    /// bytes, regions must not overlap, returns `dst`) — confirm against
    /// whichever implementation is linked in.
    fn memcpy(dst: *mut u8, src: *const u8, num: usize) -> *mut u8;
}
8 changes: 8 additions & 0 deletions psp/src/sys/dmac.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Import declarations for the `sceDmac` library.
psp_extern! {
    #![name = "sceDmac"]
    #![flags = 0x4001]
    #![version = (0x00, 0x11)]

    #[psp(0x617F3FE6)]
    /// Binding for `sceDmacMemcpy`, imported under NID `0x617F3FE6`.
    ///
    /// Takes a destination pointer, a source pointer and a size, and returns
    /// an `i32`.
    ///
    /// NOTE(review): presumably copies `size` bytes from `src` to `dst` using
    /// the DMA controller and returns a negative value on error — confirm.
    /// Alignment and cache-coherency requirements are not visible here.
    pub fn sceDmacMemcpy(dst: *mut u32, src: *const u32, size: usize) -> i32;
}
10 changes: 8 additions & 2 deletions psp/src/sys/kernel/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -645,8 +645,8 @@ psp_extern! {

psp_extern! {
#![name = "Kernel_Library"]
#![flags = 0x0001]
#![version = (0x00, 0x00)]
#![flags = 0x0011]
#![version = (0x00, 0x01)]

#[psp(0x092968F4)]
/// Suspend all interrupts.
Expand Down Expand Up @@ -691,6 +691,12 @@ psp_extern! {
///
/// 1 if interrupts are currently enabled.
pub fn sceKernelIsCpuIntrEnable() -> i32;

#[psp(0x1839852A)]
/// Binding for `sceKernelMemcpy`, imported under NID `0x1839852A`.
///
/// Takes a destination pointer, a source pointer and a count, and returns a
/// `*mut u32`.
///
/// NOTE(review): presumably copies `num` bytes (not words) from `src` to
/// `dst` and returns `dst`, like C `memcpy` — confirm.
pub fn sceKernelMemcpy(dst: *mut u32, src: *const u32, num: usize) -> *mut u32;

#[psp(0xA089ECA4)]
/// Binding for `sceKernelMemset`, imported under NID `0xA089ECA4`.
///
/// Takes a destination pointer, a fill value and a count, and returns a
/// `*mut u32`.
///
/// NOTE(review): presumably fills `num` bytes at `dst` with the low byte of
/// `val` and returns `dst`, like C `memset` — confirm.
pub fn sceKernelMemset(dst: *mut u32, val: u32, num: usize) -> *mut u32;
}

#[repr(C)]
Expand Down
6 changes: 6 additions & 0 deletions psp/src/sys/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ pub use font::*;
mod psmf;
pub use psmf::*;

mod dmac;
pub use dmac::*;

mod vfpu;
pub use vfpu::*;

// These are not found (likely because this was tested in user mode on a PSP-2000).
// pub mod sircs;
// pub mod codec;
Expand Down
65 changes: 65 additions & 0 deletions psp/src/sys/vfpu.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//#[no_mangle]
//pub unsafe extern "C" fn sceVfpuMemcpy(
//dst: *mut u8,
//src: *const u8,
//size: usize,
//) -> *mut u8 {
//if size == 0 {
//return dst
//}

//let mut size = size;
//let mut dst8 = dst;
//let mut src8 = src;

//if ((src8 as u32)&0xF) == 0 // NOTE: only src is checked for 16-byte alignment here; dst alignment is not verified
//{
//while size > 63 {
//vfpu_asm!(
//lv.q C000, 0(a1);
//lv.q C010, 16(a1);
//lv.q C020, 32(a1);
//lv.q C030, 48(a1);
//sv.q C000, 0(a0);
//sv.q C010, 16(a0);
//sv.q C020, 32(a0);
//sv.q C030, 48(a0);
//: : "{4}"(dst8), "{5}"(src8), "{6}"(size) : "memory" : "volatile"
//);
//dst8 = dst8.add(64);
//src8 = src8.add(64);
//size = size.saturating_sub(64);
//}

//while size > 15 {
//vfpu_asm!(
//lv.q C000, 0(a1);
//sv.q C000, 0(a0);
//: : "{4}"(dst8), "{5}"(src8), "{6}"(size) : "memory" : "volatile"
//)
//dst8 = dst8.add(16);
//src8 = src8.add(16);
//size = size.saturating_sub(16);
//}

//let mut dst32 = dst8 as *mut u32;
//let mut src32 = src8 as *const u32;

//while size > 3 {
//*dst32 = *src32;
//dst32 = dst32.add(1);
//src32 = src32.add(1);
//size = size.saturating_sub(4);
//}

//while size > 0 {
//*dst8 = *src8;
//dst8 = dst8.add(1);
//src8 = src8.add(1);
//size = size.saturating_sub(1);
//}
//dst
//} else {
//panic!("Unaligned vfpu memcpy");
//}
//}