Skip to content

Commit

Permalink
implement b16 loads in lds
Browse files Browse the repository at this point in the history
ds_store_b16

use iter

misc bitwise ops

ds load u16
  • Loading branch information
Qazalin committed Feb 10, 2024
1 parent 331215e commit f1f14e9
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
4 changes: 1 addition & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ pub extern "C" fn hipModuleLaunchKernel(
}

let (kernel, function_name) = utils::read_asm(&lib_bytes);
if DEBUG.load(SeqCst) {
println!(
println!(
"[remu] launching kernel {function_name} with global_size {gx} {gy} {gz} local_size {lx} {ly} {lz} args {:?}", args
);
}

let dispatch_dim = match (gy != 1, gz != 1) {
(true, true) => 3,
Expand Down
20 changes: 17 additions & 3 deletions src/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::dtype::IEEEClass;
use crate::memory::VecDataStore;
use crate::state::{Register, Value, VecMutation, WaveValue, VGPR};
use crate::todo_instr;
use crate::utils::{as_signed, f16_hi, f16_lo, nth, Colorize, DEBUG};
use crate::utils::{as_signed, f16_hi, f16_lo, nth, Colorize, DEBUG, END_PRG};
use half::f16;
use ndarray::Array;
use num_traits::Float;
Expand Down Expand Up @@ -1295,6 +1295,9 @@ impl<'a> Thread<'a> {
}) as u32
}
283 => s0 & s1,
284 => s0 | s1,
285 => s0 ^ s1,
286 => !(s0 ^ s1),
523 => s0 * s1 + s2, // TODO 24 bit trunc
528 => (s0 >> s1) & ((1 << s2) - 1),
530 => (s0 & s1) | (!s0 & s2),
Expand Down Expand Up @@ -1385,6 +1388,7 @@ impl<'a> Thread<'a> {
self.vec_reg[vdst + i] = self.lds.read(single_addr() + 4 * i);
});
}
60 => self.vec_reg[vdst] = self.lds.read(single_addr()) as u16 as u32,
55 => {
let (addr0, addr1) = double_addr(4);
self.vec_reg[vdst] = self.lds.read(addr0);
Expand All @@ -1402,6 +1406,18 @@ impl<'a> Thread<'a> {
.write(single_addr() + 4 * i, self.vec_reg[data0 + i]);
})
}
31 => {
let addr = single_addr();
if addr + 2 >= self.lds.data.len() {
self.lds.data.resize(self.lds.data.len() + addr + 3, 0);
}
self.lds.data[addr..addr + 2]
.iter_mut()
.enumerate()
.for_each(|(i, x)| {
*x = (self.vec_reg[data0] as u16).to_le_bytes()[i];
});
}
14 => {
let (addr0, addr1) = double_addr(4);
self.lds.write(addr0, self.vec_reg[data0]);
Expand Down Expand Up @@ -3393,5 +3409,3 @@ fn _helper_test_thread() -> Thread<'static> {
thread.exec.default_lane = Some(0);
return thread;
}
#[allow(dead_code)]
const END_PRG: u32 = 0xbfb00000;

0 comments on commit f1f14e9

Please sign in to comment.