From e0ebc85e0843a3b4bf653a79c8356acffc06a053 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Sun, 29 Dec 2024 09:20:06 +0000 Subject: [PATCH] Format "Load/Store + PtrAdd" more carefully. This commit does two things which might not seem immediately related: 1. It only prints out instructions which aren't tombstones or equivalent. 2. It formats loads/stores that inline ptradds as `load %x + y` and `*(%x + y) = z` respectively. In essence, before this commit we printed out all the "not used" `PtrAdd` instructions, which meant that one got things like: ``` ; %2: ... ... ; %3: ptr = ptr_add %2, 4 ; %4: i8 = load %3 mov ... ``` In other words, such `PtrAdd`s didn't generate machine code. That's fine, but it turns out that we also printed some non-`PtrAdd`-but-dead instructions that the x64 backend's DCE had worked out. That's actively misleading: it meant that some optimisations could be misread as not being as effective as expected, because they would appear in `jit-post-opt` even though they didn't generate code. Step #2 above changes the formatting so that the example above is now shown as the following in `jit-asm` output: ``` ; %2: ... ... ; %4: i8 = load %2 + 4 mov ... ``` This means that the x64 backend produces JIT IR that isn't exactly parseable -- at least not right now. It's also different from `jit-post-opt`. This initially felt a bit surprising to me, but then I realised that `jit-asm` really means "backend specific output" and suddenly it made more sense to me. The x64 backend *is* manipulating the trace IR in a way that lets it generate better code and once upon a time I briefly considered giving it a new IR before realising that I could repurpose the trace IR. So if one views the `jit-asm` output as "another IR that happens to mostly look the same as trace IR" it all makes sense -- at least to me! 
--- ykrt/src/compile/jitc_yk/codegen/x64/mod.rs | 56 +++++++++++++++++---- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs index 6aab3136d..913d05a4f 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs @@ -456,11 +456,11 @@ impl<'a> Assemble<'a> { let mut next = iter.next(); let mut in_header = true; while let Some((iidx, inst)) = next { - self.comment(self.asm.offset(), inst.display(self.m, iidx).to_string()); if self.ra.is_inst_tombstone(iidx) { next = iter.next(); continue; } + self.comment_inst(self.asm.offset(), iidx, inst); self.ra.expire_regs(iidx); match &inst { @@ -531,11 +531,50 @@ impl<'a> Assemble<'a> { Ok(()) } - /// Add a comment to the trace, for use when disassembling its native code. + /// Add a comment to the trace. Note: for instructions, use [Self::comment_inst] which formats + /// things more appropriately for instructions. fn comment(&mut self, off: AssemblyOffset, line: String) { self.comments.get_mut().entry(off.0).or_default().push(line); } + /// Add a comment to the trace for a "JIT IR" instruction. This function will format some + /// instructions differently to the normal trace IR, because this x64 backend has some + /// non-generic optimisations / modifications. 
+ fn comment_inst(&mut self, off: AssemblyOffset, iidx: InstIdx, inst: Inst) { + match inst { + Inst::Load(_) => { + if let Some(painst) = self.ra.ptradd(iidx) { + self.comment( + off, + format!( + "%{iidx}: {} = load {} + {}", + self.m.type_(inst.tyidx(self.m)).display(self.m), + painst.ptr(self.m).display(self.m), + painst.off() + ), + ); + return; + } + } + Inst::Store(sinst) => { + if let Some(painst) = self.ra.ptradd(iidx) { + self.comment( + off, + format!( + "*({} + {}) = {}", + painst.ptr(self.m).display(self.m), + painst.off(), + sinst.val(self.m).display(self.m) + ), + ); + return; + } + } + _ => (), + } + self.comment(off, inst.display(self.m, iidx).to_string()) + } + /// Emit the prologue of the JITted code. /// /// The JITted code is executed inside the same frame as the main interpreter loop. This allows @@ -2819,12 +2858,12 @@ mod tests { ", " ... - ; %2: ptr = ptr_add %0, 64 - ; %3: i64 = load %2 + ; %1: ... + ; %3: i64 = load %0 + 64 mov r.64.x, [rbx+{{_}}] ; %4: ptr = ptr_add %1, 32 lea r.64.y, [r.64.z+0x20] - ; %5: i64 = load %4 + ; %5: i64 = load %1 + 32 mov r.64._, [r.64.z+0x20] ... ", @@ -2848,12 +2887,11 @@ mod tests { ", " ... - ; *%2 = 1i8 + ; *(%0 + 64) = 1i8 mov byte ptr [rbx+{{_}}], 0x01 - ; %4: ptr = ptr_add %1, 32 - ; %5: i64 = load %4 + ; %5: i64 = load %1 + 32 mov r.64.y, [r.64.x+0x20] - ; *%4 = 2i8 + ; *(%1 + 32) = 2i8 mov byte ptr [r.64.x+0x20], 0x02 ... ",