diff --git a/src/chunk.zig b/src/chunk.zig index 3c0e06a52..36027612b 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -13,6 +13,7 @@ const llvm = @import("llvm.zig"); const llvm_gen = @import("llvm_gen.zig"); const bc_gen = @import("bc_gen.zig"); const jitgen = @import("jit/gen.zig"); +const X64 = @import("jit/x64.zig"); pub const ChunkId = u32; @@ -129,6 +130,7 @@ pub const Chunk = struct { /// Shared final code buffer. buf: *cy.ByteCodeBuffer, jitBuf: *jitgen.CodeBuffer, + x64Enc: X64.Encoder, nodes: []cy.Node, tokens: []const cy.Token, @@ -216,6 +218,7 @@ pub const Chunk = struct { .curObjectSym = null, .buf = undefined, .jitBuf = undefined, + .x64Enc = undefined, .curNodeId = cy.NullId, .symInitDeps = .{}, .symInitInfos = .{}, diff --git a/src/jit/a64.zig b/src/jit/a64.zig index d023c0a99..8e14cc54b 100644 --- a/src/jit/a64.zig +++ b/src/jit/a64.zig @@ -227,8 +227,8 @@ pub const BrCond = packed struct { return @bitCast(self); } - pub fn init(cond: Cond, imm: u19) BrCond { - return .{ .cond = @intFromEnum(cond), .imm19 = imm }; + pub fn init(cond: Cond, imm: i19) BrCond { + return .{ .cond = @intFromEnum(cond), .imm19 = @bitCast(imm) }; } }; diff --git a/src/jit/a64_assembler.zig b/src/jit/a64_assembler.zig index 8812546c6..8bfa7cf7f 100644 --- a/src/jit/a64_assembler.zig +++ b/src/jit/a64_assembler.zig @@ -3,35 +3,61 @@ const stdx = @import("stdx"); const cy = @import("../cyber.zig"); const t = stdx.testing; const Slot = cy.register.RegisterId; -const sasm = @import("assembler.zig"); +const assm = @import("assembler.zig"); const A64 = @import("a64.zig"); -const VRegister = sasm.VRegister; +const LRegister = assm.LRegister; const Register = A64.Register; const gen = @import("gen.zig"); pub const FpReg: A64.Register = .x1; -pub fn genLoadSlot(c: *cy.Chunk, dst: VRegister, src: Slot) !void { - try c.jitPushU32(A64.LoadStore.ldrImmOff(FpReg, src, fromVReg(dst)).bitCast()); +pub fn genLoadSlot(c: *cy.Chunk, dst: LRegister, src: Slot) !void { + try c.jitPushU32(A64.LoadStore.ldrImmOff(FpReg, src, toReg(dst)).bitCast()); } -pub fn genStoreSlot(c: *cy.Chunk, dst: Slot, src: VRegister) !void { - try c.jitPushU32(A64.LoadStore.strImmOff(FpReg, dst, fromVReg(src)).bitCast()); +pub fn genStoreSlot(c: *cy.Chunk, dst: Slot, src: LRegister) !void { + try c.jitPushU32(A64.LoadStore.strImmOff(FpReg, dst, toReg(src)).bitCast()); } -pub fn genAddImm(c: *cy.Chunk, dst: VRegister, src: VRegister, imm: u64) !void { - try c.jitPushU32(A64.AddSubImm.add(fromVReg(dst), fromVReg(src), @intCast(imm)).bitCast()); +pub fn genAddImm(c: *cy.Chunk, dst: LRegister, src: LRegister, imm: u64) !void { + try c.jitPushU32(A64.AddSubImm.add(toReg(dst), toReg(src), @intCast(imm)).bitCast()); } -pub fn genMovImm(c: *cy.Chunk, dst: VRegister, imm: u64) !void { - try copyImm64(c, fromVReg(dst), imm); +pub fn genMovImm(c: *cy.Chunk, dst: LRegister, imm: u64) !void { + try copyImm64(c, toReg(dst), imm); } -pub fn genMovPcRel(c: *cy.Chunk, dst: VRegister, offset: i32) !void { - try c.jitPushU32(A64.PcRelAddr.adr(fromVReg(dst), @intCast(offset)).bitCast()); +pub fn genPatchableJumpRel(c: *cy.Chunk) !void { + try c.jitPushU32(A64.BrImm.bl(0).bitCast()); } -pub fn patchMovPcRelTo(c: *cy.Chunk, pc: usize, to: usize) !void { +pub fn patchJumpRel(c: *cy.Chunk, pc: usize, to: usize) void { + var inst: *A64.BrImm = @ptrCast(@alignCast(&c.jitBuf.buf.items[pc])); + inst.setOffsetFrom(pc, to); +} + +pub fn genCmp(c: *cy.Chunk, left: LRegister, right: LRegister) !void { + try c.jitPushU32(A64.AddSubShifted.cmp(toReg(left), 
toReg(right)).bitCast()); +} + +pub fn genJumpCond(c: *cy.Chunk, cond: assm.LCond, offset: i32) !void { + try c.jitPushU32(A64.BrCond.init(toCond(cond), offset).bitCast()); +} + +pub fn patchJumpCond(c: *cy.Chunk, pc: usize, to: usize) void { + const inst = c.jitGetA64Inst(pc, A64.BrCond); + inst.imm19 = @intCast((to - pc) >> 2); +} + +pub fn genMovPcRel(c: *cy.Chunk, dst: LRegister, to: usize) !void { + try c.jitPushU32(A64.PcRelAddr.adrFrom(toReg(dst), c.jitGetPos(), to).bitCast()); +} + +pub fn genPatchableMovPcRel(c: *cy.Chunk, dst: LRegister) !void { + try c.jitPushU32(A64.PcRelAddr.adr(toReg(dst), 0).bitCast()); +} + +pub fn patchMovPcRelTo(c: *cy.Chunk, pc: usize, to: usize) void { const adr = c.jitGetA64Inst(pc, A64.PcRelAddr); adr.setOffsetFrom(pc, to); } @@ -49,6 +75,23 @@ pub fn genMainReturn(c: *cy.Chunk) !void { try c.jitPushU32(A64.Br.ret().bitCast()); } +pub fn genCallFunc(c: *cy.Chunk, ret: Slot, func: *cy.Func) !void { + // Skip ret info. + // Skip bc pc slot. + try genStoreSlot(c, ret + 3, .fp); + + // Advance fp. + try genAddImm(c, .fp, .fp, 8 * ret); + + // Push empty branch. + const jumpPc = c.jitGetPos(); + try c.jitBuf.relocs.append(c.alloc, .{ .type = .jumpToFunc, .data = .{ .jumpToFunc = .{ + .func = func, + .pc = @intCast(jumpPc), + }}}); + try assm.genPatchableJumpRel(c); +} + pub fn genCallFuncPtr(c: *cy.Chunk, ptr: *const anyopaque) !void { // No reloc needed, copy address to x30 (since it's already spilled) and invoke with blr. try copyImm64(c, .x30, @intFromPtr(ptr)); @@ -70,13 +113,20 @@ pub fn genBreakpoint(c: *cy.Chunk) !void { try c.jitPushU32(A64.Exception.brk(0xf000).bitCast()); } -fn fromVReg(arg: VRegister) Register { - return switch (arg) { +fn toCond(cond: LRegister) A64.Cond { + return switch (cond) { + .ge => .ge, + else => unreachable, + }; +} + +fn toReg(reg: LRegister) Register { + return switch (reg) { .arg0 => .x2, .arg1 => .x3, .arg2 => .x4, .arg3 => .x5, - .fp => .x1, + .fp => FpReg, .temp => .x8, }; } diff --git a/src/jit/assembler.zig b/src/jit/assembler.zig index c30adc8ea..5df30ab22 100644 --- a/src/jit/assembler.zig +++ b/src/jit/assembler.zig @@ -3,11 +3,13 @@ const cy = @import("../cyber.zig"); const Slot = cy.register.RegisterId; const a64 = @import("a64_assembler.zig"); +const x64 = @import("x64_assembler.zig"); /// Provides a common interface for assembling machine code related to stencils. /// Most machine code is still being generated from stencils. -pub const VRegister = enum { +/// Logical register. 
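+/// Each variant maps to a concrete machine register per target via `toReg` in a64_assembler.zig and x64_assembler.zig.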
+pub const LRegister = enum { fp, arg0, arg1, @@ -16,51 +18,111 @@ pub const VRegister = enum { temp, }; -pub fn genLoadSlot(c: *cy.Chunk, dst: VRegister, src: Slot) !void { +pub const LCond = enum(u8) { + ge, + _, +}; + +pub fn genLoadSlot(c: *cy.Chunk, dst: LRegister, src: Slot) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genLoadSlot(c, dst, src), + .x86_64 => try x64.genLoadSlot(c, dst, src), else => return error.Unsupported, } } -pub fn genStoreSlot(c: *cy.Chunk, dst: Slot, src: VRegister) !void { +pub fn genStoreSlot(c: *cy.Chunk, dst: Slot, src: LRegister) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genStoreSlot(c, dst, src), + .x86_64 => try x64.genStoreSlot(c, dst, src), else => return error.Unsupported, } } -pub fn genAddImm(c: *cy.Chunk, dst: VRegister, src: VRegister, imm: u64) !void { +pub fn genAddImm(c: *cy.Chunk, dst: LRegister, src: LRegister, imm: u64) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genAddImm(c, dst, src, imm), + .x86_64 => try x64.genAddImm(c, dst, src, imm), else => return error.Unsupported, } } -pub fn genMovImm(c: *cy.Chunk, dst: VRegister, imm: u64) !void { +pub fn genMovImm(c: *cy.Chunk, dst: LRegister, imm: u64) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genMovImm(c, dst, imm), + .x86_64 => try x64.genMovImm(c, dst, imm), + else => return error.Unsupported, + } +} + +pub fn genJumpCond(c: *cy.Chunk, cond: LCond, offset: i32) !void { + switch (builtin.cpu.arch) { + .aarch64 => try a64.genJumpCond(c, cond, offset), + .x86_64 => try x64.genJumpCond(c, cond, offset), + else => return error.Unsupported, + } +} + +pub fn patchJumpCond(c: *cy.Chunk, pc: usize, to: usize) void { + switch (builtin.cpu.arch) { + .aarch64 => a64.patchJumpCond(c, pc, to), + .x86_64 => x64.patchJumpCond(c, pc, to), + else => unreachable, + } +} + +pub fn genPatchableJumpRel(c: *cy.Chunk) !void { + switch (builtin.cpu.arch) { + .aarch64 => try a64.genPatchableJumpRel(c), + .x86_64 => try x64.genPatchableJumpRel(c), + else => return error.Unsupported, + } +} + +pub fn patchJumpRel(c: *cy.Chunk, pc: usize, to: usize) void { + switch (builtin.cpu.arch) { + .aarch64 => a64.patchJumpRel(c, pc, to), + .x86_64 => x64.patchJumpRel(c, pc, to), + else => unreachable, + } +} + +pub fn genCmp(c: *cy.Chunk, left: LRegister, right: LRegister) !void { + switch (builtin.cpu.arch) { + .aarch64 => try a64.genCmp(c, left, right), + .x86_64 => try x64.genCmp(c, left, right), else => return error.Unsupported, } } -pub fn genMovPcRel(c: *cy.Chunk, dst: VRegister, offset: i32) !void { +pub fn genMovPcRel(c: *cy.Chunk, dst: LRegister, to: usize) !void { switch (builtin.cpu.arch) { - .aarch64 => try a64.genMovPcRel(c, dst, offset), + .aarch64 => try a64.genMovPcRel(c, dst, to), + .x86_64 => try x64.genMovPcRel(c, dst, to), else => return error.Unsupported, } } -pub fn patchMovPcRelTo(c: *cy.Chunk, pc: usize, to: usize) !void { +pub fn genPatchableMovPcRel(c: *cy.Chunk, dst: LRegister) !void { switch (builtin.cpu.arch) { - .aarch64 => try a64.patchMovPcRelTo(c, pc, to), + .aarch64 => try a64.genPatchableMovPcRel(c, dst), + .x86_64 => try x64.genPatchableMovPcRel(c, dst), else => return error.Unsupported, } } +pub fn patchMovPcRelTo(c: *cy.Chunk, pc: usize, to: usize) void { + switch (builtin.cpu.arch) { + .aarch64 => a64.patchMovPcRelTo(c, pc, to), + .x86_64 => x64.patchMovPcRelTo(c, pc, to), + else => unreachable, + } +} + pub fn genStoreSlotImm(c: *cy.Chunk, dst: Slot, imm: u64) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genStoreSlotImm(c, dst, 
imm), + .x86_64 => try x64.genStoreSlotImm(c, dst, imm), else => return error.Unsupported, } } @@ -72,6 +134,15 @@ pub fn genStoreSlotValue(c: *cy.Chunk, dst: Slot, val: cy.Value) !void { pub fn genBreakpoint(c: *cy.Chunk) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genBreakpoint(c), + .x86_64 => try x64.genBreakpoint(c), + else => return error.Unsupported, + } +} + +pub fn genCallFunc(c: *cy.Chunk, ret: Slot, func: *cy.Func) !void { + switch (builtin.cpu.arch) { + .aarch64 => try a64.genCallFunc(c, ret, func), + .x86_64 => try x64.genCallFunc(c, ret, func), else => return error.Unsupported, } } @@ -79,6 +150,7 @@ pub fn genBreakpoint(c: *cy.Chunk) !void { pub fn genCallFuncPtr(c: *cy.Chunk, ptr: *const anyopaque) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genCallFuncPtr(c, ptr), + .x86_64 => try x64.genCallFuncPtr(c, ptr), else => return error.Unsupported, } } @@ -86,6 +158,7 @@ pub fn genCallFuncPtr(c: *cy.Chunk, ptr: *const anyopaque) !void { pub fn genFuncReturn(c: *cy.Chunk) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genFuncReturn(c), + .x86_64 => try x64.genFuncReturn(c), else => return error.Unsupported, } } @@ -93,6 +166,7 @@ pub fn genFuncReturn(c: *cy.Chunk) !void { pub fn genMainReturn(c: *cy.Chunk) !void { switch (builtin.cpu.arch) { .aarch64 => try a64.genMainReturn(c), + .x86_64 => try x64.genMainReturn(c), else => return error.Unsupported, } } \ No newline at end of file diff --git a/src/jit/gen.zig b/src/jit/gen.zig index b60c9c2d2..e752dae61 100644 --- a/src/jit/gen.zig +++ b/src/jit/gen.zig @@ -2,29 +2,45 @@ const builtin = @import("builtin"); const std = @import("std"); const cy = @import("../cyber.zig"); const bcgen = @import("../bc_gen.zig"); -const stencils = @import("stencils.zig"); +const stencils = switch (builtin.cpu.arch) { + .aarch64 => @import("a64_stencils.zig"), + .x86_64 => @import("x64_stencils.zig"), + else => void, +}; const log = cy.log.scoped(.jit_gen); const bt = cy.types.BuiltinTypes; const v = cy.fmt.v; const rt = cy.rt; const ir = cy.ir; -const sasm = @import("assembler.zig"); +const assm = @import("assembler.zig"); const a64 = @import("a64_assembler.zig"); const A64 = @import("a64.zig"); +const x64 = @import("x64_assembler.zig"); +const X64 = @import("x64.zig"); const GenValue = bcgen.GenValue; const RegisterCstr = cy.register.RegisterCstr; const RegisterId = cy.register.RegisterId; const genValue = bcgen.genValue; -/// Nop insts with an immediate id are generated before each IR node visit. -/// The id can then be set to `GenBreakpointAtMarker` so the next run +const CallHoleLen = switch (builtin.cpu.arch) { + .aarch64 => 4, + .x86_64 => 5, + else => 0, +}; + +/// When verbose=true, a debug dump call is generated for each IR expression. +/// The chunk and irIdx dumped can then be set to `GenBreakpointAtIr` so the next run /// can print the relevant source location and generate a breakpoint. 
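+/// Statements get the same dump call via `genCallDumpJitSection` in `genStmt`.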
-const GenNopDebugMarkers = cy.Trace and true; -const GenBreakpointAtMarker: ?u64 = null;// 9; -var GenNextMarkerId: u64 = 1; +// const GenBreakpointAtIr: ?ChunkIr = .{ .chunkId = 0, .irIdx = 633 }; +const GenBreakpointAtIr: ?ChunkIr = null; var DumpCodeFrom: ?usize = null; +const ChunkIr = struct { + chunkId: cy.ChunkId, + irIdx: u32, +}; + pub const RelocType = enum { jumpToFunc, }; @@ -68,7 +84,9 @@ pub const CodeBuffer = struct { }; pub const ChunkExt = struct { - pub const jitEnsureCap = ensureCap; + pub fn jitEnsureUnusedCap(c: *cy.Chunk, size: usize) !usize { + return ensureUnusedCap(&c.jitBuf.buf, c.alloc, size); + } pub const jitCopyAdvance = copyAdvance; pub const jitPush = push; pub const jitPushU32 = pushU32; @@ -91,15 +109,15 @@ pub fn getPos(c: *cy.Chunk) usize { return c.jitBuf.buf.items.len; } -pub fn ensureCap(c: *cy.Chunk, size: usize) !usize { - if (c.jitBuf.buf.items.len + size > c.jitBuf.buf.capacity) { - var inc = c.jitBuf.buf.capacity / 2; - if (inc == 0) { +pub fn ensureUnusedCap(buf: *std.ArrayListAlignedUnmanaged(u8, std.mem.page_size), alloc: std.mem.Allocator, size: usize) !usize { + if (buf.items.len + size > buf.capacity) { + var inc = buf.capacity / 2; + if (inc <= std.mem.page_size) { inc = std.mem.page_size; } - try c.jitBuf.buf.ensureTotalCapacityPrecise(c.alloc, c.jitBuf.buf.capacity + inc); + try buf.ensureTotalCapacityPrecise(alloc, buf.capacity + inc); } - return c.jitBuf.buf.items.len; + return buf.items.len; } pub fn copyAdvance(c: *cy.Chunk, dst: usize, src: []const u8) void { @@ -109,28 +127,28 @@ pub fn copyAdvance(c: *cy.Chunk, dst: usize, src: []const u8) void { } pub fn pushU32(c: *cy.Chunk, code: u32) !void { - const start = try ensureCap(c, @sizeOf(u32)); + const start = try ensureUnusedCap(c, @sizeOf(u32)); c.jitBuf.buf.items.len += @sizeOf(u32); const dst: []u8 = @ptrCast(c.jitBuf.buf.items.ptr[start..start+@sizeOf(u32)]); @memcpy(dst, std.mem.asBytes(&code)); } pub fn pushU64(c: *cy.Chunk, code: u64) !void { - const start = try ensureCap(c, @sizeOf(u64)); + const start = try c.jitEnsureUnusedCap(@sizeOf(u64)); c.jitBuf.buf.items.len += @sizeOf(u64); const dst: []u8 = @ptrCast(c.jitBuf.buf.items.ptr[start..start+@sizeOf(u64)]); @memcpy(dst, std.mem.asBytes(&code)); } pub fn push(c: *cy.Chunk, code: []const u8) !void { - const start = try ensureCap(c, code.len); + const start = try c.jitEnsureUnusedCap(code.len); c.jitBuf.buf.items.len += code.len; const dst: []u8 = @ptrCast(c.jitBuf.buf.items.ptr[start..start+code.len]); @memcpy(dst, code); } pub fn pushStencil(c: *cy.Chunk, code: []const u8) !usize { - const start = try ensureCap(c, code.len); + const start = try c.jitEnsureUnusedCap(code.len); c.jitBuf.buf.items.len += code.len; const dst: []u8 = @ptrCast(c.jitBuf.buf.items.ptr[start..start+code.len]); @memcpy(dst, code); @@ -141,20 +159,20 @@ fn genStmt(c: *cy.Chunk, idx: u32) anyerror!void { const code = c.irGetStmtCode(idx); const nodeId = c.irGetNode(idx); c.curNodeId = nodeId; + + var dumpEndPc: usize = undefined; if (cy.Trace) { const contextStr = try c.encoder.formatNode(nodeId, &cy.tempBuf); log.tracev("----{s}: {{{s}}}", .{@tagName(code), contextStr}); - if (GenNopDebugMarkers) { - // try A64.copyImm64(c, .xzr, GenNextMarkerId); - if (GenBreakpointAtMarker) |id| { - if (id == GenNextMarkerId) { - DumpCodeFrom = c.jitGetPos(); - try cy.debug.printTraceAtNode(c, nodeId); - try A64.breakpoint(c); - } + if (cy.verbose) { + dumpEndPc = try genCallDumpJitSection(c, idx, true); + } + + if (GenBreakpointAtIr) |chunkIr| { + if (c.id == 
chunkIr.chunkId and idx == chunkIr.irIdx) { + try assm.genBreakpoint(c); } - GenNextMarkerId += 1; } } switch (code) { @@ -202,6 +220,12 @@ fn genStmt(c: *cy.Chunk, idx: u32) anyerror!void { try bcgen.checkStack(c, nodeId); } log.tracev("----{s}: end", .{@tagName(code)}); + + if (cy.Trace) { + if (cy.verbose) { + assm.patchMovPcRelTo(c, dumpEndPc, c.jitGetPos()); + } + } } fn exprStmt(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { @@ -350,12 +374,12 @@ fn genBinOp(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, opts: BinOpOptions, no // try pushInlineBinExpr(c, getIntOpCode(data.op), leftv.local, rightv.local, inst.dst, nodeId); if (cstr.type == .simple and cstr.data.simple.jitPreferCondFlag) { // Load operands. - try sasm.genLoadSlot(c, .arg0, leftv.local); - try sasm.genLoadSlot(c, .arg1, rightv.local); + try assm.genLoadSlot(c, .arg0, leftv.local); + try assm.genLoadSlot(c, .arg1, rightv.local); try c.jitPush(&stencils.intPair); // Compare. - try c.jitPushU32(A64.AddSubShifted.cmp(.x2, .x3).bitCast()); + try assm.genCmp(c, .arg0, .arg1); optCondFlag = .lt; } else { return error.TODO; @@ -372,8 +396,8 @@ fn genBinOp(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, opts: BinOpOptions, no // try pushInlineBinExpr(c, getFloatOpCode(data.op), leftv.local, rightv.local, inst.dst, nodeId); // Load operands. - try sasm.genLoadSlot(c, .arg0, leftv.local); - try sasm.genLoadSlot(c, .arg1, rightv.local); + try assm.genLoadSlot(c, .arg0, leftv.local); + try assm.genLoadSlot(c, .arg1, rightv.local); if (data.op == .minus) { try c.jitPush(&stencils.subFloat); @@ -388,13 +412,13 @@ fn genBinOp(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, opts: BinOpOptions, no } // Save result. - try sasm.genStoreSlot(c, inst.dst, .arg0); + try assm.genStoreSlot(c, inst.dst, .arg0); } else if (data.leftT == bt.Integer) { // try pushInlineBinExpr(c, getIntOpCode(data.op), leftv.local, rightv.local, inst.dst, nodeId); // Load operands. - try sasm.genLoadSlot(c, .arg0, leftv.local); - try sasm.genLoadSlot(c, .arg1, rightv.local); + try assm.genLoadSlot(c, .arg0, leftv.local); + try assm.genLoadSlot(c, .arg1, rightv.local); if (data.op == .minus) { try c.jitPush(&stencils.subInt); @@ -409,7 +433,7 @@ fn genBinOp(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, opts: BinOpOptions, no } // Save result. 
- try sasm.genStoreSlot(c, inst.dst, .arg0); + try assm.genStoreSlot(c, inst.dst, .arg0); } else return error.Unexpected; }, // .equal_equal => { @@ -462,7 +486,7 @@ fn ifStmt(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { prevCaseMissJump = c.jitGetPos(); prevCaseMissJumpFromCondFlag = true; if (condv.data.jitCondFlag.type == .lt) { - try c.jitPushU32(A64.BrCond.init(.ge, 0).bitCast()); + try assm.genJumpCond(c, .ge, 0); } else { return error.TODO; } @@ -526,8 +550,7 @@ fn ifStmt(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { // c.patchJumpNotCondToCurPc(prevCaseMissJump); if (prevCaseMissJumpFromCondFlag) { - const inst = c.jitGetA64Inst(prevCaseMissJump, A64.BrCond); - inst.imm19 = @intCast((c.jitBuf.buf.items.len - prevCaseMissJump) >> 2); + assm.patchJumpCond(c, prevCaseMissJump, c.jitGetPos()); } else { return error.TODO; } @@ -543,7 +566,7 @@ fn genFloat(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, nodeId: cy.NodeId) !Ge } const val = cy.Value.initF64(data.val); - try sasm.genStoreSlotValue(c, inst.dst, val); + try assm.genStoreSlotValue(c, inst.dst, val); const value = genValue(c, inst.dst, false); return finishInst(c, value, inst.finalDst); @@ -559,7 +582,7 @@ fn genInt(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, nodeId: cy.NodeId) !GenV } const val = cy.Value.initInt(@intCast(data.val)); - try sasm.genStoreSlotValue(c, inst.dst, val); + try assm.genStoreSlotValue(c, inst.dst, val); const value = genValue(c, inst.dst, false); return finishInst(c, value, inst.finalDst); @@ -598,8 +621,8 @@ fn genToDst(c: *cy.Chunk, val: GenValue, dst: RegisterCstr, desc: cy.bytecode.In return error.TODO; } else { // try c.buf.pushOp2Ext(.copy, val.local, local.reg, desc); - try sasm.genLoadSlot(c, .temp, val.local); - try sasm.genStoreSlot(c, local.reg, .temp); + try assm.genLoadSlot(c, .temp, val.local); + try assm.genStoreSlot(c, local.reg, .temp); } // Parent only cares about the retained property. return GenValue.initRetained(val.retained); @@ -659,31 +682,18 @@ fn genCallFuncSym(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, nodeId: cy.NodeI if (data.func.type == .hostFunc) { // Populate callHost stencil args. - try sasm.genAddImm(c, .arg0, .fp, 8 * (inst.ret + cy.vm.CallArgStart)); - try sasm.genMovImm(c, .arg1, data.numArgs); + try assm.genAddImm(c, .arg0, .fp, 8 * (inst.ret + cy.vm.CallArgStart)); + try assm.genMovImm(c, .arg1, data.numArgs); try c.jitPush(stencils.callHost[0..stencils.callHost_hostFunc]); - try sasm.genCallFuncPtr(c, data.func.data.hostFunc.ptr); - try c.jitPush(stencils.callHost[stencils.callHost_hostFunc+4..]); + try assm.genCallFuncPtr(c, data.func.data.hostFunc.ptr); + try c.jitPush(stencils.callHost[stencils.callHost_hostFunc+CallHoleLen..]); // Copy result to ret. // TODO: Copy directly to final dst. - try sasm.genStoreSlot(c, inst.ret, .arg2); + try assm.genStoreSlot(c, inst.ret, .arg2); } else if (data.func.type == .userFunc) { - // Skip ret info. - // Skip bc pc slot. - try sasm.genStoreSlot(c, inst.ret + 3, .fp); - - // Advance fp. - try sasm.genAddImm(c, .fp, .fp, 8 * inst.ret); - - // Push empty branch. 
- const jumpPc = c.jitGetPos(); - try c.jitBuf.relocs.append(c.alloc, .{ .type = .jumpToFunc, .data = .{ .jumpToFunc = .{ - .func = data.func, - .pc = @intCast(jumpPc), - }}}); - try c.jitPushU32(A64.BrImm.bl(0).bitCast()); + try assm.genCallFunc(c, inst.ret, data.func); } else return error.TODO; const argvs = bcgen.popValues(c, data.numArgs); @@ -707,14 +717,43 @@ fn genAndPushExpr(c: *cy.Chunk, idx: usize, cstr: RegisterCstr) !void { try c.genValueStack.append(c.alloc, val); } -fn zDumpJitSection(vm: *cy.VM, fp: [*]const cy.Value, chunkId: u64, irIdx: u64, startPc: [*]const u8, endPc: [*]const u8) void { +fn zDumpJitStmtSection(vm: *cy.VM, fp: [*]const cy.Value, chunkId: u64, irIdx: u64, startPc: [*]const u8, endPc: [*]const u8) void { + const c = vm.compiler.chunks.items[@intCast(chunkId)]; + const code = c.irGetStmtCode(@intCast(irIdx)); + const nodeId = c.irGetNode(@intCast(irIdx)); + + const mc = startPc[0..@intFromPtr(endPc)-@intFromPtr(startPc)]; + const contextStr = c.encoder.formatNode(nodeId, &cy.tempBuf) catch cy.fatal(); + log.tracev("{s} {{{s}}} {*} {} ({}:{})", .{@tagName(code), contextStr, fp, std.fmt.fmtSliceHexLower(mc), chunkId, irIdx}); +} + +fn zDumpJitExprSection(vm: *cy.VM, fp: [*]const cy.Value, chunkId: u64, irIdx: u64, startPc: [*]const u8, endPc: [*]const u8) void { const c = vm.compiler.chunks.items[@intCast(chunkId)]; const code = c.irGetExprCode(@intCast(irIdx)); const nodeId = c.irGetNode(@intCast(irIdx)); const mc = startPc[0..@intFromPtr(endPc)-@intFromPtr(startPc)]; const contextStr = c.encoder.formatNode(nodeId, &cy.tempBuf) catch cy.fatal(); - log.tracev("{s} {{{s}}} {*} {}", .{@tagName(code), contextStr, fp, std.fmt.fmtSliceHexLower(mc)}); + log.tracev("{s} {{{s}}} {*} {} ({}:{})", .{@tagName(code), contextStr, fp, std.fmt.fmtSliceHexLower(mc), chunkId, irIdx}); +} + +fn genCallDumpJitSection(c: *cy.Chunk, idx: usize, isStmt: bool) !usize { + try assm.genMovImm(c, .arg0, c.id); + try assm.genMovImm(c, .arg1, idx); + const dumpStartPc = c.jitGetPos(); + try assm.genPatchableMovPcRel(c, .arg2); + + const dumpEndPc = c.jitGetPos(); + try assm.genPatchableMovPcRel(c, .arg3); + try c.jitPush(stencils.dumpJitSection[0..stencils.dumpJitSection_zDumpJitSection]); + if (isStmt) { + try assm.genCallFuncPtr(c, &zDumpJitStmtSection); + } else { + try assm.genCallFuncPtr(c, &zDumpJitExprSection); + } + try c.jitPush(stencils.dumpJitSection[stencils.dumpJitSection_zDumpJitSection+CallHoleLen..]); + assm.patchMovPcRelTo(c, dumpStartPc, c.jitGetPos()); + return dumpEndPc; } fn genExpr(c: *cy.Chunk, idx: usize, cstr: RegisterCstr) anyerror!GenValue { @@ -726,29 +765,14 @@ fn genExpr(c: *cy.Chunk, idx: usize, cstr: RegisterCstr) anyerror!GenValue { const contextStr = try c.encoder.formatNode(nodeId, &cy.tempBuf); log.tracev("{s}: {{{s}}} {s}", .{@tagName(code), contextStr, @tagName(cstr.type)}); - if (GenNopDebugMarkers) { - // try A64.copyImm64(c, .xzr, GenNextMarkerId); - if (GenBreakpointAtMarker) |id| { - if (id == GenNextMarkerId) { - DumpCodeFrom = c.jitGetPos(); - try cy.debug.printTraceAtNode(c, nodeId); - try A64.breakpoint(c); - } - } - GenNextMarkerId += 1; + if (cy.verbose) { + dumpEndPc = try genCallDumpJitSection(c, idx, false); } - if (cy.verbose) { - try sasm.genMovImm(c, .arg0, c.id); - try sasm.genMovImm(c, .arg1, idx); - const dumpStartPc = c.jitGetPos(); - try sasm.genMovPcRel(c, .arg2, 0); - dumpEndPc = c.jitGetPos(); - try sasm.genMovPcRel(c, .arg3, 0); - try c.jitPush(stencils.dumpJitSection[0..stencils.dumpJitSection_zDumpJitSection]); - try 
sasm.genCallFuncPtr(c, &zDumpJitSection); - try c.jitPush(stencils.dumpJitSection[stencils.dumpJitSection_zDumpJitSection+4..]); - try sasm.patchMovPcRelTo(c, dumpStartPc, c.jitGetPos()); + if (GenBreakpointAtIr) |chunkIr| { + if (c.id == chunkIr.chunkId and idx == chunkIr.irIdx) { + try assm.genBreakpoint(c); + } } } const res = try switch (code) { @@ -796,8 +820,7 @@ fn genExpr(c: *cy.Chunk, idx: usize, cstr: RegisterCstr) anyerror!GenValue { if (cy.Trace) { if (cy.verbose) { - const adr = c.jitGetA64Inst(dumpEndPc, A64.PcRelAddr); - adr.setOffsetFrom(dumpEndPc, c.jitGetPos()); + assm.patchMovPcRelTo(c, dumpEndPc, c.jitGetPos()); } } @@ -826,6 +849,10 @@ fn mainBlock(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { // Spill return addr to slot 0. if (builtin.cpu.arch == .aarch64) { try c.jitPushU32(A64.LoadStore.strImmOff(a64.FpReg, 0, .x30).bitCast()); + } else if (builtin.cpu.arch == .x86_64) { + // try c.x64Enc.int3(); + try c.x64Enc.movMem(.rax, x64.MemSibBase(x64.BaseReg(.rsp), 0)); + try c.x64Enc.movToMem(x64.MemSibBase(x64.BaseReg(x64.FpReg), 0), .rax); } var child = data.bodyHead; @@ -853,9 +880,9 @@ fn mainBlock(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { fn mainEnd(c: *cy.Chunk, optReg: ?u8) !void { const retSlot = optReg orelse cy.NullU8; - try sasm.genMovImm(c, .arg0, retSlot); + try assm.genMovImm(c, .arg0, retSlot); try c.jitPush(&stencils.end); - try sasm.genMainReturn(c); + try assm.genMainReturn(c); } fn genStringTemplate(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, nodeId: cy.NodeId) !GenValue { @@ -876,7 +903,12 @@ fn genStringTemplate(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, nodeId: cy.No // Inline const strings. const skipPc = c.jitGetPos(); - try c.jitPushU32(@bitCast(A64.BrImm.b(0))); + try assm.genPatchableJumpRel(c); + + // Forward align. + const advanceLen = std.mem.alignForward(usize, c.jitGetPos(), 8) - c.jitGetPos(); + _ = try c.jitEnsureUnusedCap(advanceLen); + c.jitBuf.buf.items.len += advanceLen; const strsPc = c.jitGetPos(); for (strs) |str| { @@ -886,27 +918,26 @@ fn genStringTemplate(c: *cy.Chunk, idx: usize, cstr: RegisterCstr, nodeId: cy.No try c.jitPushU64(@bitCast(constStr)); } - const skipi = c.jitGetA64Inst(skipPc, A64.BrImm); - skipi.setOffsetFrom(skipPc, c.jitGetPos()); + assm.patchJumpRel(c, skipPc, c.jitGetPos()); // try c.pushOptionalDebugSym(nodeId); // try c.buf.pushOp3(.stringTemplate, argStart, data.numExprs, inst.dst); // Load strs. - try c.jitPushU32(A64.PcRelAddr.adrFrom(.x2, c.jitGetPos(), strsPc).bitCast()); + try assm.genMovPcRel(c, .arg0, strsPc); // Load exprs. - try sasm.genAddImm(c, .arg1, .fp, 8 * argStart); + try assm.genAddImm(c, .arg1, .fp, 8 * argStart); // Load expr count. - try sasm.genMovImm(c, .arg2, data.numExprs); + try assm.genMovImm(c, .arg2, data.numExprs); try c.jitPush(stencils.stringTemplate[0..stencils.stringTemplate_zAllocStringTemplate2]); - try sasm.genCallFuncPtr(c, &cy.vm.zAllocStringTemplate2); - try c.jitPush(stencils.stringTemplate[stencils.stringTemplate_zAllocStringTemplate2+4..]); + try assm.genCallFuncPtr(c, &cy.vm.zAllocStringTemplate2); + try c.jitPush(stencils.stringTemplate[stencils.stringTemplate_zAllocStringTemplate2+CallHoleLen..]); // Save result. 
- try sasm.genStoreSlot(c, inst.dst, .arg0); + try assm.genStoreSlot(c, inst.dst, .arg0); const argvs = bcgen.popValues(c, data.numExprs); try bcgen.checkArgs(argStart, argvs); @@ -932,8 +963,8 @@ fn pushReleaseVals(c: *cy.Chunk, vals: []const GenValue, debugNodeId: cy.NodeId) } else if (vals.len == 1) { // try pushRelease(self, vals[0].local, debugNodeId); try c.jitPush(stencils.release[0..stencils.release_zFreeObject]); - try sasm.genCallFuncPtr(c, &cy.vm.zFreeObject); - try c.jitPush(stencils.release[stencils.release_zFreeObject+4..]); + try assm.genCallFuncPtr(c, &cy.vm.zFreeObject); + try c.jitPush(stencils.release[stencils.release_zFreeObject+CallHoleLen..]); } } @@ -978,6 +1009,9 @@ fn funcDecl(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { // Ideally, the return addr shouldn't be spilled until the first function call. if (builtin.cpu.arch == .aarch64) { try c.jitPushU32(A64.LoadStore.strImmOff(a64.FpReg, 2, .x30).bitCast()); + } else if (builtin.cpu.arch == .x86_64) { + // Save rax to ret addr slot. + try assm.genStoreSlot(c, 2, .temp); } try c.compiler.genSymMap.putNoClobber(c.alloc, func, .{ .funcSym = .{ .id = 0, .pc = @intCast(funcPc) }}); @@ -1006,6 +1040,10 @@ fn funcDecl(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { } pub fn genChunk(c: *cy.Chunk) !void { + if (builtin.cpu.arch == .x86_64) { + c.x64Enc = X64.Encoder{ .buf = &c.jitBuf.buf, .alloc = c.alloc }; + } + genChunkInner(c) catch |err| { if (err != error.CompileError) { // Wrap all other errors as a CompileError. @@ -1073,8 +1111,8 @@ fn genLocalReg(c: *cy.Chunk, reg: RegisterId, cstr: RegisterCstr, nodeId: cy.Nod return error.TODO; } else { // try c.buf.pushOp2Ext(.copy, reg, inst.dst, c.desc(nodeId)); - try sasm.genLoadSlot(c, .temp, reg); - try sasm.genStoreSlot(c, inst.dst, .temp); + try assm.genLoadSlot(c, .temp, reg); + try assm.genStoreSlot(c, inst.dst, .temp); } } } else { @@ -1131,6 +1169,6 @@ fn retExprStmt(c: *cy.Chunk, idx: usize, nodeId: cy.NodeId) !void { // try c.buf.pushOp1(.end, @intCast(childv.local)); return error.TODO; } else { - try sasm.genFuncReturn(c); + try assm.genFuncReturn(c); } } \ No newline at end of file diff --git a/src/jit/x64.zig b/src/jit/x64.zig new file mode 100644 index 000000000..38cbea0ad --- /dev/null +++ b/src/jit/x64.zig @@ -0,0 +1,712 @@ +const std = @import("std"); +const stdx = @import("stdx"); +const gen = @import("gen.zig"); +const t = stdx.testing; + +/// Based on: https://github.com/kubkon/zig-dis-x86_64 + +pub const jg: u8 = 0x8f; +pub const jge: u8 = 0x8d; +pub const jl: u8 = 0x8c; +pub const jle: u8 = 0x8e; + +pub const Register = enum(u8) { + // 64-bit general-purpose registers. 
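+    // Declaration order matches the x86-64 hardware encoding (rax=0 ... r15=15); `enc()` depends on it.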
+ rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, + r8, r9, r10, r11, r12, r13, r14, r15, + + pub fn enc(self: Register) u4 { + return switch (@intFromEnum(self)) { + @intFromEnum(Register.rax)...@intFromEnum(Register.r15) => { + const base = @intFromEnum(Register.rax); + return @truncate(@intFromEnum(self) - base); + }, + else => unreachable, + }; + } + + fn lowEnc(self: Register) u3 { + return @truncate(self.enc()); + } + + pub fn bitSize(self: Register) u8 { + return switch (@intFromEnum(self)) { + @intFromEnum(Register.rax)...@intFromEnum(Register.r15) => 64, + else => unreachable, + }; + } + + fn isExtended(self: Register) bool { + return switch (@intFromEnum(self)) { + @intFromEnum(Register.r8)...@intFromEnum(Register.r15) => true, + else => false, + }; + } + + // pub fn encBitSize(self: Register) u1 { + // return switch (self.bitSize()) { + // 32 => 0, + // 64 => 1, + // else => unreachable, + // }; + // } +}; + +pub const Encoder = struct { + alloc: std.mem.Allocator, + buf: *std.ArrayListAlignedUnmanaged(u8, std.mem.page_size), + + fn ensureUnusedCap(self: Encoder, size: usize) !void { + _ = try gen.ensureUnusedCap(self.buf, self.alloc, size); + } + + /// TODO: Support rel8. + pub fn jumpCond(self: Encoder, code: u8, offset: i32) !void { + try self.ensureUnusedCap(6); + const i = self.buf.items.len; + self.buf.items.ptr[i] = 0x0f; + self.buf.items.ptr[i+1] = code; + + @memcpy(self.buf.items.ptr[i+2..i+2+4], std.mem.asBytes(&offset)); + self.buf.items.len += 6; + } + + /// TODO: Support rel8. + pub fn jumpRel(self: Encoder, offset: i32) !void { + try self.ensureUnusedCap(5); + const i = self.buf.items.len; + self.buf.items.ptr[i] = 0xe9; + + @memcpy(self.buf.items.ptr[i+1..i+1+4], std.mem.asBytes(&offset)); + self.buf.items.len += 5; + } + + pub fn jumpReg(self: Encoder, reg: Register) !void { + const enc = Encoding.init(.m, 4, .none, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{0xff}, &.{ Op.reg(reg) }); + self.encodeMOp(out, &len, enc, Op.reg(reg)); + self.buf.items.len += len; + } + + pub fn cmp(self: Encoder, left: Register, right: Register) !void { + const enc = Encoding.init(.mr, 0, .long, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0x3b}, &.{ Op.reg(left), Op.reg(right) }); + self.encodeRMOps(out, &len, enc, Op.reg(left), Op.reg(right)); + self.buf.items.len += len; + } + + pub fn lea(self: Encoder, dst: Register, src: Memory) !void { + const enc = Encoding.init(.rm, 0, .long, .none); + try self.encode(enc, &.{ 0x8d }, &.{ Op.reg(dst), Op.mem(src)}); + } + + pub fn pushReg(self: Encoder, r: Register) !void { + const enc = Encoding.init(.o, 0, .none, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0x50 }, &.{ Op.reg(r) }); + self.buf.items.len += len; + } + + pub fn movImm(self: Encoder, dst: Register, imm: u64) !void { + const enc = Encoding.init(.oi, 0, .long, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0xb8 }, &.{ Op.reg(dst), Op.imm(imm) }); + encodeImm64(out, &len, imm); + self.buf.items.len += len; + } + + pub fn movReg(self: Encoder, dst: Register, src: Register) !void { + const enc = Encoding.init(.rm, 0, .long, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0x8b }, &.{ Op.reg(dst), Op.reg(src) }); + self.encodeRMOps(out, &len, enc, Op.reg(dst), Op.reg(src)); + self.buf.items.len += len; + } + + pub fn movMem(self: Encoder, dst: Register, mem: Memory) !void { + const 
enc = Encoding.init(.rm, 0, .long, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0x8b }, &.{ Op.reg(dst), Op.mem(mem)}); + self.encodeRMOps(out, &len, enc, Op.reg(dst), Op.mem(mem)); + self.buf.items.len += len; + } + + pub fn movToMem(self: Encoder, mem: Memory, dst: Register) !void { + const enc = Encoding.init(.mr, 0, .long, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0x89 }, &.{ Op.mem(mem), Op.reg(dst) }); + self.encodeRMOps(out, &len, enc, Op.reg(dst), Op.mem(mem)); + self.buf.items.len += len; + } + + pub fn callRel(self: Encoder, offset: i32) !void { + try self.ensureUnusedCap(5); + const i = self.buf.items.len; + self.buf.items.ptr[i] = 0xe8; + + @memcpy(self.buf.items.ptr[i+1..i+1+4], std.mem.asBytes(&offset)); + self.buf.items.len += 5; + } + + pub fn callReg(self: Encoder, reg: Register) !void { + const enc = Encoding.init(.m, 2, .none, .none); + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, &.{ 0xff }, &.{ Op.reg(reg) }); + self.encodeMOp(out, &len, enc, Op.reg(reg)); + self.buf.items.len += len; + } + + pub fn int3(self: Encoder) !void { + try self.ensureUnusedCap(1); + self.buf.items.ptr[self.buf.items.len] = 0xcc; + self.buf.items.len += 1; + } + + pub fn ret(self: Encoder) !void { + try self.ensureUnusedCap(1); + self.buf.items.ptr[self.buf.items.len] = 0xc3; + self.buf.items.len += 1; + } + + fn prepInstBuf(self: Encoder) ![]u8 { + try self.ensureUnusedCap(32); + return self.buf.items.ptr[self.buf.items.len..self.buf.items.len + 32]; + } + + fn encodeHeader(_: Encoder, out: []u8, enc: Encoding, opc: []const u8, ops: []const Op) usize { + // const prefix = Prefix.none; + var len: usize = 0; + + var hasMandatoryPrefix = false; + if (mandatoryPrefix(opc)) |byte| { + out[len] = byte; + len += 1; + hasMandatoryPrefix = true; + } + + encodeRexPrefix(out, &len, enc, ops); + + // Encode opcode. + const first = @intFromBool(hasMandatoryPrefix); + const final = opc.len-1; + const finalOpc = opc[first..final]; + if (finalOpc.len > 0) { + @memcpy(out[len..len+finalOpc.len], finalOpc); + len += finalOpc.len; + } + if (enc.opEn == .oi or enc.opEn == .o) { + // First assign to var to get around miscompilation. + out[len] = opc[final] | ops[0].data.reg.lowEnc(); + len += 1; + } else { + out[len] = opc[final]; + len += 1; + } + return len; + } + + fn encodeMOp(_: Encoder, out: []u8, len: *usize, enc: Encoding, op: Op) void { + const rm = enc.modRmExt(); + switch (op.type) { + .reg => { + out[len.*] = ModRM.direct(rm, op.data.reg.lowEnc()); + len.* += 1; + }, + .mem => |_| { + unreachable; + }, + else => unreachable, + } + } + + fn encodeRMOps(_: Encoder, out: []u8, len: *usize, enc: Encoding, r: Op, m: Op) void { + switch (m.type) { + .reg => { + const rm = r.data.reg.lowEnc(); + out[len.*] = ModRM.direct(rm, m.data.reg.lowEnc()); + len.* += 1; + }, + .mem => { + encodeMemory(out, len, enc, m.data.mem, r); + }, + else => unreachable, + } + } + + fn encode(self: Encoder, enc: Encoding, opc: []const u8, ops: []const Op) !void { + var out = try self.prepInstBuf(); + var len = self.encodeHeader(out, enc, opc, ops); + + // Encode operands. 
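+        // Only the .rm/.rmi operand forms are handled here; `lea` is currently the only caller of this path.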
+ switch (enc.opEn) { + .rm, .rmi => { + const memop = ops[1]; + switch (memop.type) { + .reg => { + const rm = switch (enc.opEn) { + .rm, .rmi => ops[0].data.reg.lowEnc(), + else => unreachable, + }; + out[len] = ModRM.direct(rm, memop.data.reg.lowEnc()); + len += 1; + }, + .mem => { + const op = ops[0]; + encodeMemory(out, &len, enc, memop.data.mem, op); + }, + else => return error.Unexpected, + } + + switch(enc.opEn) { + // .rmi => encodeImm(out, &len, ops[2].imm, ops[2].immBitSize()), + else => {}, + } + }, + else => return error.Unexpected, + } + + // Increase buffer length by inst len. + self.buf.items.len += len; + } +}; + +const SIBByte = packed struct { + base: u3, + index: u3, + scale: u2, + + fn disp32() u8 { + return @bitCast(SIBByte{ .base = 5, .index = 4, .scale = 0 }); + } + + fn baseDisp8(base: u3) u8 { + return @bitCast(SIBByte{ .base = base, .index = 4, .scale = 0 }); + } + + fn initBase(base: u3) u8 { + return @bitCast(SIBByte{ .base = base, .index = 4, .scale = 0 }); + } +}; + +const ModRM = packed struct { + rm: u3, + regOrOpx: u3, + mod: u2, + + fn direct(regOrOpx: u3, rm: u3) u8 { + return @bitCast(ModRM{ .rm = rm, .regOrOpx = regOrOpx, .mod = 0b11 }); + } + + fn sibDisp0(regOrOpx: u3) u8 { + return @bitCast(ModRM{ .rm = 0b100, .regOrOpx = regOrOpx, .mod = 0b00 }); + } + + fn sibDisp8(regOrOpx: u3) u8 { + return @bitCast(ModRM{ .rm = 0b100, .regOrOpx = regOrOpx, .mod = 0b01 }); + } + + fn indirectDisp0(regOrOpx: u3, rm: u3) u8 { + return @bitCast(ModRM{ .rm = rm, .regOrOpx = regOrOpx, .mod = 0b00 }); + } + + fn indirectDisp8(regOrOpx: u3, rm: u3) u8 { + return @bitCast(ModRM{ .rm = rm, .regOrOpx = regOrOpx, .mod = 0b01 }); + } + + fn indirectDisp32(regOrOpx: u3, rm: u3) u8 { + return @bitCast(ModRM{ .rm = rm, .regOrOpx = regOrOpx, .mod = 0b10 }); + } + + fn ripDisp32(regOrOpx: u3) u8 { + return @bitCast(ModRM{ .rm = 0b101, .regOrOpx = regOrOpx, .mod = 0b00 }); + } +}; + +const Rex = struct { + w: bool = false, + r: bool = false, + x: bool = false, + b: bool = false, + present: bool = false, + + fn isSet(self: Rex) bool { + return self.w or self.r or self.x or self.b; + } +}; + +fn encodeImm64(buf: []u8, len: *usize, imm: u64) void { + @memcpy(buf[len.*..len.*+8], std.mem.asBytes(&imm)); + len.* += 8; +} + +fn encodeMemory(buf: []u8, len: *usize, enc: Encoding, mem: Memory, op: Op) void { + const opEnc: u3 = switch (op.type) { + .reg => op.data.reg.lowEnc(), + .none => enc.modRmExt(), + else => unreachable, + }; + + switch (mem) { + .sib => |sib| { + switch (sib.base) { + .none => { + buf[len.*] = ModRM.sibDisp0(opEnc); + len.* += 1; + + if (sib.scaleIndex.scale > 0) { + unreachable; + } else { + buf[len.*] = SIBByte.disp32(); + len.* += 1; + } + + @memcpy(buf[len.*..len.*+4], std.mem.asBytes(&sib.disp)); + len.* += 4; + }, + .reg => |base| { + if (false) { + // TODO: base.class() == .segment + } else { + const baseEnc = base.lowEnc(); + if (baseEnc == 4 or sib.scaleIndex.scale > 0) { + if (sib.disp == 0 and baseEnc != 5) { + buf[len.*] = ModRM.sibDisp0(opEnc); + len.* += 1; + if (sib.scaleIndex.scale > 0) { + unreachable; + } else { + buf[len.*] = SIBByte.initBase(baseEnc); + len.* += 1; + } + } else if (std.math.cast(i8, sib.disp)) |disp| { + buf[len.*] = ModRM.sibDisp8(opEnc); + len.* += 1; + + if (sib.scaleIndex.scale > 0) { + unreachable; + } else { + buf[len.*] = SIBByte.baseDisp8(baseEnc); + len.* += 1; + } + + buf[len.*] = @bitCast(disp); + len.* += 1; + } else { + unreachable; + } + } else { + if (sib.disp == 0 and baseEnc != 5) { + buf[len.*] = 
ModRM.indirectDisp0(opEnc, baseEnc); + len.* += 1; + } else if (std.math.cast(i8, sib.disp)) |disp| { + buf[len.*] = ModRM.indirectDisp8(opEnc, baseEnc); + len.* += 1; + + buf[len.*] = @bitCast(disp); + len.* += 1; + } else { + buf[len.*] = ModRM.indirectDisp32(opEnc, baseEnc); + len.* += 1; + + @memcpy(buf[len.*..len.*+4], std.mem.asBytes(&sib.disp)); + len.* += 4; + } + } + } + }, + else => unreachable, + } + }, + .rip => |rip| { + buf[len.*] = ModRM.ripDisp32(opEnc); + len.* += 1; + + @memcpy(buf[len.*..len.*+4], std.mem.asBytes(&rip)); + len.* += 4; + }, + else => unreachable, + } +} + +fn encodeRexPrefix(buf: []u8, len: *usize, enc: Encoding, ops: []const Op) void { + var rex = Rex{ + .present = enc.mode == .rex, + .w = enc.mode == .long, + }; + + switch (enc.opEn) { + .o, .oi => { + rex.b = ops[0].data.reg.isExtended(); + }, + .m, .mr, .rm, .rmi => { + const rop = switch (enc.opEn) { + .mr => ops[1], + .m, .rm, .rmi => ops[0], + else => unreachable, + }; + rex.r = rop.isBaseExtended(); + const bxop = switch (enc.opEn) { + .m, .mr => ops[0], + else => ops[1], + }; + rex.b = bxop.isBaseExtended(); + rex.x = bxop.isIndexExtended(); + } + } + + if (!rex.present and !rex.isSet()) return; + var byte: u8 = 0b0100_0000; + if (rex.w) byte |= 0b1000; + if (rex.r) byte |= 0b0100; + if (rex.x) byte |= 0b0010; + if (rex.b) byte |= 0b0001; + + buf[len.*] = byte; + len.* += 1; +} + +const Prefix = enum(u3) { + none, +}; + +const OpType = enum(u8) { + none, + reg, + mem, + imm, +}; + +pub const Op = struct { + type: OpType, + data: union { + reg: Register, + mem: Memory, + imm: Immediate, + }, + + pub fn sibBase(base: Base, disp: i32) Op { + return .{ .type = .mem, .data = .{ .mem = Memory.sibBase(base, disp) }}; + } + + pub fn reg(r: Register) Op { + return .{ .type = .reg, .data = .{ .reg = r }}; + } + + pub fn mem(m: Memory) Op { + return .{ .type = .mem, .data = .{ .mem = m }}; + } + + pub fn imm(u: u64) Op { + return .{ .type = .imm, .data = .{ .imm = Immediate.u(u) }}; + } + + fn isBaseExtended(self: Op) bool { + return switch (self.type) { + .none, .imm => false, + .reg => self.data.reg.isExtended(), + .mem => { + const base = self.data.mem.base(); + if (base == .reg) { + return base.reg.isExtended(); + } else return false; + }, + }; + } + + fn isIndexExtended(self: Op) bool { + return switch (self.type) { + .none, .reg, .imm => false, + .mem => { + if (self.data.mem == .sib) { + if (self.data.mem.sib.scaleIndex.scale > 0) { + return self.data.mem.sib.scaleIndex.index.isExtended(); + } + } + return false; + }, + }; + } +}; + +pub const Memory = union(enum) { + sib: Sib, + moffs: Moffs, + rip: i32, + + pub fn sibBase(base_: Base, disp: i32) Memory { + return .{ .sib = Sib{ .base = base_, .disp = disp, .scaleIndex = ScaleIndex.none } }; + } + + pub fn rip(disp: i32) Memory { + return .{ .rip = disp }; + } + + fn base(self: Memory) Base { + return switch (self) { + .moffs => |moffs| .{ .reg = moffs.seg }, + .sib => |sib| sib.base, + .rip => .none, + }; + } +}; + +const ScaleIndex = struct { + scale: u4, + index: Register, + + const none = ScaleIndex{ .scale = 0, .index = undefined }; +}; + +pub const Base = union(enum) { + none, + reg: Register, + frame: FrameIndex, + + pub fn reg(r: Register) Base { + return .{ .reg = r }; + } +}; + +const FrameIndex = enum(u32) { + _, +}; + +const Sib = struct { + base: Base, + scaleIndex: ScaleIndex, + disp: i32, +}; + +const Moffs = struct { + seg: Register, + offset: u64, +}; + +const Immediate = union(enum) { + unsigned: u64, + + fn u(x: u64) Immediate { + 
return .{ .unsigned = x }; + } +}; + +const modrm_ext = u3; + +const Encoding = struct { + opEn: OpEn, + modrm_ext: modrm_ext, + mode: Mode, + feature: Feature, + + fn init(opEn_: OpEn, modrm_ext_: modrm_ext, mode_: Mode, feature_: Feature) Encoding { + var new = Encoding{ + .opEn = opEn_, + .modrm_ext = modrm_ext_, + .mode = mode_, + .feature = feature_, + }; + return new; + } + + fn modRmExt(self: Encoding) u3 { + return switch (self.opEn) { + .m => self.modrm_ext, + else => unreachable, + }; + } +}; + +fn mandatoryPrefix(opc: []const u8) ?u8 { + const prefix = opc[0]; + return switch (prefix) { + 0x66, 0xf2, 0xf3 => prefix, + else => null, + }; +} + +const OpEn = enum(u8) { + m, + mr, + rm, + rmi, + oi, + o, +}; + +const Mode = enum(u8) { + none, + rex, + long, +}; + +const Feature = enum(u8) { + none, +}; + +test "x64 encoding" { + var buf: std.ArrayListAlignedUnmanaged(u8, std.mem.page_size) = .{}; + defer buf.deinit(t.alloc); + const encoder = Encoder{ .alloc = t.alloc, .buf = &buf }; + + buf.clearRetainingCapacity(); + try encoder.pushReg(.rbp); + try t.eqSlice(u8, buf.items, &.{ 0x55 }); + + buf.clearRetainingCapacity(); + try encoder.movReg(.rbp, .rsp); + try t.eqSlice(u8, buf.items, &.{ 0x48, 0x8b, 0xec }); + + buf.clearRetainingCapacity(); + try encoder.cmp(.rdx, .rcx); + try t.eqSlice(u8, buf.items, &.{ 0x48, 0x3b, 0xd1 }); + + buf.clearRetainingCapacity(); + try encoder.jumpRel(100); + try t.eqSlice(u8, buf.items, &.{ 0xe9, 0x64, 0x00, 0x00, 0x00 }); + + buf.clearRetainingCapacity(); + try encoder.jumpReg(.rax); + try t.eqSlice(u8, buf.items, &.{0xff, 0xe0}); + + buf.clearRetainingCapacity(); + try encoder.jumpCond(jge, 100); + try t.eqSlice(u8, buf.items, &.{ 0x0f, 0x8d, 0x64, 0x00, 0x00, 0x00 }); + + buf.clearRetainingCapacity(); + try encoder.lea(.rcx, Memory.sibBase(Base.reg(.rdx), 100)); + try t.eqSlice(u8, buf.items, &.{ 0x48, 0x8d, 0x4a, 0x64 }); + + buf.clearRetainingCapacity(); + try encoder.lea(.rax, Memory.rip(16)); + try t.eqSlice(u8, buf.items, &.{ 0x48, 0x8d, 0x05, 0x10, 0x00, 0x00, 0x00 }); + + buf.clearRetainingCapacity(); + try encoder.movMem(.rcx, Memory.sibBase(Base.reg(.rbp), 8)); + try t.eqSlice(u8, buf.items, &.{ 0x48, 0x8b, 0x4d, 0x08 }); + + buf.clearRetainingCapacity(); + try encoder.movToMem(Memory.sibBase(Base.reg(.rbp), 8), .rcx); + try t.eqSlice(u8, buf.items, &.{ 0x48, 0x89, 0x4d, 0x08 }); + + buf.clearRetainingCapacity(); + try encoder.movImm(.rdx, 0x7ffc000100000001); + try t.eqSlice(u8, buf.items, &.{0x48, 0xba, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0xfc, 0x7f}); + + buf.clearRetainingCapacity(); + try encoder.callRel(100); + try t.eqSlice(u8, buf.items, &.{ 0xe8, 0x64, 0x00, 0x00, 0x00 }); + + buf.clearRetainingCapacity(); + try encoder.callReg(.rax); + try t.eqSlice(u8, buf.items, &.{0xff, 0xd0}); + + buf.clearRetainingCapacity(); + try encoder.ret(); + try t.eqSlice(u8, buf.items, &.{0xc3}); + + buf.clearRetainingCapacity(); + try encoder.int3(); + try t.eqSlice(u8, buf.items, &.{0xcc}); +} \ No newline at end of file diff --git a/src/jit/x64_assembler.zig b/src/jit/x64_assembler.zig new file mode 100644 index 000000000..870f12855 --- /dev/null +++ b/src/jit/x64_assembler.zig @@ -0,0 +1,162 @@ +const std = @import("std"); +const stdx = @import("stdx"); +const cy = @import("../cyber.zig"); +const t = stdx.testing; +const Slot = cy.register.RegisterId; +const assm = @import("assembler.zig"); + +const X64 = @import("x64.zig"); +pub const BaseReg = X64.Base.reg; +pub const MemSibBase = X64.Memory.sibBase; +const MemRip = X64.Memory.rip; + +const 
LRegister = assm.LRegister; +const Register = X64.Register; +const gen = @import("gen.zig"); + +comptime { + std.testing.refAllDecls(X64); +} + +pub const FpReg: X64.Register = .rsi; + +pub fn genLoadSlot(c: *cy.Chunk, dst: LRegister, src: Slot) !void { + try c.x64Enc.movMem(toReg(dst), MemSibBase(BaseReg(FpReg), src * 8)); +} + +pub fn genStoreSlot(c: *cy.Chunk, dst: Slot, src: LRegister) !void { + try c.x64Enc.movToMem(MemSibBase(BaseReg(FpReg), dst * 8), toReg(src)); +} + +pub fn genAddImm(c: *cy.Chunk, dst: LRegister, src: LRegister, imm: u64) !void { + try c.x64Enc.lea(toReg(dst), MemSibBase(BaseReg(toReg(src)), @intCast(imm))); +} + +pub fn genMovImm(c: *cy.Chunk, dst: LRegister, imm: u64) !void { + try c.x64Enc.movImm(toReg(dst), imm); +} + +pub fn genJumpCond(c: *cy.Chunk, cond: assm.LCond, offset: i32) !void { + try c.x64Enc.jumpCond(toCond(cond), offset); +} + +pub fn patchJumpCond(c: *cy.Chunk, pc: usize, to: usize) void { + const jumpInstLen = 6; + const offset: i32 = @intCast(@as(isize, @bitCast(to -% (pc + jumpInstLen)))); + // Displacement bytes start at the 3rd byte. + const disp = c.jitBuf.buf.items[pc+2..pc+2+4]; + @memcpy(disp, std.mem.asBytes(&offset)); +} + +pub fn genPatchableJumpRel(c: *cy.Chunk) !void { + try c.x64Enc.jumpRel(0); +} + +pub fn patchJumpRel(c: *cy.Chunk, pc: usize, to: usize) void { + const jumpInstLen = 5; + const offset: i32 = @intCast(@as(isize, @bitCast(to -% (pc + jumpInstLen)))); + // Displacement bytes start at the 2nd byte. + const disp = c.jitBuf.buf.items[pc+1..pc+1+4]; + @memcpy(disp, std.mem.asBytes(&offset)); +} + +pub fn genCmp(c: *cy.Chunk, left: LRegister, right: LRegister) !void { + try c.x64Enc.cmp(toReg(left), toReg(right)); +} + +pub fn genMovPcRel(c: *cy.Chunk, dst: LRegister, to: usize) !void { + const pc = c.jitGetPos(); + const instLen = 7; + const offset: i32 = @intCast(@as(isize, @bitCast(to -% (pc + instLen)))); + try c.x64Enc.lea(toReg(dst), MemRip(offset)); +} + +pub fn genPatchableMovPcRel(c: *cy.Chunk, dst: LRegister) !void { + try c.x64Enc.lea(toReg(dst), MemRip(0)); +} + +pub fn patchMovPcRelTo(c: *cy.Chunk, pc: usize, to: usize) void { + // Length of lea inst. + const instLen = 7; + const offset: i32 = @intCast(@as(isize, @bitCast(to -% (pc + instLen)))); + // Displacement bytes start at the 4th byte. + const disp = c.jitBuf.buf.items[pc+3..pc+3+4]; + @memcpy(disp, std.mem.asBytes(&offset)); +} + +pub fn genStoreSlotImm(c: *cy.Chunk, dst: Slot, imm: u64) !void { + try c.x64Enc.movImm(.rax, imm); + try c.x64Enc.movToMem(MemSibBase(BaseReg(FpReg), dst * 8), .rax); +} + +pub fn genMainReturn(c: *cy.Chunk) !void { + // // Load return addr into rax. + // try genLoadSlot(c, .temp, 0); + + // // Jump to rax. + // try c.x64Enc.jumpReg(.rax); + + // Assumes rbp and rsp were preserved. + try c.x64Enc.ret(); +} + +pub fn genCallFunc(c: *cy.Chunk, ret: Slot, func: *cy.Func) !void { + // Skip ret info. + // Skip bc pc slot. + try genStoreSlot(c, ret + 3, .fp); + + // Advance fp. + try genAddImm(c, .fp, .fp, 8 * ret); + + // Save pc to rax. Callee saves it to the stack. + const jumpInstLen = 5; + try c.x64Enc.lea(.rax, MemRip(jumpInstLen)); + + // Push empty call. + const jumpPc = c.jitGetPos(); + try c.jitBuf.relocs.append(c.alloc, .{ .type = .jumpToFunc, .data = .{ .jumpToFunc = .{ + .func = func, + .pc = @intCast(jumpPc), + }}}); + try c.x64Enc.jumpRel(0); +} + +pub fn genCallFuncPtr(c: *cy.Chunk, ptr: *const anyopaque) !void { + // No reloc, copy address to rax. 
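+    // rax maps to the logical temp register (see `toReg` below), mirroring the x30 path on arm64.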
+ try c.x64Enc.movImm(.rax, @intFromPtr(ptr)); + try c.x64Enc.callReg(.rax); +} + +pub fn genFuncReturn(c: *cy.Chunk) !void { + // Load return addr into rax. + try genLoadSlot(c, .temp, 2); + + // Load prev fp. + try c.x64Enc.movMem(FpReg, MemSibBase(BaseReg(FpReg), 3 * 8)); + + // Jump to rax. + try c.x64Enc.jumpReg(.rax); +} + +pub fn genBreakpoint(c: *cy.Chunk) !void { + try c.x64Enc.int3(); +} + +fn toReg(reg: LRegister) Register { + return switch (reg) { + .arg0 => .rdx, + .arg1 => .rcx, + .arg2 => .r8, + .arg3 => .r9, + .fp => FpReg, + .temp => .rax, + //.vm => .rdi, + }; +} + +fn toCond(cond: assm.LCond) u8 { + return switch (cond) { + .ge => X64.jge, + else => unreachable, + }; +} \ No newline at end of file diff --git a/src/vm.zig b/src/vm.zig index 43f553d80..c147efe5e 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -590,9 +590,10 @@ pub const VM = struct { } const bytes = res.jitBuf.buf.items[res.jitBuf.mainPc..res.jitBuf.mainPc+12*4]; - log.tracev("main start {}", .{std.fmt.fmtSliceHexLower(bytes)}); + log.tracev("main start {}: {}", .{res.jitBuf.mainPc, std.fmt.fmtSliceHexLower(bytes)}); const main: *const fn(*VM, [*]Value) callconv(.C) void = @ptrCast(@alignCast(res.jitBuf.buf.items.ptr + res.jitBuf.mainPc)); + // @breakpoint(); main(self, self.framePtr); return Value.None; } else { diff --git a/src/vm_compiler.zig b/src/vm_compiler.zig index 19ddc4bdb..25511b9fc 100644 --- a/src/vm_compiler.zig +++ b/src/vm_compiler.zig @@ -17,7 +17,7 @@ const math_mod = @import("builtins/math.zig"); const llvm_gen = @import("llvm_gen.zig"); const bcgen = @import("bc_gen.zig"); const jitgen = @import("jit/gen.zig"); -const assembler = @import("jit/assembler.zig"); +const assm = @import("jit/assembler.zig"); const A64 = @import("jit/a64.zig"); const bindings = cy.bindings; const module = cy.module; @@ -752,6 +752,7 @@ fn performCodegen(self: *VMcompiler) !void { } // Perform relocation. + const mainChunk = self.chunks.items[0]; for (self.jitBuf.relocs.items) |reloc| { switch (reloc.type) { .jumpToFunc => { @@ -760,14 +761,8 @@ fn performCodegen(self: *VMcompiler) !void { if (func.type == .hostFunc) { return error.Unexpected; } else { - switch (builtin.cpu.arch) { - .aarch64 => { - const targetPc = self.genSymMap.get(func).?.funcSym.pc; - var inst: *A64.BrImm = @ptrCast(@alignCast(&self.jitBuf.buf.items[jumpPc])); - inst.setOffsetFrom(jumpPc, targetPc); - }, - else => return error.Unsupported, - } + const targetPc = self.genSymMap.get(func).?.funcSym.pc; + assm.patchJumpRel(mainChunk, jumpPc, targetPc); } } }