Skip to content

Commit

Permalink
wip: Add debug instruction counting
Browse files Browse the repository at this point in the history
  • Loading branch information
olofk committed Feb 2, 2024
1 parent 40a9e99 commit df9408a
Showing 1 changed file with 204 additions and 0 deletions.
204 changes: 204 additions & 0 deletions rtl/serv_top.v
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,210 @@ module serv_top
end
endgenerate

// ---------------------------------------------------------------------
// Debug instruction-profiling state
// ---------------------------------------------------------------------

// Per-operand trackers. flx_rs1/flx_rs2 latch bit_cnt each time the
// sampled rs1/rs2 bit differs from the previously sampled one, and are
// cleared when cnt0 is seen. flx_imm is a fixed stand-in for immediates.
reg [4:0]  flx_rs1, flx_rs2;
reg [4:0]  flx_imm = 5'd12; //Do we want to check immediate as well?

// Previously sampled rs1/rs2 bit, used for the change detection above
reg        bit_rs1, bit_rs2;

// Free-running 5-bit counter, advanced on every cycle where cnt_en is set
reg [4:0]  bit_cnt = 5'd0;

// Not referenced by the profiling code visible in this section
reg [2:0]  funct3;
reg [4:0]  opcode;
reg [4:0]  min_len;

// Statistics accumulators
integer    cycles              = 0; // clock edges seen
integer    saved               = 0; // scratch: estimate for the instruction being retired
integer    saved_cycles1       = 0; // total of 'saved' as-is
integer    saved_cycles16      = 0; // total with 16-cycle granularity
integer    saved_cycles_min_12 = 0; // total with 'saved' capped at 20 per instruction

// Returns the larger of two unsigned 5-bit values (b when they are equal).
function automatic [4:0] maxof (input [4:0] a, b);
   begin
      maxof = (a <= b) ? b : a;
   end
endfunction

// Returns the largest of three unsigned 5-bit values, zero-extended to a
// 32-bit integer so it can be used directly in integer arithmetic.
function automatic integer maxof3 (input [4:0] a, b, c);
   begin
      maxof3 = {27'd0, maxof(maxof(a, b), c)};
   end
endfunction
// Decode flags: one per recognised instruction. All are cleared and at
// most one is set by the profiling always block whenever i_ibus_ack is high.
// Upper-immediate and jumps
reg LUI, AUIPC, JAL, JALR;
// Conditional branches
reg BEQ, BNE, BLT, BGE, BLTU, BGEU;
// Loads and stores
reg LB, LH, LW, LBU, LHU;
reg SB, SH, SW;
// Register-immediate ALU operations
reg ADDI, SLTI, SLTIU, XORI, ORI, ANDI;
reg SLLI, SRLI, SRAI;
// Register-register ALU operations
reg ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND;
// Fence and environment calls
reg FENCE, ECALL, EBREAK;
// CSR accesses
reg CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI;
// Set when the fetched word matches none of the patterns above
reg OTHER;

// Running fraction of counted clock cycles that each model estimates could
// be skipped: accumulated saved cycles divided by total cycles so far.
//
// These cannot be written as declaration initialisers: an initialiser is
// evaluated once at time zero (and strict Verilog requires it to be a
// constant expression), at which point cycles is still 0 and the result is
// 0/0. The ratios are therefore recomputed whenever a counter changes, and
// read 0.0 until at least one cycle has been counted.
real savings1;
real savings16;
real savings_min_12;

always @(cycles or saved_cycles1 or saved_cycles16 or saved_cycles_min_12) begin
   if (cycles != 0) begin
      savings1       = (saved_cycles1*1.0)       / (cycles*1.0);
      savings16      = (saved_cycles16*1.0)      / (cycles*1.0);
      savings_min_12 = (saved_cycles_min_12*1.0) / (cycles*1.0);
   end else begin
      savings1       = 0.0;
      savings16      = 0.0;
      savings_min_12 = 0.0;
   end
end

// Debug-only instruction profiler.
//
// On every rising clk edge this block does three independent things:
//   1. When i_ibus_ack is high, classifies the word on i_ibus_rdt into
//      exactly one decode flag (or OTHER).
//   2. When cnt_done & ctrl_pc_en is high, estimates how many cycles the
//      flagged instruction could have skipped ('saved') and adds that to
//      the three accumulators.
//   3. Counts cycles and, while cnt_en is high, tracks the last bit index
//      at which the sampled rs1/rs2 bits changed value.
//
// i_ibus_ack, i_ibus_rdt, cnt_done, ctrl_pc_en, cnt_en, cnt0, rs1 and rs2
// are driven elsewhere in the module.
always @(posedge clk) begin

   if (i_ibus_ack) begin
      // Clear every flag first. The casez below schedules a second
      // nonblocking assignment to the matching flag, and the later
      // assignment is the one that takes effect.
      {LUI, AUIPC, JAL, JALR}                <= 4'b0;
      {BEQ, BNE, BLT, BGE, BLTU, BGEU}       <= 6'b0;
      {LB, LH, LW, LBU, LHU}                 <= 5'b0;
      {SB, SH, SW}                           <= 3'b0;
      {ADDI, SLTI, SLTIU, XORI, ORI, ANDI}   <= 6'b0;
      {SLLI, SRLI, SRAI}                     <= 3'b0;
      {ADD, SUB, SLL, SLT, SLTU}             <= 5'b0;
      {XOR, SRL, SRA, OR, AND}               <= 5'b0;
      {FENCE, ECALL, EBREAK}                 <= 3'b0;
      {CSRRW, CSRRS, CSRRC}                  <= 3'b0;
      {CSRRWI, CSRRSI, CSRRCI}               <= 3'b0;
      OTHER                                  <= 1'b0;

      casez(i_ibus_rdt)
        // Bit-index ruler (tens digit above units digit):
        //  3322222_22222 11111_111 11
        //  1098765_43210 98765_432 10987_65432_10
        32'b???????_?????_?????_???_?????_01101_11 : LUI    <= 1'b1;
        32'b???????_?????_?????_???_?????_00101_11 : AUIPC  <= 1'b1;
        32'b???????_?????_?????_???_?????_11011_11 : JAL    <= 1'b1;
        32'b???????_?????_?????_000_?????_11001_11 : JALR   <= 1'b1;
        32'b???????_?????_?????_000_?????_11000_11 : BEQ    <= 1'b1;
        32'b???????_?????_?????_001_?????_11000_11 : BNE    <= 1'b1;
        32'b???????_?????_?????_100_?????_11000_11 : BLT    <= 1'b1;
        32'b???????_?????_?????_101_?????_11000_11 : BGE    <= 1'b1;
        32'b???????_?????_?????_110_?????_11000_11 : BLTU   <= 1'b1;
        32'b???????_?????_?????_111_?????_11000_11 : BGEU   <= 1'b1;
        32'b???????_?????_?????_000_?????_00000_11 : LB     <= 1'b1;
        32'b???????_?????_?????_001_?????_00000_11 : LH     <= 1'b1;
        32'b???????_?????_?????_010_?????_00000_11 : LW     <= 1'b1;
        32'b???????_?????_?????_100_?????_00000_11 : LBU    <= 1'b1;
        32'b???????_?????_?????_101_?????_00000_11 : LHU    <= 1'b1;
        32'b???????_?????_?????_000_?????_01000_11 : SB     <= 1'b1;
        32'b???????_?????_?????_001_?????_01000_11 : SH     <= 1'b1;
        32'b???????_?????_?????_010_?????_01000_11 : SW     <= 1'b1;
        32'b???????_?????_?????_000_?????_00100_11 : ADDI   <= 1'b1;
        32'b???????_?????_?????_010_?????_00100_11 : SLTI   <= 1'b1;
        32'b???????_?????_?????_011_?????_00100_11 : SLTIU  <= 1'b1;
        32'b???????_?????_?????_100_?????_00100_11 : XORI   <= 1'b1;
        32'b???????_?????_?????_110_?????_00100_11 : ORI    <= 1'b1;
        32'b???????_?????_?????_111_?????_00100_11 : ANDI   <= 1'b1;
        32'b0000000_?????_?????_001_?????_00100_11 : SLLI   <= 1'b1;
        32'b0000000_?????_?????_101_?????_00100_11 : SRLI   <= 1'b1;
        32'b0100000_?????_?????_101_?????_00100_11 : SRAI   <= 1'b1;
        32'b0000000_?????_?????_000_?????_01100_11 : ADD    <= 1'b1;
        32'b0100000_?????_?????_000_?????_01100_11 : SUB    <= 1'b1;
        32'b0000000_?????_?????_001_?????_01100_11 : SLL    <= 1'b1;
        32'b0000000_?????_?????_010_?????_01100_11 : SLT    <= 1'b1;
        32'b0000000_?????_?????_011_?????_01100_11 : SLTU   <= 1'b1;
        // XOR/OR/AND require funct7 == 0000000 like the other base
        // register-register ops. With funct7 left as don't-care, the
        // M-extension DIV/REM/REMU encodings (funct7 == 0000001, same
        // opcode and funct3) were counted as XOR/OR/AND.
        32'b0000000_?????_?????_100_?????_01100_11 : XOR    <= 1'b1;
        32'b0000000_?????_?????_101_?????_01100_11 : SRL    <= 1'b1;
        32'b0100000_?????_?????_101_?????_01100_11 : SRA    <= 1'b1;
        32'b0000000_?????_?????_110_?????_01100_11 : OR     <= 1'b1;
        32'b0000000_?????_?????_111_?????_01100_11 : AND    <= 1'b1;
        32'b???????_?????_?????_000_?????_00011_11 : FENCE  <= 1'b1;
        32'b0000000_00000_00000_000_00000_11100_11 : ECALL  <= 1'b1;
        32'b0000000_00001_00000_000_00000_11100_11 : EBREAK <= 1'b1;
        32'b???????_?????_?????_001_?????_11100_11 : CSRRW  <= 1'b1;
        32'b???????_?????_?????_010_?????_11100_11 : CSRRS  <= 1'b1;
        32'b???????_?????_?????_011_?????_11100_11 : CSRRC  <= 1'b1;
        32'b???????_?????_?????_101_?????_11100_11 : CSRRWI <= 1'b1;
        32'b???????_?????_?????_110_?????_11100_11 : CSRRSI <= 1'b1;
        32'b???????_?????_?????_111_?????_11100_11 : CSRRCI <= 1'b1;
        default                                    : OTHER  <= 1'b1;
      endcase
   end

   if (cnt_done & ctrl_pc_en) begin
      // 'saved' is a scratch value: it is written with blocking
      // assignments so the accumulator updates below see this
      // instruction's estimate on the same edge.
      //
      // NOTE(review): flx_rs1/flx_rs2 are read here with their pre-edge
      // values (they are updated with nonblocking assignments further
      // down). If cnt_done can coincide with the final cnt_en cycle, a
      // value change on that last bit is not reflected in 'saved' -
      // confirm against the cnt_done/cnt_en timing.
      if      (LUI)    saved = 0;  //LUI and AUIPC Operates on bits 31:12 and
      else if (AUIPC)  saved = 0;  //would need a separate 20-bit MSB detector
      //JAL, JALR, B* : Ignoring branch ops for now, but optimizing especially BNE/BEQ
      //could be very beneficial

      else if (LB)     saved = 24; //Can likely optimize data readout. Maybe also
      else if (LH)     saved = 16; //speed up address calculation, but not sure
      else if (LW)     saved = 0;  //
      else if (LBU)    saved = 24; //Can likely optimize data readout. Maybe also
      else if (LHU)    saved = 16; //speed up address calculation, but not sure
      //SB, SH, SW Not sure how much work to improve SB/SH
      else if (ADDI)   saved = 31-maxof3(flx_rs1, 0, flx_imm); // +1 for carry?
      else if (SLTI)   saved = 31; //SLT* only needs to write one bit to RD. Rest is
      else if (SLTIU)  saved = 31; //zero. Maybe possible to speed up comparison too?
      else if (XORI)   saved = 31-maxof3(flx_rs1, 0, flx_imm);
      else if (ORI)    saved = 31-maxof3(flx_rs1, 0, flx_imm);
      else if (ANDI)   saved = 31-maxof3(flx_rs1, 0, flx_imm);
      //SLLI, SRLI, SRAI: Not sure what can be done here. Probably too much work
      else if (ADD)    saved = 31-maxof3(flx_rs1, flx_rs2, 0); // +1 for carry?
      else if (SUB)    saved = 31-maxof3(flx_rs1, flx_rs2, 0); // +1 for carry?
      //SLL: Not sure what can be done here. Probably too much work
      else if (SLT)    saved = 31; //SLT* only needs to write one bit to RD. Rest is
      else if (SLTU)   saved = 31; //zero. Maybe possible to speed up comparison too?
      else if (XOR)    saved = 31-maxof3(flx_rs1, flx_rs2, 0);
      //SRL, SRA: Not sure what can be done here. Probably too much work
      else if (OR)     saved = 31-maxof3(flx_rs1, flx_rs2, 0);
      else if (AND)    saved = 31-maxof3(flx_rs1, flx_rs2, 0);
      else if (FENCE)  saved = 32; //This is a nop in SERV
      else if (ECALL)  saved = 0;  //Not worth optimizing?
      else if (EBREAK) saved = 0;  //Not worth optimizing?
      else             saved = 0;

      //Minimum 12 cycles (i.e. at most 20 cycles saved per instruction)
      saved_cycles_min_12 <= saved_cycles_min_12 + ((saved>20) ? 20 : saved);

      //Pretending we have 16-bit granularity
      saved_cycles16 <= saved_cycles16 + ((saved>15) ? 16 : 0);

      //Bit-level granularity: credit the full estimate
      saved_cycles1 <= saved_cycles1 + saved;
   end

   // Total clock edges seen; denominator for the savings ratios
   cycles <= cycles + 1;

   if (cnt_en) begin
      bit_cnt <= bit_cnt + 5'd1;
      if (cnt0) begin
         // Start of a new operand pass: remember the first bit and
         // reset the "last change" indices.
         bit_rs1 <= rs1;
         bit_rs2 <= rs2;
         flx_rs1 <= 5'd0;
         flx_rs2 <= 5'd0;
      end else begin
         // Record the current bit index each time the sampled bit
         // differs from the previous one. After the pass, flx_* holds
         // the highest index at which the value changed.
         if (rs1 != bit_rs1) begin
            flx_rs1 <= bit_cnt;
            bit_rs1 <= rs1;
         end
         if (rs2 != bit_rs2) begin
            flx_rs2 <= bit_cnt;
            bit_rs2 <= rs2;
         end
      end // else: !if(cnt0)
   end
end

`ifdef RISCV_FORMAL
reg [31:0] pc = RESET_PC;
Expand Down

0 comments on commit df9408a

Please sign in to comment.