mirror of
https://github.com/olofk/serv.git
synced 2026-02-28 00:55:41 +00:00
wip: Add debug instruction counting
This commit is contained in:
204
rtl/serv_top.v
204
rtl/serv_top.v
@@ -574,6 +574,210 @@ module serv_top
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ---------------------------------------------------------------------
// Debug-only instruction statistics (not part of the core datapath).
// ---------------------------------------------------------------------

// Bit index at which rs1 / rs2 last differed from the previously
// sampled bit of the same operand.
reg [4:0] flx_rs1;
reg [4:0] flx_rs2;

// Placeholder width for immediates; fixed at 12 and never updated.
reg [4:0] flx_imm = 12; //Do we want to check immediate as well?

// Previously sampled bit of each operand, used to detect a transition.
reg       bit_rs1;
reg       bit_rs2;

// Index of the operand bit being sampled in the current cnt_en cycle.
reg [4:0] bit_cnt = 5'd0;

// Declared for future use; nothing in the visible code drives them.
reg [2:0] funct3;
reg [4:0] opcode;
reg [4:0] min_len;

// Total clock cycles observed.
integer   cycles = 0;

// Scratch value: cycles the instruction that just retired could have skipped.
integer   saved = 0;

// Accumulated savings under three granularity models:
//   1  : every skippable cycle counts
//   16 : all-or-nothing, 16 cycles when more than 15 are skippable
//   12 : an instruction never takes fewer than 12 cycles (saving capped at 20)
integer   saved_cycles1 = 0;
integer   saved_cycles16 = 0;
integer   saved_cycles_min_12 = 0;
|
||||
|
||||
// Unsigned maximum of two 5-bit values.
// The selection stays a conditional operator (not if/else) so that an
// unknown comparison result still merges both operands bit by bit.
function automatic [4:0] maxof (input [4:0] a, b);
   reg a_is_larger;
   begin
      a_is_larger = (a > b);
      maxof = a_is_larger ? a : b;
   end
endfunction
|
||||
|
||||
// Unsigned maximum of three 5-bit values, returned as an integer
// (the 5-bit result is zero-extended to 32 bits).
function automatic integer maxof3 (input [4:0] a, b, c);
   begin
      maxof3 = {27'd0, maxof(maxof(a, b), c)};
   end
endfunction
|
||||
// One flag per decoded instruction. All flags are cleared and exactly one
// is set each time an instruction word is acknowledged on the ibus.

// Upper-immediate and jumps
reg LUI, AUIPC, JAL, JALR;
// Conditional branches
reg BEQ, BNE, BLT, BGE, BLTU, BGEU;
// Loads
reg LB, LH, LW, LBU, LHU;
// Stores
reg SB, SH, SW;
// Register-immediate ALU operations
reg ADDI, SLTI, SLTIU, XORI, ORI, ANDI;
// Immediate shifts
reg SLLI, SRLI, SRAI;
// Register-register ALU operations
reg ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND;
// Fence and environment calls
reg FENCE, ECALL, EBREAK;
// CSR accesses
reg CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI;
// Anything that matched none of the patterns above
reg OTHER;
|
||||
|
||||
// Fraction of all elapsed cycles that could have been skipped, one ratio
// per granularity model.
//
// A declaration initialiser on a variable is evaluated only once, at time
// zero, when every counter is still 0; written that way the ratios would
// be 0/0 and would never follow the counters. They are therefore
// recomputed whenever a counter changes, and report 0.0 until at least
// one cycle has been counted.
real savings1;
real savings16;
real savings_min_12;

always @* begin
   if (cycles == 0) begin
      savings1       = 0.0;
      savings16      = 0.0;
      savings_min_12 = 0.0;
   end else begin
      savings1       = (saved_cycles1*1.0)       / (cycles*1.0);
      savings16      = (saved_cycles16*1.0)      / (cycles*1.0);
      savings_min_12 = (saved_cycles_min_12*1.0) / (cycles*1.0);
   end
end
|
||||
|
||||
// Debug-only statistics process. On every clock edge it
//   1. classifies the instruction word whenever i_ibus_ack is high,
//   2. when cnt_done and ctrl_pc_en are both high, estimates how many
//      cycles the classified instruction could have skipped and adds that
//      to the three running totals,
//   3. counts the cycle, and
//   4. while cnt_en is high, records the last bit index at which rs1 / rs2
//      changed value relative to the previously sampled bit.
always @(posedge clk) begin

   // ---- 1. Instruction classification --------------------------------
   if (i_ibus_ack) begin
      // Clear every flag first; the casez below then sets exactly one
      // (the later nonblocking assignment wins).
      {LUI, AUIPC, JAL, JALR}                           <= {4{1'b0}};
      {BEQ, BNE, BLT, BGE, BLTU, BGEU}                  <= {6{1'b0}};
      {LB, LH, LW, LBU, LHU}                            <= {5{1'b0}};
      {SB, SH, SW}                                      <= {3{1'b0}};
      {ADDI, SLTI, SLTIU, XORI, ORI, ANDI}              <= {6{1'b0}};
      {SLLI, SRLI, SRAI}                                <= {3{1'b0}};
      {ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND} <= {10{1'b0}};
      {FENCE, ECALL, EBREAK}                            <= {3{1'b0}};
      {CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI}     <= {6{1'b0}};
      OTHER                                             <= 1'b0;

      casez (i_ibus_rdt)
        //  3322222_22222 11111_111 11
        //  1098765_43210 98765_432 10987_65432_10
        32'b???????_?????_?????_???_?????_01101_11 : LUI    <= 1'b1;
        32'b???????_?????_?????_???_?????_00101_11 : AUIPC  <= 1'b1;
        32'b???????_?????_?????_???_?????_11011_11 : JAL    <= 1'b1;
        32'b???????_?????_?????_000_?????_11001_11 : JALR   <= 1'b1;
        32'b???????_?????_?????_000_?????_11000_11 : BEQ    <= 1'b1;
        32'b???????_?????_?????_001_?????_11000_11 : BNE    <= 1'b1;
        32'b???????_?????_?????_100_?????_11000_11 : BLT    <= 1'b1;
        32'b???????_?????_?????_101_?????_11000_11 : BGE    <= 1'b1;
        32'b???????_?????_?????_110_?????_11000_11 : BLTU   <= 1'b1;
        32'b???????_?????_?????_111_?????_11000_11 : BGEU   <= 1'b1;
        32'b???????_?????_?????_000_?????_00000_11 : LB     <= 1'b1;
        32'b???????_?????_?????_001_?????_00000_11 : LH     <= 1'b1;
        32'b???????_?????_?????_010_?????_00000_11 : LW     <= 1'b1;
        32'b???????_?????_?????_100_?????_00000_11 : LBU    <= 1'b1;
        32'b???????_?????_?????_101_?????_00000_11 : LHU    <= 1'b1;
        32'b???????_?????_?????_000_?????_01000_11 : SB     <= 1'b1;
        32'b???????_?????_?????_001_?????_01000_11 : SH     <= 1'b1;
        32'b???????_?????_?????_010_?????_01000_11 : SW     <= 1'b1;
        32'b???????_?????_?????_000_?????_00100_11 : ADDI   <= 1'b1;
        32'b???????_?????_?????_010_?????_00100_11 : SLTI   <= 1'b1;
        32'b???????_?????_?????_011_?????_00100_11 : SLTIU  <= 1'b1;
        32'b???????_?????_?????_100_?????_00100_11 : XORI   <= 1'b1;
        32'b???????_?????_?????_110_?????_00100_11 : ORI    <= 1'b1;
        32'b???????_?????_?????_111_?????_00100_11 : ANDI   <= 1'b1;
        32'b0000000_?????_?????_001_?????_00100_11 : SLLI   <= 1'b1;
        32'b0000000_?????_?????_101_?????_00100_11 : SRLI   <= 1'b1;
        32'b0100000_?????_?????_101_?????_00100_11 : SRAI   <= 1'b1;
        32'b0000000_?????_?????_000_?????_01100_11 : ADD    <= 1'b1;
        32'b0100000_?????_?????_000_?????_01100_11 : SUB    <= 1'b1;
        32'b0000000_?????_?????_001_?????_01100_11 : SLL    <= 1'b1;
        32'b0000000_?????_?????_010_?????_01100_11 : SLT    <= 1'b1;
        32'b0000000_?????_?????_011_?????_01100_11 : SLTU   <= 1'b1;
        32'b???????_?????_?????_100_?????_01100_11 : XOR    <= 1'b1;
        32'b0000000_?????_?????_101_?????_01100_11 : SRL    <= 1'b1;
        32'b0100000_?????_?????_101_?????_01100_11 : SRA    <= 1'b1;
        32'b???????_?????_?????_110_?????_01100_11 : OR     <= 1'b1;
        32'b???????_?????_?????_111_?????_01100_11 : AND    <= 1'b1;
        32'b???????_?????_?????_000_?????_00011_11 : FENCE  <= 1'b1;
        32'b0000000_00000_00000_000_00000_11100_11 : ECALL  <= 1'b1;
        32'b0000000_00001_00000_000_00000_11100_11 : EBREAK <= 1'b1;
        32'b???????_?????_?????_001_?????_11100_11 : CSRRW  <= 1'b1;
        32'b???????_?????_?????_010_?????_11100_11 : CSRRS  <= 1'b1;
        32'b???????_?????_?????_011_?????_11100_11 : CSRRC  <= 1'b1;
        32'b???????_?????_?????_101_?????_11100_11 : CSRRWI <= 1'b1;
        32'b???????_?????_?????_110_?????_11100_11 : CSRRSI <= 1'b1;
        32'b???????_?????_?????_111_?????_11100_11 : CSRRCI <= 1'b1;
        default                                    : OTHER  <= 1'b1;
      endcase
   end

   // ---- 2. Per-instruction savings estimate --------------------------
   // `saved` is a scratch value, so it is written with blocking
   // assignments and consumed by the accumulators in the same pass.
   // At most one flag is set at a time, so grouping flags that share a
   // formula gives the same result as testing them one by one.
   //
   // NOTE(review): if cnt_done coincides with the final cnt_en cycle, the
   // transition check for that last operand bit only lands in flx_rs1 /
   // flx_rs2 after `saved` has been computed here - confirm against the
   // timing of cnt_done before trusting the ALU numbers to the last bit.
   if (cnt_done & ctrl_pc_en) begin
      //LUI and AUIPC operate on bits 31:12 and would need a separate
      //20-bit MSB detector
      if (LUI | AUIPC)
        saved = 0;
      //JAL, JALR, B* : Ignoring branch ops for now, but optimizing
      //especially BNE/BEQ could be very beneficial

      //LB/LBU, LH/LHU: Can likely optimize data readout. Maybe also
      //speed up address calculation, but not sure
      else if (LB | LBU)
        saved = 24;
      else if (LH | LHU)
        saved = 16;
      else if (LW)
        saved = 0;
      //SB, SH, SW: Not sure how much work to improve SB/SH

      //Register-immediate ALU ops: limited by the wider of rs1 and the
      //immediate. (ADDI: +1 for carry?)
      else if (ADDI | XORI | ORI | ANDI)
        saved = 31-maxof3(flx_rs1, 0, flx_imm);
      //SLT* only needs to write one bit to RD. Rest is zero. Maybe
      //possible to speed up comparison too?
      else if (SLTI | SLTIU | SLT | SLTU)
        saved = 31;
      //SLLI, SRLI, SRAI, SLL, SRL, SRA: Not sure what can be done here.
      //Probably too much work

      //Register-register ALU ops: limited by the wider of rs1 and rs2.
      //(ADD/SUB: +1 for carry?)
      else if (ADD | SUB | XOR | OR | AND)
        saved = 31-maxof3(flx_rs1, flx_rs2, 0);
      else if (FENCE)
        saved = 32; //This is a nop in SERV
      //ECALL, EBREAK: Not worth optimizing?
      else
        saved = 0;

      //Minimum 12 cycles
      saved_cycles_min_12 <= saved_cycles_min_12 + ((saved>20) ? 20 : saved);

      //Pretending we have 16-bit granularity
      saved_cycles16 <= saved_cycles16 + ((saved>15) ? 16 : 0);

      saved_cycles1 <= saved_cycles1 + saved;
   end

   // ---- 3. Cycle counter ---------------------------------------------
   cycles <= cycles + 1;

   // ---- 4. Operand bit-transition tracking ---------------------------
   if (cnt_en) begin
      if (cnt0) begin
         // This block treats cnt0 as "sampling bit index 0": restart the
         // transition search and remember the first bit of each operand.
         // The bit index is re-synchronised here (next bit is index 1)
         // rather than left free-running, so that a count that was cut
         // short cannot skew the positions recorded afterwards.
         bit_cnt <= 5'd1;
         bit_rs1 <= rs1;
         bit_rs2 <= rs2;
         flx_rs1 <= 5'd0;
         flx_rs2 <= 5'd0;
      end else begin
         bit_cnt <= bit_cnt + 5'd1;
         // A change relative to the previous bit moves the "last
         // transition" marker up to the current bit index.
         if (rs1 != bit_rs1) begin
            flx_rs1 <= bit_cnt;
            bit_rs1 <= rs1;
         end
         if (rs2 != bit_rs2) begin
            flx_rs2 <= bit_cnt;
            bit_rs2 <= rs2;
         end
      end // else: !if(cnt0)
   end
end
|
||||
|
||||
`ifdef RISCV_FORMAL
|
||||
reg [31:0] pc = RESET_PC;
|
||||
|
||||
Reference in New Issue
Block a user