1
0
mirror of https://github.com/olofk/serv.git synced 2026-02-28 00:55:41 +00:00

wip: Add debug instruction counting

This commit is contained in:
Olof Kindgren
2024-02-02 15:50:57 +01:00
parent 40a9e99f77
commit df9408a9a8

View File

@@ -574,6 +574,210 @@ module serv_top
end
endgenerate
// ---------------------------------------------------------------------
// Debug instruction-counting state.
// ---------------------------------------------------------------------
// flx_rs1/flx_rs2: value of bit_cnt at the most recent cycle where the
// serial rs1/rs2 bit differed from the previously sampled bit.
reg [4:0] flx_rs1, flx_rs2;
reg [4:0] flx_imm = 5'd12; //Do we want to check immediate as well?
// Last sampled serial bit of rs1/rs2, used to detect a change.
reg       bit_rs1, bit_rs2;
// Counts cycles in which cnt_en is asserted (wraps at 32).
reg [4:0] bit_cnt = 5'd0;
// NOTE(review): funct3, opcode and min_len are declared but not
// referenced anywhere in the visible code.
reg [2:0] funct3;
reg [4:0] opcode, min_len;
// Running totals: elapsed clock cycles, the per-instruction estimate
// (saved), and the accumulated estimates for the three scenarios.
integer   cycles              = 0,
          saved               = 0,
          saved_cycles1       = 0,
          saved_cycles16      = 0,
          saved_cycles_min_12 = 0;
// Returns the larger of two unsigned 5-bit values.
function automatic [4:0] maxof (input [4:0] a, b);
   begin
      // (a <= b) is the exact complement of (a > b); when the values are
      // equal either operand yields the same result.
      maxof = (a <= b) ? b : a;
   end
endfunction
// Returns the largest of three unsigned 5-bit values, zero-extended to
// a 32-bit integer.
function automatic integer maxof3 (input [4:0] a, b, c);
   begin
      maxof3 = {27'd0, maxof(maxof(a, b), c)};
   end
endfunction
// One flag per decoded instruction type; set when the matching
// instruction word is seen on the instruction bus.
reg LUI, AUIPC, JAL, JALR, BEQ, BNE, BLT, BGE, BLTU, BGEU, LB, LH, LW, LBU, LHU, SB, SH, SW, ADDI, SLTI, SLTIU, XORI, ORI, ANDI,SLLI, SRLI, SRAI, ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND, FENCE, ECALL, EBREAK;
reg CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI;
// Set when the instruction word matches none of the patterns above.
reg OTHER;
// Ratio of saved cycles to elapsed cycles for each scenario.
// A declaration initialiser is evaluated only once at simulation start
// (when cycles is still 0), so the ratios are instead recomputed
// whenever one of the counters changes, and division by zero is avoided.
real savings1;
real savings16;
real savings_min_12;
always @(cycles or saved_cycles1 or saved_cycles16 or saved_cycles_min_12) begin
   if (cycles != 0) begin
      savings1       = (saved_cycles1*1.0) / (cycles*1.0);
      savings16      = (saved_cycles16*1.0) / (cycles*1.0);
      savings_min_12 = (saved_cycles_min_12*1.0) / (cycles*1.0);
   end else begin
      savings1       = 0.0;
      savings16      = 0.0;
      savings_min_12 = 0.0;
   end
end
// Debug instrumentation: classifies each fetched instruction, tracks how
// far up the rs1/rs2 operands still change bit-to-bit, and accumulates an
// estimate of how many cycles could be skipped per instruction.
always @(posedge clk) begin
   // --- Instruction classification ---------------------------------
   // On every instruction-bus acknowledge, clear all type flags and set
   // the single flag matching the returned instruction word.
   if (i_ibus_ack) begin
      LUI <= 1'b0;
      AUIPC <= 1'b0;
      JAL <= 1'b0;
      JALR <= 1'b0;
      BEQ <= 1'b0;
      BNE <= 1'b0;
      BLT <= 1'b0;
      BGE <= 1'b0;
      BLTU <= 1'b0;
      BGEU <= 1'b0;
      LB <= 1'b0;
      LH <= 1'b0;
      LW <= 1'b0;
      LBU <= 1'b0;
      LHU <= 1'b0;
      SB <= 1'b0;
      SH <= 1'b0;
      SW <= 1'b0;
      ADDI <= 1'b0;
      SLTI <= 1'b0;
      SLTIU <= 1'b0;
      XORI <= 1'b0;
      ORI <= 1'b0;
      ANDI <= 1'b0;
      SLLI <= 1'b0;
      SRLI <= 1'b0;
      SRAI <= 1'b0;
      ADD <= 1'b0;
      SUB <= 1'b0;
      SLL <= 1'b0;
      SLT <= 1'b0;
      SLTU <= 1'b0;
      XOR <= 1'b0;
      SRL <= 1'b0;
      SRA <= 1'b0;
      OR <= 1'b0;
      AND <= 1'b0;
      FENCE <= 1'b0;
      ECALL <= 1'b0;
      EBREAK <= 1'b0;
      CSRRW <= 1'b0;
      CSRRS <= 1'b0;
      CSRRC <= 1'b0;
      CSRRWI <= 1'b0;
      CSRRSI <= 1'b0;
      CSRRCI <= 1'b0;
      OTHER <= 1'b0;
      casez(i_ibus_rdt)
        //  3322222_22222 11111_111 11
        //  1098765_43210 98765_432 10987_65432_10
        32'b???????_?????_?????_???_?????_01101_11 : LUI <= 1'b1;
        32'b???????_?????_?????_???_?????_00101_11 : AUIPC <= 1'b1;
        32'b???????_?????_?????_???_?????_11011_11 : JAL <= 1'b1;
        32'b???????_?????_?????_000_?????_11001_11 : JALR <= 1'b1;
        32'b???????_?????_?????_000_?????_11000_11 : BEQ <= 1'b1;
        32'b???????_?????_?????_001_?????_11000_11 : BNE <= 1'b1;
        32'b???????_?????_?????_100_?????_11000_11 : BLT <= 1'b1;
        32'b???????_?????_?????_101_?????_11000_11 : BGE <= 1'b1;
        32'b???????_?????_?????_110_?????_11000_11 : BLTU <= 1'b1;
        32'b???????_?????_?????_111_?????_11000_11 : BGEU <= 1'b1;
        32'b???????_?????_?????_000_?????_00000_11 : LB <= 1'b1;
        32'b???????_?????_?????_001_?????_00000_11 : LH <= 1'b1;
        32'b???????_?????_?????_010_?????_00000_11 : LW <= 1'b1;
        32'b???????_?????_?????_100_?????_00000_11 : LBU <= 1'b1;
        32'b???????_?????_?????_101_?????_00000_11 : LHU <= 1'b1;
        32'b???????_?????_?????_000_?????_01000_11 : SB <= 1'b1;
        32'b???????_?????_?????_001_?????_01000_11 : SH <= 1'b1;
        32'b???????_?????_?????_010_?????_01000_11 : SW <= 1'b1;
        32'b???????_?????_?????_000_?????_00100_11 : ADDI <= 1'b1;
        32'b???????_?????_?????_010_?????_00100_11 : SLTI <= 1'b1;
        32'b???????_?????_?????_011_?????_00100_11 : SLTIU <= 1'b1;
        32'b???????_?????_?????_100_?????_00100_11 : XORI <= 1'b1;
        32'b???????_?????_?????_110_?????_00100_11 : ORI <= 1'b1;
        32'b???????_?????_?????_111_?????_00100_11 : ANDI <= 1'b1;
        32'b0000000_?????_?????_001_?????_00100_11 : SLLI <= 1'b1;
        32'b0000000_?????_?????_101_?????_00100_11 : SRLI <= 1'b1;
        32'b0100000_?????_?????_101_?????_00100_11 : SRAI <= 1'b1;
        32'b0000000_?????_?????_000_?????_01100_11 : ADD <= 1'b1;
        32'b0100000_?????_?????_000_?????_01100_11 : SUB <= 1'b1;
        32'b0000000_?????_?????_001_?????_01100_11 : SLL <= 1'b1;
        32'b0000000_?????_?????_010_?????_01100_11 : SLT <= 1'b1;
        32'b0000000_?????_?????_011_?????_01100_11 : SLTU <= 1'b1;
        // XOR/OR/AND require funct7 == 0000000 like the other base OP
        // instructions; with a don't-care funct7, DIV/REM/REMU (funct7
        // 0000001) would be counted as XOR/OR/AND.
        32'b0000000_?????_?????_100_?????_01100_11 : XOR <= 1'b1;
        32'b0000000_?????_?????_101_?????_01100_11 : SRL <= 1'b1;
        32'b0100000_?????_?????_101_?????_01100_11 : SRA <= 1'b1;
        32'b0000000_?????_?????_110_?????_01100_11 : OR <= 1'b1;
        32'b0000000_?????_?????_111_?????_01100_11 : AND <= 1'b1;
        32'b???????_?????_?????_000_?????_00011_11 : FENCE <= 1'b1;
        32'b0000000_00000_00000_000_00000_11100_11 : ECALL <= 1'b1;
        32'b0000000_00001_00000_000_00000_11100_11 : EBREAK <= 1'b1;
        32'b???????_?????_?????_001_?????_11100_11 : CSRRW <= 1'b1;
        32'b???????_?????_?????_010_?????_11100_11 : CSRRS <= 1'b1;
        32'b???????_?????_?????_011_?????_11100_11 : CSRRC <= 1'b1;
        32'b???????_?????_?????_101_?????_11100_11 : CSRRWI <= 1'b1;
        32'b???????_?????_?????_110_?????_11100_11 : CSRRSI <= 1'b1;
        32'b???????_?????_?????_111_?????_11100_11 : CSRRCI <= 1'b1;
        default : OTHER <= 1'b1;
      endcase
   end

   // --- Per-instruction savings estimate ---------------------------
   // Evaluated when cnt_done and ctrl_pc_en are both high (presumably
   // once per retired instruction -- confirm against serv_state).
   // "saved" is assigned with blocking assignments on purpose: it is a
   // temporary consumed by the accumulators further down in this block.
   if (cnt_done & ctrl_pc_en) begin
      if      (LUI)    saved = 0; //LUI and AUIPC operate on bits 31:12 and
      else if (AUIPC)  saved = 0; //would need a separate 20-bit MSB detector
      //JAL, JALR, B* : Ignoring branch ops for now, but optimizing especially BNE/BEQ
      //could be very beneficial
      else if (LB)     saved = 24; //Can likely optimize data readout. Maybe also
      else if (LH)     saved = 16; //speed up address calculation, but not sure
      else if (LW)     saved = 0;
      else if (LBU)    saved = 24; //Can likely optimize data readout. Maybe also
      else if (LHU)    saved = 16; //speed up address calculation, but not sure
      //SB, SH, SW: Not sure how much work to improve SB/SH
      else if (ADDI)   saved = 31-maxof3(flx_rs1, 5'd0, flx_imm); // +1 for carry?
      else if (SLTI)   saved = 31; //SLT* only needs to write one bit to RD. Rest is
      else if (SLTIU)  saved = 31; //zero. Maybe possible to speed up comparison too?
      else if (XORI)   saved = 31-maxof3(flx_rs1, 5'd0, flx_imm);
      else if (ORI)    saved = 31-maxof3(flx_rs1, 5'd0, flx_imm);
      else if (ANDI)   saved = 31-maxof3(flx_rs1, 5'd0, flx_imm);
      //SLLI, SRLI, SRAI: Not sure what can be done here. Probably too much work
      else if (ADD)    saved = 31-maxof3(flx_rs1, flx_rs2, 5'd0); // +1 for carry?
      else if (SUB)    saved = 31-maxof3(flx_rs1, flx_rs2, 5'd0); // +1 for carry?
      //SLL: Not sure what can be done here. Probably too much work
      else if (SLT)    saved = 31; //SLT* only needs to write one bit to RD. Rest is
      else if (SLTU)   saved = 31; //zero. Maybe possible to speed up comparison too?
      else if (XOR)    saved = 31-maxof3(flx_rs1, flx_rs2, 5'd0);
      //SRL, SRA: Not sure what can be done here. Probably too much work
      else if (OR)     saved = 31-maxof3(flx_rs1, flx_rs2, 5'd0);
      else if (AND)    saved = 31-maxof3(flx_rs1, flx_rs2, 5'd0);
      else if (FENCE)  saved = 32; //This is a nop in SERV
      else if (ECALL)  saved = 0; //Not worth optimizing?
      else if (EBREAK) saved = 0; //Not worth optimizing?
      else             saved = 0;

      //Minimum 12 cycles per instruction, i.e. at most 20 cycles saved
      saved_cycles_min_12 <= saved_cycles_min_12 + ((saved>20) ? 20 : saved);
      //Pretending we have 16-bit granularity
      saved_cycles16 <= saved_cycles16 + ((saved>15) ? 16 : 0);
      //Bit-exact granularity
      saved_cycles1 <= saved_cycles1 + saved;
   end

   // Total elapsed clock cycles (denominator of the savings ratios).
   cycles <= cycles + 1;

   // --- Operand change tracking ------------------------------------
   // While cnt_en is high, remember the bit_cnt value at which each
   // serial operand last differed from its previously sampled bit.
   if (cnt_en) begin
      bit_cnt <= bit_cnt + 5'd1;
      if (cnt0) begin
         // First bit: capture it as the reference and restart tracking.
         bit_rs1 <= rs1;
         bit_rs2 <= rs2;
         flx_rs1 <= 5'd0;
         flx_rs2 <= 5'd0;
      end else begin
         if (rs1 != bit_rs1) begin
            flx_rs1 <= bit_cnt;
            bit_rs1 <= rs1;
         end
         if (rs2 != bit_rs2) begin
            flx_rs2 <= bit_cnt;
            bit_rs2 <= rs2;
         end
      end // else: !if(cnt0)
   end
end
`ifdef RISCV_FORMAL
reg [31:0] pc = RESET_PC;