1
0
mirror of https://github.com/mist-devel/mist-board.git synced 2026-01-23 10:38:31 +00:00

STEroids fixed and accelerated

This commit is contained in:
harbaum 2014-05-09 19:04:42 +00:00
parent 707c606173
commit 7edbc00f3c
5 changed files with 142 additions and 134 deletions

View File

@ -26,10 +26,11 @@ module cache (
input reset,
input flush,
input strobe,
input [22:0] addr, // cpu word address
input [1:0] ds, // upper (0) and lower (1) data strobe
output reg [15:0] dout,
output [15:0] dout,
output hit,
// interface to store entire cache lines when read from ram
@ -64,7 +65,7 @@ localparam ALLZERO = 64'd0; // 2 ** BITS zero bits
// L = cache line
// W = 16 bit word select
wire [21-BITS-1:0] tag = addr[22:2+BITS];
reg [BITS-1:0] line;
wire [BITS-1:0] line = addr[2+BITS-1:2];
/* ------------------------------------------------------------------------------ */
/* --------------------------------- cache memory ------------------------------- */
@ -78,7 +79,7 @@ reg [31:24] data_latch_3 [ENTRIES-1:0];
reg [23:16] data_latch_2 [ENTRIES-1:0];
reg [15: 8] data_latch_1 [ENTRIES-1:0];
reg [ 7: 0] data_latch_0 [ENTRIES-1:0];
reg [21-BITS-1:0] tag_latch [ENTRIES-1:0];
reg [ENTRIES-1:0] valid;
@ -89,60 +90,72 @@ reg [21-BITS-1:0] current_tag;
// assign hit = valid[line] && (tag_latch[line] == tag);
assign hit = valid[line] && (current_tag == tag);
reg [15:0] dout_latch_0;
reg [15:0] dout_latch_1;
reg [15:0] dout_latch_2;
reg [15:0] dout_latch_3;
// permanently output data according to current line
// de-multiplex 64 bit data into word requested by cpu
assign dout = (addr[1:0] == 0)?dout_latch_0:
(addr[1:0] == 1)?dout_latch_1:
(addr[1:0] == 2)?dout_latch_2:
dout_latch_3;
always @(posedge clk_128) begin
dout <= (addr[1:0] == 2'd0)?{data_latch_1[line], data_latch_0[line]}:
(addr[1:0] == 2'd1)?{data_latch_3[line], data_latch_2[line]}:
(addr[1:0] == 2'd2)?{data_latch_5[line], data_latch_4[line]}:
{data_latch_7[line], data_latch_6[line]};
dout_latch_0 <= {data_latch_1[line], data_latch_0[line]};
dout_latch_1 <= {data_latch_3[line], data_latch_2[line]};
dout_latch_2 <= {data_latch_5[line], data_latch_4[line]};
dout_latch_3 <= {data_latch_7[line], data_latch_6[line]};
current_tag <= tag_latch[line];
end
always @(negedge clk_128)
line <= addr[2+BITS-1:2];
always @(posedge clk_128) begin
if(reset || flush) begin
valid <= ALLZERO;
end else begin
// store indicates that a whole cache line is to be stored
if(store) begin
data_latch_7[line] <= din64[63:56];
data_latch_6[line] <= din64[55:48];
data_latch_5[line] <= din64[47:40];
data_latch_4[line] <= din64[39:32];
data_latch_3[line] <= din64[31:24];
data_latch_2[line] <= din64[23:16];
data_latch_1[line] <= din64[15: 8];
data_latch_0[line] <= din64[ 7: 0];
// the store and update signals are valid in the last cycle only. The cpu runs
// at 32MHz and is valid if t=14,15,0,1
if(t==15) begin
// store indicates that a whole cache line is to be stored
if(store) begin
data_latch_7[line] <= din64[63:56];
data_latch_6[line] <= din64[55:48];
data_latch_5[line] <= din64[47:40];
data_latch_4[line] <= din64[39:32];
data_latch_3[line] <= din64[31:24];
data_latch_2[line] <= din64[23:16];
data_latch_1[line] <= din64[15: 8];
data_latch_0[line] <= din64[ 7: 0];
tag_latch[line] <= tag;
valid[line] <= 1'b1;
end
tag_latch[line] <= tag;
valid[line] <= 1'b1;
end
// cpu (or other bus master!) writes to ram, so update cache contents if necessary
else if(update && hit) begin
// no need to care for "tag_latch" or "valid" as they simply stay the same
// cpu (or other bus master!) writes to ram, so update cache contents if necessary
else if(update && hit) begin
// no need to care for "tag_latch" or "valid" as they simply stay the same
if(addr[1:0] == 2'd0) begin
if(ds[1]) data_latch_0[line] <= din16[7:0];
if(ds[0]) data_latch_1[line] <= din16[15:8];
end
if(addr[1:0] == 2'd0) begin
if(ds[1]) data_latch_0[line] <= din16[7:0];
if(ds[0]) data_latch_1[line] <= din16[15:8];
end
if(addr[1:0] == 2'd1) begin
if(ds[1]) data_latch_2[line] <= din16[7:0];
if(ds[0]) data_latch_3[line] <= din16[15:8];
end
if(addr[1:0] == 2'd1) begin
if(ds[1]) data_latch_2[line] <= din16[7:0];
if(ds[0]) data_latch_3[line] <= din16[15:8];
end
if(addr[1:0] == 2'd2) begin
if(ds[1]) data_latch_4[line] <= din16[7:0];
if(ds[0]) data_latch_5[line] <= din16[15:8];
end
if(addr[1:0] == 2'd2) begin
if(ds[1]) data_latch_4[line] <= din16[7:0];
if(ds[0]) data_latch_5[line] <= din16[15:8];
end
if(addr[1:0] == 2'd3) begin
if(ds[1]) data_latch_6[line] <= din16[7:0];
if(ds[0]) data_latch_7[line] <= din16[15:8];
if(addr[1:0] == 2'd3) begin
if(ds[1]) data_latch_6[line] <= din16[7:0];
if(ds[0]) data_latch_7[line] <= din16[15:8];
end
end
end
end

View File

@ -14,11 +14,11 @@
// ************************************************************
// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
//
// 13.1.0 Build 162 10/23/2013 SJ Web Edition
// 13.1.4 Build 182 03/12/2014 SJ Web Edition
// ************************************************************
//Copyright (C) 1991-2013 Altera Corporation
//Copyright (C) 1991-2014 Altera Corporation
//Your use of Altera Corporation's design tools, logic functions
//and other software and tools, and its AMPP partner logic
//functions, and any output files from any of the foregoing
@ -126,10 +126,10 @@ module clock (
altpll_component.clk2_divide_by = 18,
altpll_component.clk2_duty_cycle = 50,
altpll_component.clk2_multiply_by = 85,
altpll_component.clk2_phase_shift = "-1500",
altpll_component.clk3_divide_by = 5625,
altpll_component.clk2_phase_shift = "-2500",
altpll_component.clk3_divide_by = 27000000,
altpll_component.clk3_duty_cycle = 50,
altpll_component.clk3_multiply_by = 512,
altpll_component.clk3_multiply_by = 2457599,
altpll_component.clk3_phase_shift = "0",
altpll_component.compensate_clock = "CLK0",
altpll_component.inclk0_input_frequency = 37037,
@ -265,7 +265,7 @@ endmodule
// Retrieval info: PRIVATE: PHASE_RECONFIG_INPUTS_CHECK STRING "0"
// Retrieval info: PRIVATE: PHASE_SHIFT0 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT1 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT2 STRING "-1500.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT2 STRING "-2500.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT3 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT_STEP_ENABLED_CHECK STRING "0"
// Retrieval info: PRIVATE: PHASE_SHIFT_UNIT0 STRING "deg"
@ -323,10 +323,10 @@ endmodule
// Retrieval info: CONSTANT: CLK2_DIVIDE_BY NUMERIC "18"
// Retrieval info: CONSTANT: CLK2_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK2_MULTIPLY_BY NUMERIC "85"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-1500"
// Retrieval info: CONSTANT: CLK3_DIVIDE_BY NUMERIC "5625"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-2500"
// Retrieval info: CONSTANT: CLK3_DIVIDE_BY NUMERIC "27000000"
// Retrieval info: CONSTANT: CLK3_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK3_MULTIPLY_BY NUMERIC "512"
// Retrieval info: CONSTANT: CLK3_MULTIPLY_BY NUMERIC "2457599"
// Retrieval info: CONSTANT: CLK3_PHASE_SHIFT STRING "0"
// Retrieval info: CONSTANT: COMPENSATE_CLOCK STRING "CLK0"
// Retrieval info: CONSTANT: INCLK0_INPUT_FREQUENCY NUMERIC "37037"

View File

@ -14,10 +14,10 @@
// ************************************************************
// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
//
// 13.1.0 Build 162 10/23/2013 SJ Web Edition
// 13.1.4 Build 182 03/12/2014 SJ Web Edition
// ************************************************************
//Copyright (C) 1991-2013 Altera Corporation
//Copyright (C) 1991-2014 Altera Corporation
//Your use of Altera Corporation's design tools, logic functions
//and other software and tools, and its AMPP partner logic
//functions, and any output files from any of the foregoing
@ -137,7 +137,7 @@ endmodule
// Retrieval info: PRIVATE: PHASE_RECONFIG_INPUTS_CHECK STRING "0"
// Retrieval info: PRIVATE: PHASE_SHIFT0 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT1 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT2 STRING "-1000.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT2 STRING "-2500.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT3 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT_STEP_ENABLED_CHECK STRING "0"
// Retrieval info: PRIVATE: PHASE_SHIFT_UNIT0 STRING "deg"
@ -195,7 +195,7 @@ endmodule
// Retrieval info: CONSTANT: CLK2_DIVIDE_BY NUMERIC "18"
// Retrieval info: CONSTANT: CLK2_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK2_MULTIPLY_BY NUMERIC "85"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-1000"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-2500"
// Retrieval info: CONSTANT: CLK3_DIVIDE_BY NUMERIC "27000000"
// Retrieval info: CONSTANT: CLK3_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK3_MULTIPLY_BY NUMERIC "2457599"

View File

@ -77,7 +77,7 @@ wire tg68_berr = (dtack_timeout == 4'd15);
reg [3:0] berr_cnt_out /* synthesis noprune */;
reg [3:0] berr_cnt;
reg berrD;
always @(posedge clk_8) begin
always @(negedge clk_8) begin
berrD <= tg68_berr;
if(reset) begin
@ -94,14 +94,23 @@ end
reg bus_ok, cpu_cycle_L;
always @(negedge clk_8) begin
// bus error if cpu owns bus, but no dtack, nor ram access,
// nor fast cpu cycle
bus_ok <= tg68_dtack || br || cpu2mem || cpu_fast_cycle;
// nor fast cpu cycle nor cpu does internal processing
bus_ok <= tg68_dtack || br || cpu2mem || cpu_fast_cycle || (tg68_busstate == 2'b01);
cpu_cycle_L <= cpu_cycle;
end
// berr_reset: registered clear signal, updated on the falling edge of clk_32.
// It is forced high while reset is asserted. Otherwise it takes the value of
// tg68_clr_berr whenever clkenaD is high and holds its previous value while
// clkenaD is low.
reg berr_reset;
always @(negedge clk_32) begin
// reset has priority over the clkenaD-qualified update
if(reset)
berr_reset <= 1'b1;
else if(clkenaD)
berr_reset <= tg68_clr_berr;
end
reg [3:0] dtack_timeout;
always @(posedge clk_8) begin
if(reset || tg68_clr_berr) begin
always @(posedge clk_32 or posedge berr_reset) begin
// if(reset || tg68_clr_berr) begin
if(berr_reset) begin
dtack_timeout <= 4'd0;
end else begin
if(cpu_cycle_L) begin
@ -113,7 +122,7 @@ always @(posedge clk_8) begin
if(dtack_timeout != 4'd15) begin
if(bus_ok)
dtack_timeout <= 4'd0;
else
else if(clkcnt == 3) // increase timout at the end of the cpu cycle
dtack_timeout <= dtack_timeout + 4'd1;
end
end
@ -464,7 +473,7 @@ ste_joystick ste_joystick (
.uds (tg68_uds ),
.lds (tg68_lds ),
.rw (tg68_rw ),
.dout (ste_joy_data_out),
.dout (ste_joy_data_out)
);
ethernec ethernec (
@ -764,12 +773,7 @@ end
/* -------------------------------------------------------------------------- */
/* ------------------------------ TG68 CPU interface ---------------------- */
/* -------------------------------------------------------------------------- */
// the 128 Mhz cpu clock is gated by clkena. Since the CPU cannot run at full 128MHz
// speed a certain amount of idle cycles have to be inserted between two subsequent
// cpu clocks. This idle time is implemented using the cpu_throttle counter.
reg [3:0] cpu_throttle;
reg clkena;
reg iCacheStore;
reg dCacheStore;
@ -793,43 +797,48 @@ reg cpu_fast_cycle; // signal indicating that the cpu runs from cache,
wire tg68_reset;
wire peripheral_reset = reset || !tg68_reset;
// the CPU throttle counter limits the CPU speed to a rate the tg68 core can
// handle. With a throttle of "4" the core will run effectively at 32MHz which
// is equivalent to ~64MHz on a real 68000. This speed will never be achieved
// since memory and peripheral access slows the cpu further
localparam CPU_THROTTLE = 4'd6;
reg [3:0] clkcnt;
reg trigger /* synthesis noprune */;
reg panic /* synthesis noprune */;
// 32MHz counter running synchronous to the 8MHz clock. This is used to
// synchronize the 32MHz cpu to the 8MHz system bus.
// The counter has no reset of its own: it stalls at 0 or 3 until clk_8 has
// the level required below.
reg [1:0] clkcnt;
always @(posedge clk_32) begin
// count 0..3 within a 8MHz cycle
// - at 3: only wrap around to 0 while clk_8 is low
// - at 0: only advance to 1 while clk_8 is high
// - at 1 and 2: always advance
if(((clkcnt == 3) && ( clk_8 == 0)) ||
((clkcnt == 0) && ( clk_8 == 1)) ||
((clkcnt != 3) && (clkcnt != 0)))
clkcnt <= clkcnt + 2'd1;
end
always @(posedge clk_128) begin
// count 0..15 within a 8MHz cycle
if(((clkcnt == 15) && ( clk_8 == 0)) ||
((clkcnt == 0) && ( clk_8 == 1)) ||
((clkcnt != 15) && (clkcnt != 0)))
clkcnt <= clkcnt + 4'd1;
// generate signal indicating the CPU may run from cache (cpu is active,
// performs a read and the caches are able to provide the requested data)
wire cacheReady = !br && steroids && (tg68_busstate[0] == 1'b0) && cache_hit;
// a clkena delayed by 180 deg is being used to
// read from the cache.
// clkenaD simply re-samples clkena on the rising edge of clk_32; clkena
// itself is assigned in the block clocked on the falling edge of clk_32,
// which is where the half clock cycle of delay comes from.
reg clkenaD;
always @(posedge clk_32)
clkenaD <= clkena;
// the TG68 core works on the rising clock edge. We thus prepare everything
// on the falling clock edge
always @(negedge clk_32) begin
// default: cpu does not run
clkena <= 1'b0;
iCacheStore <= 1'b0;
dCacheStore <= 1'b0;
cacheUpdate <= 1'b0;
trigger <= 1'b0;
panic <= 1'b0;
if(clkcnt == 15) begin
if(clkcnt == 3) begin
// 8Mhz cycle must not start directly after the cpu has been clocked
// as the address may not be stable then
// cpuDoes8MhzCycle has same timing as tg68_as
if(!clkena && !br) cpuDoes8MhzCycle <= 1'b1;
cpu_fast_cycle <= 1'b0;
if(!clkena && !br && tg68_busstate != 2'b01) cpuDoes8MhzCycle <= 1'b1;
// tg68 core does not provide a as signal, so we generate it
tg68_as <= !clkena && (tg68_busstate != 2'b01) && !br;
tg68_as <= (!clkena && tg68_busstate != 2'b01) && !br;
cpu_fast_cycle <= 1'b0;
// all other output signals are simply latched to make sure
// they don't change within a 8Mhz cycle even if the CPU
// advances. This would be a problem if e.g. The CPU would try
@ -842,32 +851,29 @@ always @(posedge clk_128) begin
tg68_fc <= tg68_fc_S;
end
// evaluate cache one cycle before cpu is allowed to access the bus again
// to make sure cache signals are routed to the cpu if the cpu is supposed
// to use it
if(cpu_throttle == 4'd1) // tg68_busstate[0] == 0 -> cpu read access
if(!br && steroids && (tg68_busstate[0] == 1'b0) && cache_hit)
cacheRead <= 1'b1;
if(clkena)
cacheRead <= 1'b0;
cacheRead <= cacheReady;
// only run cpu if throttle counter has run down
if((cpu_throttle == 4'd0) && !reset) begin
if(!reset) begin
// cpu does internal processing -> let it do this immediately
// cpu wants to read and the requested data is available from the cache -> run immediately
if((tg68_busstate == 2'b01) || cacheRead) begin
if(cacheReady && !br) begin
clkena <= 1'b1;
// cpu must never try to fetch instructions from non-mem
if((tg68_busstate == 2'b00) && !cpu2mem)
panic <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
cpu_fast_cycle <= 1'b1;
end else begin
end else
// cpu does internal processing -> let it do this immediately
// cpu wants to read and the requested data is available from the cache -> run immediately
// todo: non steroids can run this full throttle
if((tg68_busstate == 2'b01) && !br) begin
clkena <= 1'b1;
cpuDoes8MhzCycle <= 1'b0;
cpu_fast_cycle <= 1'b1;
end else
begin
// this ends a normal 8MHz bus cycle. This requires that the
// cpu/chipset had the entire cycle and not e.g. started just in
// the middle. This is verified using the cpuDoes8MhzCycle signal
@ -875,23 +881,10 @@ always @(posedge clk_128) begin
// runs from cache
// clkcnt == 14 -> clkena in cycle 15 -> cpu runs in cycle 15
if((clkcnt == 13) && cpuDoes8MhzCycle && cpu_cycle && !br && (tg68_dtack || tg68_berr)) begin
if((clkcnt == 3) && cpuDoes8MhzCycle && cpu_cycle && !br && (tg68_dtack || tg68_berr)) begin
clkena <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
// cpu must never try to fetch instructions from non-mem
if((tg68_busstate == 2'b00) && !cpu2mem)
panic <= 1'b1;
// ---------- cache debugging ---------------
// if the cache reports a hit, it should be the same data that's also
// returned by ram. Otherwise the cache is broken
// if(cache_hit && (tg68_busstate[0] == 1'b0)) begin
// if(cache_data_out != system_data_out)
// trigger <= 1'b1;
// end
if(cacheable && tg68_dtack) begin
// store data in instruction cache on cpu instruction read
if(tg68_busstate == 2'b00)
@ -907,8 +900,7 @@ always @(posedge clk_128) begin
end
end
end
end else
cpu_throttle <= cpu_throttle - 4'd1;
end
end
// TODO: generate cacheUpdate from ram_wr, so other bus masters also trigger this
@ -922,7 +914,7 @@ wire [15:0] cpu_data_in = cacheRead?cache_data_out:system_data_out;
TG68KdotC_Kernel #(2,2,2,2,2,2) tg68k (
.clk (clk_128 ),
.clk (clk_32 ),
.nReset (~reset ),
.clkena_in (clkena ),
.data_in (cpu_data_in ),
@ -968,6 +960,7 @@ cache data_cache (
.flush ( br ),
// use the tg68_*_S signals here to quickly react on cpu requests
.strobe ( clkenaD ),
.addr ( tg68_adr_S[23:1] ),
.ds ( { ~tg68_lds_S, ~tg68_uds_S } ),
@ -995,6 +988,7 @@ cache instruction_cache (
.flush ( br ),
// use the tg68_*_S signals here to quickly react on cpu requests
.strobe ( clkenaD ),
.addr ( tg68_adr_S[23:1] ),
.ds ( { ~tg68_lds_S, ~tg68_uds_S } ),
@ -1032,7 +1026,7 @@ wire cpu2ram = (!cpu2lowrom) && (
((MEM14M || MEM8M) && (tg68_adr[23:22] == 2'b01)) || // 8MB
(MEM14M && ((tg68_adr[23:22] == 2'b10) || // 12MB
(tg68_adr[23:21] == 3'b110))) || // 14MB
(steroids && (tg68_adr[23:19] == 5'b11101)) || // 512k at $e80000 for STEroids
// (steroids && (tg68_adr[23:19] == 5'b11101)) || // 512k at $e80000 for STEroids
(viking_enable && (tg68_adr[23:18] == 6'b110000)) // 256k at 0xc00000 for viking card
);
@ -1135,7 +1129,7 @@ sdram sdram (
.ds ( { ram_uds, ram_lds } ),
.we ( ram_wr ),
.oe ( ram_oe ),
.dout ( ram_data_out_64 ),
.dout ( ram_data_out_64 )
);
// multiplex spi_do, drive it from user_io if that's selected, drive

View File

@ -215,6 +215,11 @@ always @(negedge clk) begin
// writing the data register triggers the transfer
if((sel && !rw && (addr == 5'h11)) || (mw_cnt != 0)) begin
// decrease shift counter. Do this before the register write as
// register write has priority and should reload the counter
if(mw_cnt != 0)
mw_cnt <= mw_cnt - 7'd1;
if(sel && !rw && (addr == 5'h11)) begin
// first bit is evaluated immediately
mw_data_reg <= { din[14:0], 1'b0 };
@ -233,10 +238,6 @@ always @(negedge clk) begin
mw_clk <= mw_mask_reg[15];
end
// decrease shift counter
if(mw_cnt != 0)
mw_cnt <= mw_cnt - 7'd1;
// indicate end of transfer
mw_done <= (mw_cnt == 7'h01);
end