1
0
mirror of https://github.com/mist-devel/mist-board.git synced 2026-02-10 01:39:48 +00:00

STEroids work

This commit is contained in:
harbaum
2013-12-25 21:00:38 +00:00
parent 0e66d224ea
commit 9ac8f3769f
3 changed files with 174 additions and 77 deletions

101
cores/mist/cache.v Normal file
View File

@@ -0,0 +1,101 @@
//
// cache.v
//
// Atari ST CPU cache implementation for the MiST board
// http://code.google.com/p/mist-board/
//
// Copyright (c) 2013 Till Harbaum <till@harbaum.org>
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Direct-mapped read cache with 64 bit (four 16 bit word) lines.
//
// A cache line is filled as a whole from din64 whenever update64 is asserted
// on a rising edge of clk_128. The line index and the tag stored with it are
// both taken from addr at that moment. Lookups are purely combinational:
// hit and dout follow addr without any clock delay.
//
// Ports:
//   clk_128   clock for all registers in this module
//   clk_8     sampled by the phase counter only
//   reset     synchronous, invalidates every cache line
//   flush     synchronous, invalidates every cache line (same effect as reset)
//   addr      cpu word address, split into tag / line / word fields below
//   wr, rd    not referenced by any logic inside this module
//   dout      16 bit word of the addressed line selected by addr[1:0]
//   hit       addressed line is valid and its stored tag matches addr
//   din64     line data written on update64
//   update64  write strobe for the line addressed by addr
module cache (
    input         clk_128,
    input         clk_8,
    input         reset,
    input         flush,
    input  [22:0] addr,      // cpu word address
    input         wr,
    input         rd,
    output [15:0] dout,
    output        hit,
    // interface to update entire caches when read from ram
    input  [63:0] din64,
    input         update64
);

    /* -------------------------- cache size configuration ------------------------ */

    // Everything below is derived from BITS, so the cache can be resized by
    // changing this single parameter.
    localparam BITS    = 5;           // number of address bits selecting a line
    localparam ENTRIES = 1 << BITS;   // number of cache lines (2 ** BITS)
    localparam TAGBITS = 21 - BITS;   // width of addr[22:2+BITS]

    /* ------------------------------ address decoding ---------------------------- */

    // _word_ address mapping:
    //   addr[22 : 2+BITS]  T = tag, stored in tag RAM
    //   addr[2+BITS-1 : 2] L = cache line
    //   addr[1 : 0]        W = 16 bit word select within the 64 bit line
    wire [TAGBITS-1:0] tag  = addr[22:2+BITS] /* synthesis keep */;
    wire [BITS-1:0]    line = addr[2+BITS-1:2];
    wire [1:0]         word = addr[1:0];

    /* -------------------------------- cache memory ------------------------------ */

    reg [63:0]        data_latch [ENTRIES-1:0];
    reg [TAGBITS-1:0] tag_latch  [ENTRIES-1:0];
    reg [ENTRIES-1:0] valid;

    /* ---------------------------------- lookup ---------------------------------- */

    // entry selected by the line field of the current address
    wire [63:0] current_data = data_latch[line];

    // de-multiplex 64 bit data into the word requested by the cpu
    assign dout = (word == 2'd0) ? current_data[15: 0] :
                  (word == 2'd1) ? current_data[31:16] :
                  (word == 2'd2) ? current_data[47:32] :
                                   current_data[63:48];

    // the currently selected cache line is valid and matches the address the
    // cpu is currently requesting
    assign hit = valid[line] && (tag_latch[line] == tag);

    /* ------------------------------- phase counter ------------------------------ */

    // 128 MHz counter synchronous to the 8 MHz clock: it only advances from
    // 15 to 0 while clk_8 is low and from 0 to 1 while clk_8 is high, which
    // forces it to pass state 0 right after the rising edge of clk_8.
    // The counter is not referenced by any other logic in this module.
    reg [3:0] t;
    always @(posedge clk_128) begin
        if (((t == 4'd15) && (clk_8 == 0)) ||
            ((t == 4'd0)  && (clk_8 == 1)) ||
            ((t != 4'd15) && (t != 4'd0)))
            t <= t + 4'd1;
    end

    /* ------------------------------- cache update ------------------------------- */

    always @(posedge clk_128) begin
        if (reset || flush) begin
            // invalidate all lines; data and tags are left untouched
            valid <= {ENTRIES{1'b0}};
        end else if (update64) begin
            // update64 indicates that a whole cache line is to be updated
            data_latch[line] <= din64;
            tag_latch[line]  <= tag;
            valid[line]      <= 1'b1;
        end
    end

endmodule

View File

@@ -139,54 +139,54 @@ reg [7:0] vr;
// any pending and not masked interrupt causes the irq line to go high
// if highest_irq_pending != highest_irq_active then there's a high prio
// irq in service and no irq is generated until this one is finished
assign irq = ((ipr & imr) != 16'h0000) && (highest_irq_active == highest_irq_pending);
//assign irq = ((ipr & imr) != 16'h0000) && (highest_irq_active == highest_irq_pending);
// handle pending and in service irqs
wire [15:0] irq_active_map = (ipr | isr) & imr;
//wire [15:0] irq_active_map = (ipr | isr) & imr;
// (I am pretty sure this can be done much more elegantly ...)
// check the number of the highest active irq
wire [3:0] highest_irq_active=
( irq_active_map[15] == 1'b1)?4'd15:
((irq_active_map[15:14] == 2'b01)?4'd14:
((irq_active_map[15:13] == 3'b001)?4'd13:
((irq_active_map[15:12] == 4'b0001)?4'd12:
((irq_active_map[15:11] == 5'b00001)?4'd11:
((irq_active_map[15:10] == 6'b000001)?4'd10:
((irq_active_map[15:9] == 7'b0000001)?4'd9:
((irq_active_map[15:8] == 8'b00000001)?4'd8:
((irq_active_map[15:7] == 9'b000000001)?4'd7:
((irq_active_map[15:6] == 10'b000000001)?4'd6:
((irq_active_map[15:5] == 11'b0000000001)?4'd5:
((irq_active_map[15:4] == 12'b00000000001)?4'd4:
((irq_active_map[15:3] == 13'b000000000001)?4'd3:
((irq_active_map[15:2] == 14'b0000000000001)?4'd2:
((irq_active_map[15:1] == 15'b00000000000001)?4'd1:
((irq_active_map[15:0] == 16'b000000000000001)?4'd0:
4'd0)))))))))))))));
//wire [3:0] highest_irq_active=
// ( irq_active_map[15] == 1'b1)?4'd15:
// ((irq_active_map[15:14] == 2'b01)?4'd14:
// ((irq_active_map[15:13] == 3'b001)?4'd13:
// ((irq_active_map[15:12] == 4'b0001)?4'd12:
// ((irq_active_map[15:11] == 5'b00001)?4'd11:
// ((irq_active_map[15:10] == 6'b000001)?4'd10:
// ((irq_active_map[15:9] == 7'b0000001)?4'd9:
// ((irq_active_map[15:8] == 8'b00000001)?4'd8:
// ((irq_active_map[15:7] == 9'b000000001)?4'd7:
// ((irq_active_map[15:6] == 10'b000000001)?4'd6:
// ((irq_active_map[15:5] == 11'b0000000001)?4'd5:
// ((irq_active_map[15:4] == 12'b00000000001)?4'd4:
// ((irq_active_map[15:3] == 13'b000000000001)?4'd3:
// ((irq_active_map[15:2] == 14'b0000000000001)?4'd2:
// ((irq_active_map[15:1] == 15'b00000000000001)?4'd1:
// ((irq_active_map[15:0] == 16'b000000000000001)?4'd0:
// 4'd0)))))))))))))));
// generate irq signal if an irq is pending and no other irq of same or higher prio is in service
//assign irq = ((ipr & imr) != 16'h0000) && ((isr == 16'h0000) || (highest_irq_pending > irq_in_service));
assign irq = ((ipr & imr) != 16'h0000) && ((isr == 16'h0000) || (highest_irq_pending > irq_in_service));
// check number of current interrupt in service
//wire [3:0] irq_in_service =
// ( isr[15] == 1'b1)?4'd15:
// ((isr[15:14] == 2'b01)?4'd14:
// ((isr[15:13] == 3'b001)?4'd13:
// ((isr[15:12] == 4'b0001)?4'd12:
// ((isr[15:11] == 5'b00001)?4'd11:
// ((isr[15:10] == 6'b000001)?4'd10:
// ((isr[15:9] == 7'b0000001)?4'd9:
// ((isr[15:8] == 8'b00000001)?4'd8:
// ((isr[15:7] == 9'b000000001)?4'd7:
// ((isr[15:6] == 10'b000000001)?4'd6:
// ((isr[15:5] == 11'b0000000001)?4'd5:
// ((isr[15:4] == 12'b00000000001)?4'd4:
// ((isr[15:3] == 13'b000000000001)?4'd3:
// ((isr[15:2] == 14'b0000000000001)?4'd2:
// ((isr[15:1] == 15'b00000000000001)?4'd1:
// ((isr[15:0] == 16'b000000000000001)?4'd0:
// 4'd0)))))))))))))));
wire [3:0] irq_in_service =
( isr[15] == 1'b1)?4'd15:
((isr[15:14] == 2'b01)?4'd14:
((isr[15:13] == 3'b001)?4'd13:
((isr[15:12] == 4'b0001)?4'd12:
((isr[15:11] == 5'b00001)?4'd11:
((isr[15:10] == 6'b000001)?4'd10:
((isr[15:9] == 7'b0000001)?4'd9:
((isr[15:8] == 8'b00000001)?4'd8:
((isr[15:7] == 9'b000000001)?4'd7:
((isr[15:6] == 10'b000000001)?4'd6:
((isr[15:5] == 11'b0000000001)?4'd5:
((isr[15:4] == 12'b00000000001)?4'd4:
((isr[15:3] == 13'b000000000001)?4'd3:
((isr[15:2] == 14'b0000000000001)?4'd2:
((isr[15:1] == 15'b00000000000001)?4'd1:
((isr[15:0] == 16'b000000000000001)?4'd0:
4'd0)))))))))))))));
wire [15:0] irq_pending_map = ipr & imr;
@@ -215,7 +215,7 @@ wire [7:0] gpip_cpu_out = (i & ~ddr) | (gpip & ddr);
// cpu read interface
always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr,
vr, irq_vec, serial_data_out_fifo_full, timera_dat_o, timerb_dat_o,
vr, serial_data_out_fifo_full, timera_dat_o, timerb_dat_o,
timerc_dat_o, timerd_dat_o, timera_ctrl_o, timerb_ctrl_o, timerc_ctrl_o,
timerd_ctrl_o) begin
@@ -248,15 +248,13 @@ always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr,
if(addr == 5'h16) dout = serial_data_out_fifo_full?8'h00:8'h80;
end else if(iack) begin
dout = irq_vec;
dout = { vr[7:4], highest_irq_pending };
end
end
// delay inputs to detect changes
reg [7:0] iD, iD2;
reg [7:0] irq_vec;
// mask of input irqs which are overwritten by timer a/b inputs
wire [7:0] ti_irq_mask = { 3'b000, pulse_mode, 3'b000};
wire [7:0] ti_irq = { 3'b000, t_i[0], t_i[1], 3'b000};
@@ -265,11 +263,6 @@ reg iackD;
always @(posedge clk) begin
iackD <= iack;
// the pending irq changes in the middle of an iack
// phase, so we latch the current vector to keep it stable
// during the entire cpu read
irq_vec <= { vr[7:4], highest_irq_pending };
// delay inputs for irq generation, apply aer (irq edge)
iD <= aer ^ ((i & ~ti_irq_mask) | (ti_irq & ti_irq_mask));
iD2 <= iD;

View File

@@ -94,8 +94,11 @@ always @(posedge clk_8) begin
if(cpu_cycle) begin
// timeout only when cpu owns the bus and when
// neither dtack nor another bus master are active
// also cacheable areas should never generate a
// bus error (TODO: check for write on first eight words)
if(dtack_timeout != 3'd7) begin
if(!tg68_dtack || br || tg68_as)
if(!tg68_dtack || br || tg68_as || cacheable)
dtack_timeout <= 3'd0;
else
dtack_timeout <= dtack_timeout + 3'd1;
@@ -690,12 +693,16 @@ always @(posedge clk_8) begin
tg68_lds <= tg68_lds_S;
tg68_rw <= tg68_rw_S;
end
localparam CPU_THROTTLE = 4'd4;
reg [3:0] fromCache;
// the CPU throttle counter limits the CPU speed to a rate the tg68 core can
// handle. With a throttle of "4" the core will run effectively at 32MHz which
// is equivalent to ~64MHz on a real 68000. This speed will never be achieved
// since memory and peripheral access slows the cpu further
localparam CPU_THROTTLE = 4'd5;
reg [3:0] clkcnt;
// TODO: make sure 8Mhz cycles are complete and were from the start
reg [15:0] cacheReadLatch;
always @(posedge clk_128) begin
// count 0..15 within a 8MHz cycle
if(((clkcnt == 15) && ( clk_8 == 0)) ||
@@ -706,10 +713,14 @@ always @(posedge clk_128) begin
// default: cpu does not run
clkena <= 1'b0;
cacheUpdate <= 1'b0;
cacheRead <= 1'b0;
// assume the cpu uses the following 8 Mhz cycles
if(clkcnt == 15)
cpuDoes8MhzCycle <= 1'b1;
// only run cpu if throttle counter has run down
if((cpu_throttle == 4'd0) && !reset) begin
cacheRead <= 1'b0;
// cpu does internal processing -> let it do this immediately
// don't let this happen in the cpu cycle as this may result in a
@@ -719,35 +730,27 @@ always @(posedge clk_128) begin
clkena <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
end else if((fromCache != 0) && !br && steroids && tg68_rw_S &&
(tg68_busstate == 2'b00) && cache_hit && cacheable) begin
end else if( !br && steroids && (tg68_busstate == 2'b00) && cache_hit && cacheable) begin
clkena <= 1'b1;
cacheRead <= 1'b1;
cacheReadLatch <= cache_data_out;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
fromCache <= fromCache - 4'd1;
end begin
// cpu does io -> force it to wait for end of its bus cycle
// three cases:
// cpu addresses ram (incl. rom) -> terminate transfer early
// cpu addresses io -> terminate transfer after current cycle
// ...
// assume the cpu uses the following 8 Mhz cycles
if(clkcnt == 15)
cpuDoes8MhzCycle <= 1'b1;
if(clkcnt == 13) begin // 15
// this ends a normal 8MHz bus cycle. This requires that the
// cpu/chipset had the entire cycle and not e.g. started just in
// the middle. This is verified using the cpuDoes8MhzCycle signal
// which is invalidated whenever the cpu uses a
// clkcnt == 14 -> clkena in cycle 15 -> cpu runs in cycle 15
if(clkcnt == 13) begin
if(cpu_uses_8mhz_cycle && cpuDoes8MhzCycle) begin
clkena <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
// update cache on cpu instruction read
if(tg68_rw && (tg68_busstate == 2'b00))
if(tg68_busstate == 2'b00)
cacheUpdate <= 1'b1;
fromCache <= 4'd1;
end
end
end
@@ -757,14 +760,14 @@ end
wire [1:0] tg68_busstate;
// rewire cache
wire [15:0] cpu_data_in_cache = cacheRead?cache_data_out:cpu_data_in;
// feed data from cache into the cpu
wire [15:0] cpu_data_in = cacheRead?cacheReadLatch:system_data_out;
TG68KdotC_Kernel #(2,2,2,2,2,2) tg68k (
.clk (clk_128 ),
.nReset (~reset ),
.clkena_in (clkena ),
.data_in (cpu_data_in_cache ),
.data_in (cpu_data_in ),
.IPL (ipl ),
.IPL_autovector (1'b0 ),
.berr (tg68_berr ),
@@ -805,7 +808,7 @@ cache cache (
.clk_128 ( clk_128 ),
.clk_8 ( clk_8 ),
.reset ( reset ),
.flush ( 1'b0 ),
.flush ( br ),
// use the tg68_*_S signals here to quickly react on cpu requests
.addr ( tg68_adr_S[23:1] ),
@@ -911,7 +914,7 @@ wire cpu_cycle_wr = data_io_br?data_io_write:(blitter_br?blitter_master_write:(a
wire ram_wr = video_cycle?video_cycle_wr:(cpu_cycle?cpu_cycle_wr:1'b0);
wire [15:0] ram_data_out;
wire [15:0] cpu_data_in = cpu2mem?ram_data_out:io_data_out;
wire [15:0] system_data_out = cpu2mem?ram_data_out:io_data_out;
wire [15:0] ram_data_in = data_io_br?data_io_dout:(blitter_br?blitter_master_data_out:tg68_dat_out);
// data strobe