From 9ac8f3769fcb6f373f7c6441f52deaa0e3d8c4fa Mon Sep 17 00:00:00 2001 From: harbaum Date: Wed, 25 Dec 2013 21:00:38 +0000 Subject: [PATCH] STEroids work --- cores/mist/cache.v | 101 ++++++++++++++++++++++++++++++++++++++++++ cores/mist/mfp.v | 89 +++++++++++++++++-------------------- cores/mist/mist_top.v | 61 +++++++++++++------------ 3 files changed, 174 insertions(+), 77 deletions(-) create mode 100644 cores/mist/cache.v diff --git a/cores/mist/cache.v b/cores/mist/cache.v new file mode 100644 index 0000000..0894760 --- /dev/null +++ b/cores/mist/cache.v @@ -0,0 +1,101 @@ +// +// cache.v +// +// Atari ST CPU cache implementation for the MiST board +// http://code.google.com/p/mist-board/ +// +// Copyright (c) 2013 Till Harbaum +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+// + +module cache ( + input clk_128, + input clk_8, + input reset, + input flush, + + input [22:0] addr, // cpu word address + input wr, + input rd, + + output [15:0] dout, + output hit, + + // interface to update entire caches when read from ram + input [63:0] din64, + input update64 +); + +reg [3:0] t; +always @(posedge clk_128) begin + // 128Mhz counter synchronous to 8 Mhz clock + // force counter to pass state 0 exactly after the rising edge of clk_8 + if(((t == 4'd15) && ( clk_8 == 0)) || + ((t == 4'd0) && ( clk_8 == 1)) || + ((t != 4'd15) && (t != 4'd0))) + t <= t + 4'd1; +end + +// de-multiplex 64 bit data into word requested by cpu +assign dout = (word == 2'd0)?current_data[15: 0]: + (word == 2'd1)?current_data[31:16]: + (word == 2'd2)?current_data[47:32]: + current_data[63:48]; + +// wire entry according to line/address +wire [63:0] current_data = data_latch[line]; + +// cache size configuration +localparam BITS = 5; +localparam ENTRIES = 32; // 2 ** BITS +localparam ALLZERO = 32'd0; // 2 ** BITS zero bits + +// _word_ address mapping example with 16 cache lines (BITS == 4) +// 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 +// T T T T T T T T T T T T T T T T T L L L L W W +// T = stored in tag RAM +// L = cache line +// W = 16 bit word select +wire [21-BITS-1:0] tag = addr[22:2+BITS] /* synthesis keep */; +wire [BITS-1:0] line = addr[2+BITS-1:2]; +wire [1:0] word = addr[1:0]; + +/* ------------------------------------------------------------------------------ */ +/* --------------------------------- cache memory ------------------------------- */ +/* ------------------------------------------------------------------------------ */ + +reg [63:0] data_latch [ENTRIES-1:0]; +reg [21-BITS-1:0] tag_latch [ENTRIES-1:0]; +reg [ENTRIES-1:0] valid; + +// signal indicating the currently selected cache line is valid and matches the +// address the cpu is currently requesting +assign hit = valid[line] && (tag_latch[line] == tag); + +always @(posedge 
clk_128) begin + if(reset || flush) begin + valid <= ALLZERO; + end else begin + + // update64 indicates that a whole cache line is to be updated + if(update64) begin + data_latch[line] <= din64; + tag_latch[line] <= tag; + valid[line] <= 1'b1; + end + end +end + +endmodule diff --git a/cores/mist/mfp.v b/cores/mist/mfp.v index 34ccd98..06ea3de 100644 --- a/cores/mist/mfp.v +++ b/cores/mist/mfp.v @@ -139,54 +139,54 @@ reg [7:0] vr; // any pending and not masked interrupt causes the irq line to go high // if highest_irq_pending != higest_irq_active then there's a high prio // irq in service and no irq is generated until this one is finished -assign irq = ((ipr & imr) != 16'h0000) && (highest_irq_active == highest_irq_pending); +//assign irq = ((ipr & imr) != 16'h0000) && (highest_irq_active == highest_irq_pending); // handle pending and in service irqs -wire [15:0] irq_active_map = (ipr | isr) & imr; +//wire [15:0] irq_active_map = (ipr | isr) & imr; // (i am pretty sure this can be done much more elegant ...) 
// check the number of the highest active irq -wire [3:0] highest_irq_active= - ( irq_active_map[15] == 1'b1)?4'd15: - ((irq_active_map[15:14] == 2'b01)?4'd14: - ((irq_active_map[15:13] == 3'b001)?4'd13: - ((irq_active_map[15:12] == 4'b0001)?4'd12: - ((irq_active_map[15:11] == 5'b00001)?4'd11: - ((irq_active_map[15:10] == 6'b000001)?4'd10: - ((irq_active_map[15:9] == 7'b0000001)?4'd9: - ((irq_active_map[15:8] == 8'b00000001)?4'd8: - ((irq_active_map[15:7] == 9'b000000001)?4'd7: - ((irq_active_map[15:6] == 10'b000000001)?4'd6: - ((irq_active_map[15:5] == 11'b0000000001)?4'd5: - ((irq_active_map[15:4] == 12'b00000000001)?4'd4: - ((irq_active_map[15:3] == 13'b000000000001)?4'd3: - ((irq_active_map[15:2] == 14'b0000000000001)?4'd2: - ((irq_active_map[15:1] == 15'b00000000000001)?4'd1: - ((irq_active_map[15:0] == 16'b000000000000001)?4'd0: - 4'd0))))))))))))))); +//wire [3:0] highest_irq_active= +// ( irq_active_map[15] == 1'b1)?4'd15: +// ((irq_active_map[15:14] == 2'b01)?4'd14: +// ((irq_active_map[15:13] == 3'b001)?4'd13: +// ((irq_active_map[15:12] == 4'b0001)?4'd12: +// ((irq_active_map[15:11] == 5'b00001)?4'd11: +// ((irq_active_map[15:10] == 6'b000001)?4'd10: +// ((irq_active_map[15:9] == 7'b0000001)?4'd9: +// ((irq_active_map[15:8] == 8'b00000001)?4'd8: +// ((irq_active_map[15:7] == 9'b000000001)?4'd7: +// ((irq_active_map[15:6] == 10'b000000001)?4'd6: +// ((irq_active_map[15:5] == 11'b0000000001)?4'd5: +// ((irq_active_map[15:4] == 12'b00000000001)?4'd4: +// ((irq_active_map[15:3] == 13'b000000000001)?4'd3: +// ((irq_active_map[15:2] == 14'b0000000000001)?4'd2: +// ((irq_active_map[15:1] == 15'b00000000000001)?4'd1: +// ((irq_active_map[15:0] == 16'b000000000000001)?4'd0: +// 4'd0))))))))))))))); // generate irq signal if an irq is pending and no other irq of same or higher prio is in service -//assign irq = ((ipr & imr) != 16'h0000) && ((isr == 16'h0000) || (highest_irq_pending > irq_in_service)); +assign irq = ((ipr & imr) != 16'h0000) && ((isr == 16'h0000) 
|| (highest_irq_pending > irq_in_service)); // check number of current interrupt in service -//wire [3:0] irq_in_service = -// ( isr[15] == 1'b1)?4'd15: -// ((isr[15:14] == 2'b01)?4'd14: -// ((isr[15:13] == 3'b001)?4'd13: -// ((isr[15:12] == 4'b0001)?4'd12: -// ((isr[15:11] == 5'b00001)?4'd11: -// ((isr[15:10] == 6'b000001)?4'd10: -// ((isr[15:9] == 7'b0000001)?4'd9: -// ((isr[15:8] == 8'b00000001)?4'd8: -// ((isr[15:7] == 9'b000000001)?4'd7: -// ((isr[15:6] == 10'b000000001)?4'd6: -// ((isr[15:5] == 11'b0000000001)?4'd5: -// ((isr[15:4] == 12'b00000000001)?4'd4: -// ((isr[15:3] == 13'b000000000001)?4'd3: -// ((isr[15:2] == 14'b0000000000001)?4'd2: -// ((isr[15:1] == 15'b00000000000001)?4'd1: -// ((isr[15:0] == 16'b000000000000001)?4'd0: -// 4'd0))))))))))))))); +wire [3:0] irq_in_service = + ( isr[15] == 1'b1)?4'd15: + ((isr[15:14] == 2'b01)?4'd14: + ((isr[15:13] == 3'b001)?4'd13: + ((isr[15:12] == 4'b0001)?4'd12: + ((isr[15:11] == 5'b00001)?4'd11: + ((isr[15:10] == 6'b000001)?4'd10: + ((isr[15:9] == 7'b0000001)?4'd9: + ((isr[15:8] == 8'b00000001)?4'd8: + ((isr[15:7] == 9'b000000001)?4'd7: + ((isr[15:6] == 10'b000000001)?4'd6: + ((isr[15:5] == 11'b0000000001)?4'd5: + ((isr[15:4] == 12'b00000000001)?4'd4: + ((isr[15:3] == 13'b000000000001)?4'd3: + ((isr[15:2] == 14'b0000000000001)?4'd2: + ((isr[15:1] == 15'b00000000000001)?4'd1: + ((isr[15:0] == 16'b000000000000001)?4'd0: + 4'd0))))))))))))))); wire [15:0] irq_pending_map = ipr & imr; @@ -215,7 +215,7 @@ wire [7:0] gpip_cpu_out = (i & ~ddr) | (gpip & ddr); // cpu read interface always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr, - vr, irq_vec, serial_data_out_fifo_full, timera_dat_o, timerb_dat_o, + vr, serial_data_out_fifo_full, timera_dat_o, timerb_dat_o, timerc_dat_o, timerd_dat_o, timera_ctrl_o, timerb_ctrl_o, timerc_ctrl_o, timerd_ctrl_o) begin @@ -248,15 +248,13 @@ always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr, if(addr == 5'h16) dout = 
serial_data_out_fifo_full?8'h00:8'h80; end else if(iack) begin - dout = irq_vec; + dout = { vr[7:4], highest_irq_pending }; end end // delay inputs to detect changes reg [7:0] iD, iD2; -reg [7:0] irq_vec; - // mask of input irqs which are overwritten by timer a/b inputs wire [7:0] ti_irq_mask = { 3'b000, pulse_mode, 3'b000}; wire [7:0] ti_irq = { 3'b000, t_i[0], t_i[1], 3'b000}; @@ -265,11 +263,6 @@ reg iackD; always @(posedge clk) begin iackD <= iack; - // the pending irq changes in the middle of an iack - // phase, so we latch the current vector to keep ist stable - // during the entire cpu read - irq_vec <= { vr[7:4], highest_irq_pending }; - // delay inputs for irq generation, apply aer (irq edge) iD <= aer ^ ((i & ~ti_irq_mask) | (ti_irq & ti_irq_mask)); iD2 <= iD; diff --git a/cores/mist/mist_top.v b/cores/mist/mist_top.v index 7ffe773..a384d2f 100644 --- a/cores/mist/mist_top.v +++ b/cores/mist/mist_top.v @@ -94,8 +94,11 @@ always @(posedge clk_8) begin if(cpu_cycle) begin // timeout only when cpu owns the bus and when // neither dtack nor another bus master are active + + // also cacheable areas should never generate a + // bus error (TODO: check for write on first eight words) if(dtack_timeout != 3'd7) begin - if(!tg68_dtack || br || tg68_as) + if(!tg68_dtack || br || tg68_as || cacheable) dtack_timeout <= 3'd0; else dtack_timeout <= dtack_timeout + 3'd1; @@ -690,12 +693,16 @@ always @(posedge clk_8) begin tg68_lds <= tg68_lds_S; tg68_rw <= tg68_rw_S; end - -localparam CPU_THROTTLE = 4'd4; -reg [3:0] fromCache; + +// the CPU throttle counter limits the CPU speed to a rate the tg68 core can +// handle. With a throttle of "4" the core will run effectively at 32MHz which +// is equivalent to ~64MHz on a real 68000. 
This speed will never be achieved +// since memory and peripheral access slows the cpu further +localparam CPU_THROTTLE = 4'd5; reg [3:0] clkcnt; -// TODO: make sure 8Mhz cycles are complete and were from the start +reg [15:0] cacheReadLatch; + always @(posedge clk_128) begin // count 0..15 within a 8MHz cycle if(((clkcnt == 15) && ( clk_8 == 0)) || @@ -706,10 +713,14 @@ always @(posedge clk_128) begin // default: cpu does not run clkena <= 1'b0; cacheUpdate <= 1'b0; - cacheRead <= 1'b0; + + // assume the cpu uses the following 8 Mhz cycles + if(clkcnt == 15) + cpuDoes8MhzCycle <= 1'b1; // only run cpu if throttle counter has run down if((cpu_throttle == 4'd0) && !reset) begin + cacheRead <= 1'b0; // cpu does internal processing -> let it do this immediately // don't let this happen in the cpu cycle as this may result in a @@ -719,35 +730,27 @@ always @(posedge clk_128) begin clkena <= 1'b1; cpu_throttle <= CPU_THROTTLE; cpuDoes8MhzCycle <= 1'b0; - end else if((fromCache != 0) && !br && steroids && tg68_rw_S && - (tg68_busstate == 2'b00) && cache_hit && cacheable) begin + end else if( !br && steroids && (tg68_busstate == 2'b00) && cache_hit && cacheable) begin clkena <= 1'b1; cacheRead <= 1'b1; + cacheReadLatch <= cache_data_out; cpu_throttle <= CPU_THROTTLE; cpuDoes8MhzCycle <= 1'b0; - fromCache <= fromCache - 4'd1; end begin - // cpu does io -> force it to wait for end of its bus cycle - - // three cases: - // cpu addresses ram (incl. rom) -> terminate transfer early - // cpu addresses io -> terminate transfer after current cycle - // ... - - // assume the cpu uses the following 8 Mhz cycles - if(clkcnt == 15) - cpuDoes8MhzCycle <= 1'b1; - - if(clkcnt == 13) begin // 15 + // this ends a normal 8MHz bus cycle. This requires that the + // cpu/chipset had the entire cycle and not e.g. started just in + // the middle. 
This is verified using the cpuDoes8MhzCycle signal + // which is invalidated whenever the cpu runs outside the 8MHz bus cycle (internal processing or a cache read) + + // clkcnt == 14 -> clkena in cycle 15 -> cpu runs in cycle 15 + if(clkcnt == 13) begin if(cpu_uses_8mhz_cycle && cpuDoes8MhzCycle) begin clkena <= 1'b1; cpu_throttle <= CPU_THROTTLE; // update cache on cpu instruction read - if(tg68_rw && (tg68_busstate == 2'b00)) + if(tg68_busstate == 2'b00) cacheUpdate <= 1'b1; - - fromCache <= 4'd1; end end end @@ -757,14 +760,14 @@ end wire [1:0] tg68_busstate; -// rewire cache -wire [15:0] cpu_data_in_cache = cacheRead?cache_data_out:cpu_data_in; +// feed data from cache into the cpu +wire [15:0] cpu_data_in = cacheRead?cacheReadLatch:system_data_out; TG68KdotC_Kernel #(2,2,2,2,2,2) tg68k ( .clk (clk_128 ), .nReset (~reset ), .clkena_in (clkena ), - .data_in (cpu_data_in_cache ), + .data_in (cpu_data_in ), .IPL (ipl ), .IPL_autovector (1'b0 ), .berr (tg68_berr ), @@ -805,7 +808,7 @@ cache cache ( .clk_128 ( clk_128 ), .clk_8 ( clk_8 ), .reset ( reset ), - .flush ( 1'b0 ), + .flush ( br ), // use the tg68_*_S signals here to quickly react on cpu requests .addr ( tg68_adr_S[23:1] ), @@ -911,7 +914,7 @@ wire cpu_cycle_wr = data_io_br?data_io_write:(blitter_br?blitter_master_write:(a wire ram_wr = video_cycle?video_cycle_wr:(cpu_cycle?cpu_cycle_wr:1'b0); wire [15:0] ram_data_out; -wire [15:0] cpu_data_in = cpu2mem?ram_data_out:io_data_out; +wire [15:0] system_data_out = cpu2mem?ram_data_out:io_data_out; wire [15:0] ram_data_in = data_io_br?data_io_dout:(blitter_br?blitter_master_data_out:tg68_dat_out); // data strobe