1
0
mirror of https://github.com/mist-devel/mist-board.git synced 2026-02-14 19:35:58 +00:00

Obsession!!!

This commit is contained in:
harbaum
2014-01-28 19:58:48 +00:00
parent 68e31775b1
commit fe35d86b89
15 changed files with 697 additions and 391 deletions

View File

@@ -269,7 +269,7 @@ architecture logic of TG68KdotC_Kernel is
signal set_oddout : std_logic;
signal PCbase : std_logic;
signal set_PCbase : std_logic;
signal last_data_read : std_logic_vector(31 downto 0);
signal last_data_in : std_logic_vector(31 downto 0);

View File

@@ -99,16 +99,15 @@ always @(negedge clk) begin
readTimer <= 14'd11138;
end
end
end
end
// ------------------ cpu interface --------------------
wire ikbd_irq = ikbd_cr[7] && ikbd_rx_data_available; // rx irq
wire [7:0] ikbd_rx_data = fifoIn[readPin];
wire ikbd_rx_data_available;
assign ikbd_rx_data_available = (readPin != writePin) && (readTimer == 0);
wire [7:0] ikbd_rx_data = ikbd_rx_data_available?fifoIn[readPin]:fifoIn[readPin-4'd1];
wire ikbd_rx_data_available = (readPin != writePin) && (readTimer == 0);
// in a real ST the irqs are active low open collector outputs and are simply wired
// together ("wired or")

View File

@@ -23,10 +23,6 @@
// http://mikro.naprvyraz.sk/docs/ST_E/BLITTER.TXT
// http://paradox.atari.org/files/BLIT_FAQ.TXT
// TODO:
// - Try to use bus cycle 3 as well to make a "turbo blitter" being twice as fast
// - Don't spend a whole state 0 if nfsr && last_word_in_row
module blitter (
input [1:0] bus_cycle,
@@ -52,6 +48,7 @@ module blitter (
input br_in,
output reg br_out,
output irq,
input bg,
input turbo // 16Mhz blitter
);
@@ -93,6 +90,13 @@ reg fxsr;
wire cycle_advance = (bus_cycle == 2'd0) || (turbo && (bus_cycle == 2'd2));
wire cycle_read = (bus_cycle == 2'd1) || (turbo && (bus_cycle == 2'd3));
// latch bus cycle information to use at the end of the cycle (posedge clk)
reg cycle_advanceL, cycle_readL;
always @(negedge clk) begin
cycle_advanceL <= cycle_advance;
cycle_readL <= cycle_read;
end
// ------------------ cpu interface --------------------
// CPU READ
@@ -150,7 +154,7 @@ reg [15:0] bm_data_in_latch;
// latch incoming data at end of bus cycle
always @(posedge clk)
if(cycle_read)
if(cycle_readL)
bm_data_in_latch <= bm_data_in;
always @(negedge clk) begin
@@ -249,8 +253,11 @@ always @(negedge clk) begin
// change between both states (bus grabbed and bus released)
if(bus_coop_cnt == 0) begin
bus_coop_cnt <= 6'd63;
wait4bus <= !wait4bus;
// release bus immediately, grab bus only if bg is set
if(!wait4bus || (wait4bus && bg)) begin
bus_coop_cnt <= 6'd63;
wait4bus <= !wait4bus;
end
end
// blitter has just been setup, so init the state machine in first step
@@ -352,7 +359,7 @@ always @(posedge clk) begin
bm_read <= 1'b0;
bm_write <= 1'b0;
if(br_out && !br_in && (y_count != 0) && cycle_advance) begin
if(br_out && !br_in && (y_count != 0) && cycle_advanceL) begin
if(state == 2'd0) bm_read <= 1'b1;
else if(state == 2'd1) bm_read <= 1'b1;
else if(state == 2'd2) bm_write <= 1'b1;

View File

@@ -19,7 +19,7 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
module cache (
input clk_128,
input clk_8,
@@ -27,15 +27,18 @@ module cache (
input flush,
input [22:0] addr, // cpu word address
input wr,
input rd,
input [1:0] ds, // upper (0) and lower (1) data strobe
output [15:0] dout,
output reg [15:0] dout,
output hit,
// interface to update entire caches when read from ram
// interface to store entire cache lines when read from ram
input [63:0] din64,
input update64
input store,
// interface to update existing cache lines on cpu ram write
input [15:0] din16,
input update
);
reg [3:0] t;
@@ -48,53 +51,100 @@ always @(posedge clk_128) begin
t <= t + 4'd1;
end
// de-multiplex 64 bit data into word requested by cpu
assign dout = (word == 2'd0)?current_data[15: 0]:
(word == 2'd1)?current_data[31:16]:
(word == 2'd2)?current_data[47:32]:
current_data[63:48];
// wire entry according to line/address
wire [63:0] current_data = data_latch[line];
// cache size configuration
localparam BITS = 5;
localparam ENTRIES = 32; // 2 ** BITS
localparam ALLZERO = 32'd0; // 2 ** BITS zero bits
// the cache size in bytes is 8*(2^BITS), e.g. 2kBytes if BITS == 8
localparam BITS = 6;
localparam ENTRIES = 64; // 2 ** BITS
localparam ALLZERO = 64'd0; // 2 ** BITS zero bits
// _word_ address mapping example with 16 cache lines (BITS == 4)
// 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
// T T T T T T T T T T T T T T T T T L L L L W W
// T = stored in tag RAM
// L = cache line
// W = 16 bit word select
wire [21-BITS-1:0] tag = addr[22:2+BITS] /* synthesis keep */;
wire [BITS-1:0] line = addr[2+BITS-1:2];
wire [1:0] word = addr[1:0];
// W = 16 bit word select
wire [21-BITS-1:0] tag = addr[22:2+BITS];
reg [BITS-1:0] line;
/* ------------------------------------------------------------------------------ */
/* --------------------------------- cache memory ------------------------------- */
/* ------------------------------------------------------------------------------ */
reg [63:0] data_latch [ENTRIES-1:0];
reg [63:56] data_latch_7 [ENTRIES-1:0];
reg [55:48] data_latch_6 [ENTRIES-1:0];
reg [47:40] data_latch_5 [ENTRIES-1:0];
reg [39:32] data_latch_4 [ENTRIES-1:0];
reg [31:24] data_latch_3 [ENTRIES-1:0];
reg [23:16] data_latch_2 [ENTRIES-1:0];
reg [15: 8] data_latch_1 [ENTRIES-1:0];
reg [ 7: 0] data_latch_0 [ENTRIES-1:0];
reg [21-BITS-1:0] tag_latch [ENTRIES-1:0];
reg [ENTRIES-1:0] valid;
reg [21-BITS-1:0] current_tag;
// signal indicating the currently selected cache line is valid and matches the
// address the cpu is currently requesting
assign hit = valid[line] && (tag_latch[line] == tag);
// assign hit = valid[line] && (tag_latch[line] == tag);
assign hit = valid[line] && (current_tag == tag);
// permanently output data according to current line
// de-multiplex 64 bit data into word requested by cpu
always @(posedge clk_128) begin
dout <= (addr[1:0] == 2'd0)?{data_latch_1[line], data_latch_0[line]}:
(addr[1:0] == 2'd1)?{data_latch_3[line], data_latch_2[line]}:
(addr[1:0] == 2'd2)?{data_latch_5[line], data_latch_4[line]}:
{data_latch_7[line], data_latch_6[line]};
current_tag <= tag_latch[line];
end
always @(negedge clk_128)
line <= addr[2+BITS-1:2];
always @(posedge clk_128) begin
if(reset || flush) begin
valid <= ALLZERO;
end else begin
// update64 indicates that a whole cache line is to be updated
if(update64) begin
data_latch[line] <= din64;
// store indicates that a whole cache line is to be stored
if(store) begin
data_latch_7[line] <= din64[63:56];
data_latch_6[line] <= din64[55:48];
data_latch_5[line] <= din64[47:40];
data_latch_4[line] <= din64[39:32];
data_latch_3[line] <= din64[31:24];
data_latch_2[line] <= din64[23:16];
data_latch_1[line] <= din64[15: 8];
data_latch_0[line] <= din64[ 7: 0];
tag_latch[line] <= tag;
valid[line] <= 1'b1;
end
// cpu (or other bus master!) writes to ram, so update cache contents if necessary
else if(update && hit) begin
// no need to care for "tag_latch" or "valid" as they simply stay the same
if(addr[1:0] == 2'd0) begin
if(ds[1]) data_latch_0[line] <= din16[7:0];
if(ds[0]) data_latch_1[line] <= din16[15:8];
end
if(addr[1:0] == 2'd1) begin
if(ds[1]) data_latch_2[line] <= din16[7:0];
if(ds[0]) data_latch_3[line] <= din16[15:8];
end
if(addr[1:0] == 2'd2) begin
if(ds[1]) data_latch_4[line] <= din16[7:0];
if(ds[0]) data_latch_5[line] <= din16[15:8];
end
if(addr[1:0] == 2'd3) begin
if(ds[1]) data_latch_6[line] <= din16[7:0];
if(ds[0]) data_latch_7[line] <= din16[15:8];
end
end
end
end

View File

@@ -126,7 +126,7 @@ module clock (
altpll_component.clk2_divide_by = 18,
altpll_component.clk2_duty_cycle = 50,
altpll_component.clk2_multiply_by = 85,
altpll_component.clk2_phase_shift = "-1000",
altpll_component.clk2_phase_shift = "-1500",
altpll_component.clk3_divide_by = 5625,
altpll_component.clk3_duty_cycle = 50,
altpll_component.clk3_multiply_by = 512,
@@ -265,7 +265,7 @@ endmodule
// Retrieval info: PRIVATE: PHASE_RECONFIG_INPUTS_CHECK STRING "0"
// Retrieval info: PRIVATE: PHASE_SHIFT0 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT1 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT2 STRING "-1000.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT2 STRING "-1500.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT3 STRING "0.00000000"
// Retrieval info: PRIVATE: PHASE_SHIFT_STEP_ENABLED_CHECK STRING "0"
// Retrieval info: PRIVATE: PHASE_SHIFT_UNIT0 STRING "deg"
@@ -323,7 +323,7 @@ endmodule
// Retrieval info: CONSTANT: CLK2_DIVIDE_BY NUMERIC "18"
// Retrieval info: CONSTANT: CLK2_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK2_MULTIPLY_BY NUMERIC "85"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-1000"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-1500"
// Retrieval info: CONSTANT: CLK3_DIVIDE_BY NUMERIC "5625"
// Retrieval info: CONSTANT: CLK3_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK3_MULTIPLY_BY NUMERIC "512"

View File

@@ -196,9 +196,9 @@ endmodule
// Retrieval info: CONSTANT: CLK2_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK2_MULTIPLY_BY NUMERIC "85"
// Retrieval info: CONSTANT: CLK2_PHASE_SHIFT STRING "-1000"
// Retrieval info: CONSTANT: CLK3_DIVIDE_BY NUMERIC "5625"
// Retrieval info: CONSTANT: CLK3_DIVIDE_BY NUMERIC "27000000"
// Retrieval info: CONSTANT: CLK3_DUTY_CYCLE NUMERIC "50"
// Retrieval info: CONSTANT: CLK3_MULTIPLY_BY NUMERIC "512"
// Retrieval info: CONSTANT: CLK3_MULTIPLY_BY NUMERIC "2457599"
// Retrieval info: CONSTANT: CLK3_PHASE_SHIFT STRING "0"
// Retrieval info: CONSTANT: COMPENSATE_CLOCK STRING "CLK0"
// Retrieval info: CONSTANT: INCLK0_INPUT_FREQUENCY NUMERIC "37037"

View File

@@ -50,13 +50,18 @@ reg brI; // signals to bring br into local clock domain
// address auto increment takes place at the beginning of each transfer
assign addr = (cmd==2)?(addrR[22:0]-23'd1):addrR[22:0];
// latch bus cycle to have it stable at the end of the cycle (rising edge of clk8)
reg [1:0] bus_cycle_L;
always @(negedge clk_8)
bus_cycle_L <= bus_cycle;
// generate state signals required to control the sdram host interface
always @(posedge clk_8) begin
// start io transfers clock cycles after bus_cycle 0
// (after the cpu cycle)
writeD <= writeCmd && ((bus_cycle == 3) || writeD);
writeD <= writeCmd && ((bus_cycle_L == 3) || writeD);
writeD2 <= writeD;
readD <= readCmd && ((bus_cycle == 3) || readD);
readD <= readCmd && ((bus_cycle_L == 3) || readD);
readD2 <= readD;
br <= brI;

View File

@@ -1,19 +1,117 @@
// Empty dongle module. Just a placeholder ...
module dongle (
// cpu register interface
input clk,
input sel,
input cpu_as,
input uds,
input rw,
input [14:0] addr,
output[7:0] dout,
input clk,
input sel,
input cpu_as, // cpu_cycle && as
input uds,
input rw,
input [14:0] addr,
output reg [7:0] dout,
output present
output present
);
assign present = 1'b0;
assign dout = 8'h00;
assign present = 1'b0; // 0 = deactivate dongle
// ------------------------------------------------------------------------------------
// ------------------------------------ CUBASE 2 DONGLE -------------------------------
// ------------------------------------------------------------------------------------
reg [15:8] d;
reg [15:8] next_d;
// read
always @(sel, uds, rw, d) begin
dout = 8'd0;
if(sel && ~uds && rw)
dout = d;
end
wire [8:1] a = addr[7:0];
// special addresses:
// a[8:1] = 8'b11011000,0 -> 0x1b0 clear all
// a[8:1] = 8'bxxx00xx0,0 -> a5+a4+a1 = 0 sets all, incl. $0c
// update register in the middle of the transfer
always @(negedge clk) begin
if(cpu_as && ~uds) begin
next_d[15] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(!d[15] & !d[14] & !d[13] & !d[12] & !d[11] & d[10] & !d[9] & a[4] ) |
( d[14] & d[12] & d[10] & a[1]) |
( d[13] & !d[10] & a[4] ) |
( !d[14] & !d[10] & a[1]) |
( d[15] & !d[10] & a[4] ) |
( !d[12] & !d[10] & a[1]) |
(!d[8] & a[5] ));
next_d[14] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(!d[15] & !d[14] & !d[13] & !d[12] & !d[11] & !d[10] & !d[9] & d[8] & a[4] ) |
( d[14] & d[12] & d[10] & d[8] & a[1]) |
( !d[10] & !d[8] & a[1]) |
( !d[12] & !d[8] & a[1]) |
( d[15] & !d[8] & a[4] ) |
( !d[14] & !d[8] & a[1]) |
(!d[15] & a[5] ));
next_d[13] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(d[15]&d[14]&d[13]&d[12]&d[11]&d[10]&d[8]&a[1]) |
(!d[15]&!d[13]&d[11]&a[4]) |
(d[13]&!d[11]&a[4]) |
(!d[12]&!d[11]&a[1]) |
(d[15]&!d[11]&a[4]) |
(!d[14]&!d[11]&a[1]) |
(!d[9]&a[5]));
next_d[12] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(d[15]&d[14]&d[13]&d[12]&d[10]&d[8]&a[1]) |
(!d[13]&!d[10]&a[1]) |
(!d[15]&d[13]&a[4]) |
(!d[13]&!d[12]&a[1]) |
(d[15]&!d[13]&a[4]) |
(!d[14]&!d[13]&a[1]) |
(!d[11]&a[5]));
next_d[11] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(d[15]&d[14]&d[12]&d[10]&d[8]&a[1]) |
(!d[15]&!d[8]&a[1]) |
(!d[15]&!d[10]&a[1]) |
(!d[15]&!d[12]&a[1]) |
(!d[15]&!d[14]&a[1]) |
(d[15]&a[4]) |
(!d[13]&a[5]));
next_d[10] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(d[15]&d[14]&d[13]&d[12]&d[11]&d[10]&d[9]&d[8]&a[1]) |
(!d[15]&!d[13]&!d[11]&d[9]&a[4]) |
(d[11]&!d[9]&a[4]) |
(d[13]&!d[9]&a[4]) |
(d[15]&!d[9]&a[4]) |
(!d[14]&!d[9]&a[1]) |
(!d[14]&a[5]));
next_d[9] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(!d[15]&d[14]&!d[13]&!d[11]&!d[9]&a[4]) |
(!d[14]&d[9]&a[4]) |
(!d[14]&d[11]&a[4]) |
(!d[14]&d[13]&a[4]) |
(d[15]&!d[14]&a[4]) |
(d[14]&a[1]) |
(!d[12]&a[5]));
next_d[8] <= !(( a[8] & a[7] & !a[6] & a[5] & a[4] & !a[3] & !a[2] & !a[1]) |
(!d[15]&!d[14]&!d[13]&d[12]&!d[11]&!d[9]&a[4]) |
(d[14]&d[12]&a[1]) |
(!d[12]&d[11]&a[4]) |
(d[13]&!d[12]&a[4]) |
(d[15]&!d[12]&a[4]) |
(!d[14]&!d[12]&a[1]) |
(!d[10]&a[5]));
end
end
always @(posedge clk)
d <= next_d;
endmodule

View File

@@ -136,83 +136,54 @@ reg [7:0] aer, ddr, gpip;
reg [15:0] ipr, ier, imr, isr; // interrupt registers
reg [7:0] vr;
// any pending and not masked interrupt causes the irq line to go high
// if highest_irq_pending != highest_irq_active then there's a high prio
// irq in service and no irq is generated until this one is finished
//assign irq = ((ipr & imr) != 16'h0000) && (highest_irq_active == highest_irq_pending);
// handle pending and in service irqs
//wire [15:0] irq_active_map = (ipr | isr) & imr;
// (i am pretty sure this can be done much more elegant ...)
// check the number of the highest active irq
//wire [3:0] highest_irq_active=
// ( irq_active_map[15] == 1'b1)?4'd15:
// ((irq_active_map[15:14] == 2'b01)?4'd14:
// ((irq_active_map[15:13] == 3'b001)?4'd13:
// ((irq_active_map[15:12] == 4'b0001)?4'd12:
// ((irq_active_map[15:11] == 5'b00001)?4'd11:
// ((irq_active_map[15:10] == 6'b000001)?4'd10:
// ((irq_active_map[15:9] == 7'b0000001)?4'd9:
// ((irq_active_map[15:8] == 8'b00000001)?4'd8:
// ((irq_active_map[15:7] == 9'b000000001)?4'd7:
// ((irq_active_map[15:6] == 10'b000000001)?4'd6:
// ((irq_active_map[15:5] == 11'b0000000001)?4'd5:
// ((irq_active_map[15:4] == 12'b00000000001)?4'd4:
// ((irq_active_map[15:3] == 13'b000000000001)?4'd3:
// ((irq_active_map[15:2] == 14'b0000000000001)?4'd2:
// ((irq_active_map[15:1] == 15'b00000000000001)?4'd1:
// ((irq_active_map[15:0] == 16'b000000000000001)?4'd0:
// 4'd0)))))))))))))));
// generate irq signal if an irq is pending and no other irq of same or higher prio is in service
assign irq = ((ipr & imr) != 16'h0000) && ((isr == 16'h0000) || (highest_irq_pending > irq_in_service));
assign irq = ((ipr & imr) != 16'h0000) && (highest_irq_pending >= irq_in_service);
// check number of current interrupt in service
wire [3:0] irq_in_service =
( isr[15] == 1'b1)?4'd15:
((isr[15:14] == 2'b01)?4'd14:
((isr[15:13] == 3'b001)?4'd13:
((isr[15:12] == 4'b0001)?4'd12:
((isr[15:11] == 5'b00001)?4'd11:
((isr[15:10] == 6'b000001)?4'd10:
((isr[15:9] == 7'b0000001)?4'd9:
((isr[15:8] == 8'b00000001)?4'd8:
((isr[15:7] == 9'b000000001)?4'd7:
((isr[15:6] == 10'b000000001)?4'd6:
((isr[15:5] == 11'b0000000001)?4'd5:
((isr[15:4] == 12'b00000000001)?4'd4:
((isr[15:3] == 13'b000000000001)?4'd3:
((isr[15:2] == 14'b0000000000001)?4'd2:
((isr[15:1] == 15'b00000000000001)?4'd1:
((isr[15:0] == 16'b000000000000001)?4'd0:
4'd0)))))))))))))));
(isr[15] == 1'b1)?4'd15:
(isr[15:14] == 2'b1)?4'd14:
(isr[15:13] == 3'b1)?4'd13:
(isr[15:12] == 4'b1)?4'd12:
(isr[15:11] == 5'b1)?4'd11:
(isr[15:10] == 6'b1)?4'd10:
(isr[15:9] == 7'b1)?4'd9:
(isr[15:8] == 8'b1)?4'd8:
(isr[15:7] == 9'b1)?4'd7:
(isr[15:6] == 10'b1)?4'd6:
(isr[15:5] == 11'b1)?4'd5:
(isr[15:4] == 12'b1)?4'd4:
(isr[15:3] == 13'b1)?4'd3:
(isr[15:2] == 14'b1)?4'd2:
(isr[15:1] == 15'b1)?4'd1:
(isr[15:0] == 16'b1)?4'd0:
4'd0;
wire [15:0] irq_pending_map = ipr & imr;
// check the number of the highest pending irq
wire [3:0] highest_irq_pending =
( irq_pending_map[15] == 1'b1)?4'd15:
((irq_pending_map[15:14] == 2'b01)?4'd14:
((irq_pending_map[15:13] == 3'b001)?4'd13:
((irq_pending_map[15:12] == 4'b0001)?4'd12:
((irq_pending_map[15:11] == 5'b00001)?4'd11:
((irq_pending_map[15:10] == 6'b000001)?4'd10:
((irq_pending_map[15:9] == 7'b0000001)?4'd9:
((irq_pending_map[15:8] == 8'b00000001)?4'd8:
((irq_pending_map[15:7] == 9'b000000001)?4'd7:
((irq_pending_map[15:6] == 10'b000000001)?4'd6:
((irq_pending_map[15:5] == 11'b0000000001)?4'd5:
((irq_pending_map[15:4] == 12'b00000000001)?4'd4:
((irq_pending_map[15:3] == 13'b000000000001)?4'd3:
((irq_pending_map[15:2] == 14'b0000000000001)?4'd2:
((irq_pending_map[15:1] == 15'b00000000000001)?4'd1:
((irq_pending_map[15:0] == 16'b000000000000001)?4'd0:
4'd0)))))))))))))));
(irq_pending_map[15] == 1'b1)?4'd15:
(irq_pending_map[15:14] == 2'b1)?4'd14:
(irq_pending_map[15:13] == 3'b1)?4'd13:
(irq_pending_map[15:12] == 4'b1)?4'd12:
(irq_pending_map[15:11] == 5'b1)?4'd11:
(irq_pending_map[15:10] == 6'b1)?4'd10:
(irq_pending_map[15:9] == 7'b1)?4'd9:
(irq_pending_map[15:8] == 8'b1)?4'd8:
(irq_pending_map[15:7] == 9'b1)?4'd7:
(irq_pending_map[15:6] == 10'b1)?4'd6:
(irq_pending_map[15:5] == 11'b1)?4'd5:
(irq_pending_map[15:4] == 12'b1)?4'd4:
(irq_pending_map[15:3] == 13'b1)?4'd3:
(irq_pending_map[15:2] == 14'b1)?4'd2:
(irq_pending_map[15:1] == 15'b1)?4'd1:
(irq_pending_map[15:0] == 16'b1)?4'd0:
4'd0;
// gpip as output to the cpu (ddr bit == 1 -> gpip pin is output)
wire [7:0] gpip_cpu_out = (i & ~ddr) | (gpip & ddr);
// cpu read interface
always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr,
vr, serial_data_out_fifo_full, timera_dat_o, timerb_dat_o,
@@ -234,7 +205,7 @@ always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr,
if(addr == 5'h08) dout = isr[7:0];
if(addr == 5'h0a) dout = imr[7:0];
if(addr == 5'h0b) dout = vr;
// timers
if(addr == 5'h0c) dout = { 3'b000, timera_ctrl_o};
if(addr == 5'h0d) dout = { 3'b000, timerb_ctrl_o};
@@ -248,36 +219,41 @@ always @(iack, sel, ds, rw, addr, gpip_cpu_out, aer, ddr, ier, ipr, isr, imr,
if(addr == 5'h16) dout = serial_data_out_fifo_full?8'h00:8'h80;
end else if(iack) begin
dout = { vr[7:4], highest_irq_pending };
dout = irq_vec;
end
end
// delay inputs to detect changes
reg [7:0] iD, iD2;
// mask of input irqs which are overwritten by timer a/b inputs
wire [7:0] ti_irq_mask = { 3'b000, pulse_mode, 3'b000};
wire [7:0] ti_irq = { 3'b000, t_i[0], t_i[1], 3'b000};
// delay inputs to detect changes
reg [7:0] iD, iD2;
reg iackD;
always @(posedge clk) begin
// latch to keep irq vector stable during irq ack cycle
reg [7:0] irq_vec;
always @(negedge clk) begin
iackD <= iack;
// update the irq vector periodically unless we are in the
// middle of an interrupt acknowledge phase
if(!iack)
irq_vec <= { vr[7:4], highest_irq_pending };
// delay inputs for irq generation, apply aer (irq edge)
iD <= aer ^ ((i & ~ti_irq_mask) | (ti_irq & ti_irq_mask));
iD2 <= iD;
end
always @(negedge clk) begin
if(reset) begin
ipr <= 16'h0000; ier <= 16'h0000;
imr <= 16'h0000; isr <= 16'h0000;
writePout <= 0;
end else begin
end else begin
// ack pending irqs and set isr if enabled
if(iackD) begin
if(iack && !iackD) begin
// remove active bit from ipr
ipr[highest_irq_pending] <= 1'b0;
@@ -319,6 +295,7 @@ always @(negedge clk) begin
if(addr == 5'h06) ipr[7:0] <= ipr[7:0] & din;
if(addr == 5'h08) isr[7:0] <= isr[7:0] & din; // zero bits are cleared
if(addr == 5'h0a) imr[7:0] <= din;
if(addr == 5'h0b) vr <= din;

View File

@@ -57,11 +57,19 @@ always @(posedge XCLK_I) begin
prescaler_counter <= prescaler_counter + 8'd1;
end
end
// pulse is generated on the rising edge and detected in the main mfp on the falling edge
always @(posedge CLK) begin
T_O_PULSE <= 1'b0;
if (!RST && count && (down_counter === 8'd1))
T_O_PULSE <= 1'b1;
end
always @(negedge CLK) begin
if (RST === 1'b1) begin
T_O_PULSE <= 1'b0;
// T_O_PULSE <= 1'b0;
T_O <= 1'b0;
control <= 4'd0;
data <= 8'd0;
@@ -76,7 +84,7 @@ always @(negedge CLK) begin
xclk_r <= (prescaler_counter === 8'd0);
xclk_r2 <= xclk_r;
T_O_PULSE <= 1'b0;
// T_O_PULSE <= 1'b0;
// if a write request comes from the main unit
// then write the data to the appropriate register.
@@ -118,7 +126,7 @@ always @(negedge CLK) begin
// pulse the timer out
T_O <= ~T_O;
down_counter <= data;
T_O_PULSE <= 1'b1;
// T_O_PULSE <= 1'b1;
end else begin

View File

@@ -1,16 +1,15 @@
/********************************************/
/* */
/********************************************/
module mist_top (
// clock inputs
input wire [ 2-1:0] CLOCK_27, // 27 MHz
// LED outputs
output wire LED, // LED Yellow
// UART
output wire UART_TX, // UART Transmitter
input wire UART_RX, // UART Receiver
output wire UART_TX, // UART Transmitter (MIDI out)
input wire UART_RX, // UART Receiver (MIDI in)
// VGA
output wire VGA_HS, // VGA H_SYNC
output wire VGA_VS, // VGA V_SYNC
@@ -65,7 +64,7 @@ wire io_dtack = vreg_sel || mmu_sel || mfp_sel || mfp_iack ||
// required to properly detect that a blitter is not present.
// a bus error is now generated once no dtack is seen for 63 clock cycles.
wire tg68_clr_berr;
wire tg68_berr = (dtack_timeout == 3'd7);
wire tg68_berr = (dtack_timeout == 4'd15);
// count bus errors for debugging purposes. we can thus trigger for a
// certain bus error
@@ -86,27 +85,35 @@ always @(posedge clk_8) begin
end
end
reg [2:0] dtack_timeout;
reg bus_ok, cpu_cycle_L;
always @(negedge clk_8) begin
// bus error if cpu owns bus, but no dtack, nor ram access,
// nor fast cpu cycle
bus_ok <= tg68_dtack || br || cpu2mem || cpu_fast_cycle;
cpu_cycle_L <= cpu_cycle;
end
reg [3:0] dtack_timeout;
always @(posedge clk_8) begin
if(reset || tg68_clr_berr) begin
dtack_timeout <= 3'd0;
dtack_timeout <= 4'd0;
end else begin
if(cpu_cycle) begin
if(cpu_cycle_L) begin
// timeout only when cpu owns the bus and when
// neither dtack nor another bus master are active
// also cacheable areas should never generate a
// bus error (TODO: check for write on first eight words)
if(dtack_timeout != 3'd7) begin
if(!tg68_dtack || br || tg68_as || cacheable)
dtack_timeout <= 3'd0;
// also ram areas should never generate a
// bus error for reading. But rom does for writing
if(dtack_timeout != 4'd15) begin
if(bus_ok)
dtack_timeout <= 4'd0;
else
dtack_timeout <= dtack_timeout + 3'd1;
dtack_timeout <= dtack_timeout + 4'd1;
end
end
end
end
// no tristate busses exist inside the FPGA. so bus request doesn't do
// much more than halting the cpu by suppressing dtack
`define BRWIRE
@@ -120,21 +127,20 @@ always @(negedge clk_8)
`endif
// request interrupt ack from mfp for IPL == 6
wire mfp_iack = cpu_cycle && cpu2iack && address_strobe && (tg68_adr[3:1] == 3'b110);
wire mfp_iack = cpu_cycle && cpu2iack && tg68_as && (tg68_adr[3:1] == 3'b110);
// the tg68k core with the wrapper of the minimig doesn't support non-autovector
// interrupts. Also the existing support for them inside the tg68 kernel is/was broken.
// For the atari i've fixed the non-autovector support inside the kernel and switched
// entirely to non-autovector interrupts. This means that i now have to provide
// the tg68k core doesn't reliably support mixed usage of autovector and non-autovector
// interrupts.
// For the atari we've fixed the non-autovector support inside the kernel and switched
// entirely to non-autovector interrupts. This means that we now have to provide
// the vectors for those interrupts that on the ST are autovector ones. This needs
// to be done for IPL2 (hbi) and IPL4 (vbi)
wire auto_iack = cpu_cycle && cpu2iack && address_strobe &&
((tg68_adr[3:1] == 3'b100) || (tg68_adr[3:1] == 3'b010));
wire auto_iack = cpu_cycle && cpu2iack && tg68_as &&
((tg68_adr[3:1] == 3'b100) || (tg68_adr[3:1] == 3'b010));
wire [7:0] auto_vector_vbi = (auto_iack && (tg68_adr[3:1] == 3'b100))?8'h1c:8'h00;
wire [7:0] auto_vector_hbi = (auto_iack && (tg68_adr[3:1] == 3'b010))?8'h1a:8'h00;
wire [7:0] auto_vector = auto_vector_vbi | auto_vector_hbi;
// interfaces not implemented:
// $fff00000 - $fff000ff - IDE
// $ffff8780 - $ffff878f - SCSI
@@ -142,10 +148,10 @@ wire [7:0] auto_vector = auto_vector_vbi | auto_vector_hbi;
// $fffffc20 - $fffffc3f - RTC
// $ffff8e00 - $ffff8e0f - VME (only fake implementation)
wire io_sel = cpu_cycle && cpu2io && address_strobe ;
wire io_sel = cpu_cycle && cpu2io && tg68_as ;
// dongle interface at $fb0000 - $fbffff
wire dongle_sel = dongle_present && cpu_cycle && address_strobe && tg68_rw && (tg68_adr[23:16] == 8'hfb);
wire dongle_sel = dongle_present && cpu_cycle && tg68_as && tg68_rw && (tg68_adr[23:16] == 8'hfb);
wire [7:0] dongle_data_out;
// mmu 8 bit interface at $ff8000 - $ff8001
@@ -176,7 +182,7 @@ wire [15:0] ste_dma_snd_data_out;
// mfp 8 bit interface at $fffa00 - $fffa3f
wire mfp_sel = io_sel && ({tg68_adr[15:6], 6'd0} == 16'hfa00);
wire [7:0] mfp_data_out;
// acia 8 bit interface at $fffc00 - $fffc07
wire acia_sel = io_sel && ({tg68_adr[15:8], 8'd0} == 16'hfc00);
wire [7:0] acia_data_out;
@@ -203,9 +209,9 @@ wire [15:0] io_data_out = vreg_data_out | dma_data_out | blitter_data_out |
wire init = ~pll_locked;
video video (
.clk (clk_32 ),
.clk (clk_32 ),
.clk27 (CLOCK_27[0]),
.bus_cycle (bus_cycle ),
.bus_cycle (bus_cycle ),
// spi for OSD
.sdi (SPI_DI ),
@@ -337,12 +343,27 @@ acia acia (
wire [23:1] blitter_master_addr;
wire blitter_master_write;
wire blitter_master_read;
wire blitter_br;
wire blitter_irq;
wire [15:0] blitter_master_data_out;
wire blitter_br = blitter_br_out;
wire blitter_bg = 1'b1;
//wire blitter_bg = blitter_br;
//reg blitter_br;
//always @(posedge clk_128) begin
// if(blitter_br_out && (tg68_busstate == 2'd0))
// blitter_br <= 1'b1;
// else if(!blitter_br_out)
// blitter_br <= 1'b0;
//end
wire blitter_br_out;
blitter blitter (
.bus_cycle (bus_cycle_8 ),
.bus_cycle (bus_cycle ),
// cpu interface
.clk (clk_8 ),
@@ -362,7 +383,8 @@ blitter blitter (
.bm_data_in (ram_data_out),
.br_in (data_io_br ),
.br_out (blitter_br ),
.br_out (blitter_br_out ),
.bg (blitter_bg ),
.irq (blitter_irq ),
.turbo (steroids )
@@ -386,7 +408,7 @@ dongle dongle (
.sel (dongle_sel ),
.present (dongle_present),
.addr (tg68_adr[15:1]),
.cpu_as (address_strobe),
.cpu_as (cpu_cycle && tg68_as && !br),
.uds (tg68_uds ),
.rw (tg68_rw ),
.dout (dongle_data_out)
@@ -552,26 +574,22 @@ clock clock (
);
//// 8MHz clock ////
wire [3:0] bus_cycle;
reg [3:0] clk_cnt;
reg [1:0] bus_cycle_8;
reg [1:0] bus_cycle;
always @ (posedge clk_32, negedge pll_locked) begin
if (!pll_locked) begin
clk_cnt <= #1 4'b0010;
bus_cycle_8 <= 2'd3;
bus_cycle <= 2'd0;
end else begin
clk_cnt <= #1 clk_cnt + 4'd1;
if(clk_cnt[1:0] == 2'd2) begin
bus_cycle_8 <= bus_cycle_8 + 2'd1;
if(clk_cnt[1:0] == 2'd1) begin
bus_cycle <= bus_cycle + 2'd1;
end
end
end
assign clk_8 = clk_cnt[1];
assign bus_cycle = clk_cnt - 4'd2;
// bus cycle counter for debugging
reg [31:0] cycle_counter /* synthesis noprune */;
@@ -583,62 +601,55 @@ always @ (posedge clk_8) begin
end
// tg68
// tg68 bus interface. These are the signals which are latched
// for the 8MHz bus.
wire [15:0] tg68_dat_in;
reg [15:0] tg68_dat_out;
reg [31:0] tg68_adr;
wire [2:0] tg68_IPL;
wire tg68_dtack;
reg tg68_as;
reg tg68_uds;
reg tg68_lds;
reg tg68_rw;
reg [2:0] tg68_fc;
wire reset = system_ctrl[0];
// ------------- generate VBI (IPL = 4) --------------
wire vbi_ack;
assign vbi_ack = cpu2iack && address_strobe && (tg68_adr[3:1] == 3'b100);
reg vsD, vsD2, vbi;
wire vbi_ack = cpu2iack && cpu_cycle && tg68_as && (tg68_adr[3:1] == 3'b100);
reg vsD, vsD2, vsI, vbi;
always @(negedge clk_8)
always @(negedge clk_8) begin
vsD <= st_vs;
always @(posedge clk_8) begin
vsD2 <= vsD; // delay by one
vsI <= vsD && !vsD2; // create single event
if(reset || vbi_ack)
vbi <= 1'b0;
else if(vsI)
else if(vsD && !vsD2)
vbi <= 1'b1;
end
// ------------- generate HBI (IPL = 2) --------------
wire hbi_ack;
assign hbi_ack = cpu2iack && address_strobe && (tg68_adr[3:1] == 3'b010);
reg hsD, hsD2, hbi;
wire hbi_ack = cpu2iack && cpu_cycle && tg68_as && (tg68_adr[3:1] == 3'b010);
reg hsD, hsD2, hsI, hbi;
always @(negedge clk_8)
always @(negedge clk_8) begin
hsD <= st_hs;
always @(posedge clk_8) begin
hsD2 <= hsD; // delay by one
hsI <= hsD && !hsD2; // create single event
if(reset || hbi_ack)
hbi <= 1'b0;
else if(hsI)
else if(hsD && !hsD2)
hbi <= 1'b1;
end
wire mfp_irq;
reg [2:0] ipl;
always @(posedge clk_8) begin
if(reset) begin
ipl <= 3'b111;
end else begin
reg [2:0] ipl;
always @(posedge clk_128) begin
if(reset)
ipl <= 3'b111;
else begin
// ipl[0] is tied high on the atari
if(mfp_irq) ipl <= 3'b001; // mfp has IPL 6
else if(vbi) ipl <= 3'b011; // vbi has IPL 4
@@ -651,19 +662,14 @@ end
/* ------------------------------ TG68 CPU interface ---------------------- */
/* -------------------------------------------------------------------------- */
// signal indicating that the cpu is making use of the current 8mhz cycle
// this means that the cpu owns the bus and either a normal bus cycle
// ends or a bus error has happened.
wire cpu_uses_8mhz_cycle = cpu_cycle && !br && (!tg68_dtack || tg68_berr || (tg68_busstate == 2'b01));
wire cpu_req_bus = !(tg68_busstate == 2'b01);
// the 128 Mhz cpu clock is gated by clkena. Since the CPU cannot run at full 128MHz
// speed a certain amount of idle cycles have to be inserted between two subsequent
// cpu clocks. This idle time is implemented using the cpu_throttle counter.
reg [3:0] cpu_throttle;
reg clkena;
reg iCacheStore;
reg dCacheStore;
reg cacheUpdate;
reg cacheRead;
reg cpuDoes8MhzCycle;
@@ -672,16 +678,16 @@ reg cpuDoes8MhzCycle;
// requirements
wire [15:0] tg68_dat_out_S;
wire [31:0] tg68_adr_S;
wire [2:0] tg68_fc_S;
wire tg68_uds_S;
wire tg68_lds_S;
wire tg68_rw_S;
reg address_strobe; // should be "cpu_active" oe similar
reg tg68_as;
reg cpu_fast_cycle; // signal indicating that the cpu runs from cache
always @(posedge clk_8) begin
// tg68 core does not provide a as signal, so we generate it
tg68_as <= ~(tg68_busstate != 2'b01);
address_strobe <= cpu_cycle_is_next && !tg68_as && !br;
tg68_as <= (tg68_busstate != 2'b01) && !br;
// all other output signals are simply latched to make sure
// they don't change within a 8Mhz cycle even if the CPU
@@ -692,6 +698,7 @@ always @(posedge clk_8) begin
tg68_uds <= tg68_uds_S;
tg68_lds <= tg68_lds_S;
tg68_rw <= tg68_rw_S;
tg68_fc <= tg68_fc_S;
end
// the CPU throttle counter limits the CPU speed to a rate the tg68 core can
@@ -701,7 +708,7 @@ end
localparam CPU_THROTTLE = 4'd5;
reg [3:0] clkcnt;
reg [15:0] cacheReadLatch;
reg trigger /* synthesis noprune */;
always @(posedge clk_128) begin
// count 0..15 within a 8MHz cycle
@@ -712,44 +719,69 @@ always @(posedge clk_128) begin
// default: cpu does not run
clkena <= 1'b0;
iCacheStore <= 1'b0;
dCacheStore <= 1'b0;
cacheUpdate <= 1'b0;
trigger <= 1'b0;
// assume the cpu uses the following 8 Mhz cycles
if(clkcnt == 15)
// cpuDoes8MhzCycle has same timing as tg68_as
if(clkcnt == 15) begin
cpuDoes8MhzCycle <= 1'b1;
cpu_fast_cycle <= 1'b0;
end
// evaluate cache one cycle before cpu is allowed to access the bus again
// to make sure cache signals are routed to the cpu if the cpu is supposed
// to use it
if(cpu_throttle == 4'd1) // tg68_busstate[0] == 0 -> cpu read access
if(!br && steroids && (tg68_busstate[0] == 1'b0) && cache_hit)
cacheRead <= 1'b1;
if(clkena)
cacheRead <= 1'b0;
// only run cpu if throttle counter has run down
if((cpu_throttle == 4'd0) && !reset) begin
cacheRead <= 1'b0;
// cpu does internal processing -> let it do this immediately
// don't let this happen in the cpu cycle as this may result in a
// read/write state which suddenly happens right in the middle of
// the ongoing cpu cycle
if(tg68_busstate == 2'b01) begin
// cpu wants to read and the requested data is available from the cache -> run immediately
if((tg68_busstate == 2'b01) || cacheRead) begin
clkena <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
end else if( !br && steroids && (tg68_busstate == 2'b00) && cache_hit && cacheable) begin
clkena <= 1'b1;
cacheRead <= 1'b1;
cacheReadLatch <= cache_data_out;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
end begin
cpu_fast_cycle <= 1'b1;
end else begin
// this ends a normal 8MHz bus cycle. This requires that the
// cpu/chipset had the entire cycle and not e.g. started just in
// the middle. This is verified using the puDoes8MhzCycle signal
// which is invalidated whenever the cpu uses a
// the middle. This is verified using the cpuDoes8MhzCycle signal
// which is invalidated whenever the cpu uses an internal cycle or
// runs from cache
// clkcnt == 14 -> clkena in cycle 15 -> cpu runs in cycle 15
if(clkcnt == 13) begin
if(cpu_uses_8mhz_cycle && cpuDoes8MhzCycle) begin
clkena <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
// update cache on cpu instruction read
if((clkcnt == 13) && cpuDoes8MhzCycle && cpu_cycle && !br && (tg68_dtack || tg68_berr)) begin
clkena <= 1'b1;
cpu_throttle <= CPU_THROTTLE;
cpuDoes8MhzCycle <= 1'b0;
// ---------- cache debugging ---------------
// if the cache reports a hit, it should be the same data that's also
// returned by ram. Otherwise the cache is broken
// if(cache_hit && (tg68_busstate[0] == 1'b0)) begin
// if(cache_data_out != system_data_out)
// trigger <= 1'b1;
// end
if(cacheable && tg68_dtack) begin
// store data in instruction cache on cpu instruction read
if(tg68_busstate == 2'b00)
iCacheStore <= 1'b1;
// store data in data cache on cpu data read
if(tg68_busstate == 2'b10)
dCacheStore <= 1'b1;
// update cache on data write
if(tg68_busstate == 2'b11)
cacheUpdate <= 1'b1;
end
end
@@ -758,10 +790,14 @@ always @(posedge clk_128) begin
cpu_throttle <= cpu_throttle - 4'd1;
end
// TODO: generate cacheUpdate from ram_wr, so other bus masters also trigger this
// same goes for cache lds/uds and the address used for update16 !!!!
wire [1:0] tg68_busstate;
// feed data from cache into the cpu
wire [15:0] cpu_data_in = cacheRead?cacheReadLatch:system_data_out;
wire [15:0] cache_data_out = data_cache_hit?data_cache_data_out:inst_cache_data_out;
wire [15:0] cpu_data_in = cacheRead?cache_data_out:system_data_out;
TG68KdotC_Kernel #(2,2,2,2,2,2) tg68k (
.clk (clk_128 ),
@@ -769,7 +805,7 @@ TG68KdotC_Kernel #(2,2,2,2,2,2) tg68k (
.clkena_in (clkena ),
.data_in (cpu_data_in ),
.IPL (ipl ),
.IPL_autovector (1'b0 ),
.IPL_autovector(1'b0 ),
.berr (tg68_berr ),
.clr_berr (tg68_clr_berr ),
.CPU (system_ctrl[5:4] ), // 00=68000
@@ -780,13 +816,15 @@ TG68KdotC_Kernel #(2,2,2,2,2,2) tg68k (
.nWr (tg68_rw_S ),
.busstate (tg68_busstate ), // 00-> fetch code 10->read data 11->write data 01->no memaccess
.nResetOut ( ),
.FC ( )
.FC (tg68_fc_S )
);
/* ------------------------------------------------------------------------------ */
/* ---------------------------------- cpu cache --------------------------------- */
/* ------------------------------------------------------------------------------ */
wire cache_hit = cacheable && (data_cache_hit || inst_cache_hit);
// Any type of memory that may use a cache. Since it's a pure read cache we don't
// have to differentiate between ram and rom. Cartridge is not cached since we might
// attach dongles there someday
@@ -797,14 +835,11 @@ wire cacheable = ((tg68_adr_S[23:22] == 2'b00) || // ordinary 4MB
(tg68_adr_S[23:18] == 6'b111000) || // 256k TOS
(tg68_adr_S[23:17] == 7'b1111110) || // first 128k of 192k TOS
(tg68_adr_S[23:16] == 8'b11111110) ); // second 64k of 192k TOS
wire cache_hit /* synthesis keep */;
wire [15:0] cache_data_out /* synthesis keep */;
wire data_cache_hit;
wire [15:0] data_cache_data_out;
wire tg68_rd = (tg68_busstate != 2'b01) && !tg68_rw_S;
wire tg68_wr = (tg68_busstate != 2'b01) && tg68_rw_S;
cache cache (
cache data_cache (
.clk_128 ( clk_128 ),
.clk_8 ( clk_8 ),
.reset ( reset ),
@@ -812,18 +847,47 @@ cache cache (
// use the tg68_*_S signals here to quickly react on cpu requests
.addr ( tg68_adr_S[23:1] ),
// .ds ( { tg68_uds_S, tg68_lds_S } ),
.wr ( tg68_wr ),
.rd ( tg68_rd ),
.ds ( { ~tg68_lds_S, ~tg68_uds_S } ),
// at the same time the 8mhz ram access is required to monitor
// cpu accesses to the ram as well as other bus masters
.hit ( cache_hit ),
.dout ( cache_data_out ),
// the interface to the cpus read interface is pretty simple
.hit ( data_cache_hit ),
.dout ( data_cache_data_out ),
.update64 ( cacheUpdate ),
// interface to update entire cache lines on ram read
.store ( dCacheStore ),
.din64 ( ram_data_out_64 ),
// this is a write through cache. Thus the cpus write access to ram
// is not intercepted but only used to update matching cache lines
.update ( cacheUpdate ),
.din16 ( ram_data_in )
);
wire inst_cache_hit;
wire [15:0] inst_cache_data_out;
// Instruction cache: an instance of the "cache" module whose hit flag and
// read data are reported on inst_cache_hit / inst_cache_data_out. Lines are
// filled via iCacheStore, which the clk_128 cpu control logic raises on a
// cpu code fetch (tg68_busstate == 2'b00).
cache instruction_cache (
.clk_128 ( clk_128 ),
.clk_8 ( clk_8 ),
.reset ( reset ),
// NOTE(review): flush is driven by the bus request signal (br), presumably to
// drop the cache contents whenever another bus master may alter ram -- confirm
// against the cache module
.flush ( br ),
// use the tg68_*_S signals here to quickly react on cpu requests
.addr ( tg68_adr_S[23:1] ),
// data strobes are inverted before being passed on; note the order { lds, uds }
.ds ( { ~tg68_lds_S, ~tg68_uds_S } ),
// the interface to the cpu's read interface is pretty simple
.hit ( inst_cache_hit ),
.dout ( inst_cache_data_out ),
// interface to update entire cache lines on ram read
.store ( iCacheStore ),
.din64 ( ram_data_out_64 ),
// this is a write through cache. Thus the cpu's write access to ram
// is not intercepted but only used to update matching cache lines
.update ( cacheUpdate ),
.din16 ( ram_data_in )
);
/* ------------------------------------------------------------------------------ */
@@ -872,11 +936,11 @@ wire cpu2mem = cpu2ram14 || (tg68_rw && (cpu2tos192k || cpu2tos256k || cpu2cart)
// io from 0xff0000
wire cpu2io = (tg68_adr[23:16] == 8'hff);
// irq ack happens on 0xfffffX
wire cpu2iack = (tg68_adr[23:4] == 20'hfffff);
// irq ack happens
wire cpu2iack = (tg68_fc == 3'b111);
// generate dtack (for st ram only and rom, no dtack for rom write)
assign tg68_dtack = ~(((cpu2mem && address_strobe) || io_dtack ) && !br);
// generate dtack (for st ram and rom on read, no dtack for rom write)
assign tg68_dtack = ((cpu2mem && cpu_cycle && tg68_as) || io_dtack ) && !br;
/* ------------------------------------------------------------------------------ */
/* ------------------------------- bus multiplexer ------------------------------ */
@@ -888,16 +952,11 @@ wire second_cpu_slot = (mste && enable_16mhz) || steroids;
// Two of the four cycles are being used. One for video (+STE audio) and one for
// cpu, DMA and Blitter. A third is optionally being used for faster CPU
wire video_cycle = (bus_cycle[3:2] == 0);
wire cpu_cycle = (bus_cycle[3:2] == 1) || (second_cpu_slot && (bus_cycle[3:2] == 3));
// if things are to be latched is usually required to know what type the next cycle
// will be
wire video_cycle_is_next = (bus_cycle[3:2] == 3);
wire cpu_cycle_is_next = (bus_cycle[3:2] == 0) || (second_cpu_slot && (bus_cycle[3:2] == 2));
wire video_cycle = (bus_cycle == 0);
wire cpu_cycle = (bus_cycle == 1) || (second_cpu_slot && (bus_cycle == 3));
// ----------------- RAM address --------------
wire [22:0] video_cycle_addr = (st_hs&&ste)?ste_dma_snd_addr:video_address;
wire [22:0] video_cycle_addr = (st_hs && ste)?ste_dma_snd_addr:video_address;
wire [22:0] cpu_cycle_addr = data_io_br?data_io_addr:(blitter_br?blitter_master_addr:tg68_adr[23:1]);
wire [22:0] ram_address = video_cycle?video_cycle_addr:cpu_cycle_addr;
@@ -905,12 +964,12 @@ wire [22:0] ram_address = video_cycle?video_cycle_addr:cpu_cycle_addr;
// memory access during the video cycle is shared between video and ste_dma_snd
wire video_cycle_oe = (st_hs && ste)?ste_dma_snd_read:video_read;
// memory access during the cpu cycle is shared between blitter and cpu
wire cpu_cycle_oe = data_io_br?data_io_read:(blitter_br?blitter_master_read:(address_strobe && tg68_rw && cpu2mem));
wire cpu_cycle_oe = data_io_br?data_io_read:(blitter_br?blitter_master_read:(cpu_cycle && tg68_as && tg68_rw && cpu2mem));
wire ram_oe = video_cycle?video_cycle_oe:(cpu_cycle?cpu_cycle_oe:1'b0);
// ----------------- RAM write -----------------
wire video_cycle_wr = 1'b0;
wire cpu_cycle_wr = data_io_br?data_io_write:(blitter_br?blitter_master_write:(address_strobe && ~tg68_rw && cpu2ram));
wire cpu_cycle_wr = data_io_br?data_io_write:(blitter_br?blitter_master_write:(cpu_cycle && tg68_as && ~tg68_rw && cpu2ram));
wire ram_wr = video_cycle?video_cycle_wr:(cpu_cycle?cpu_cycle_wr:1'b0);
wire [15:0] ram_data_out;
@@ -921,27 +980,19 @@ wire [15:0] ram_data_in = data_io_br?data_io_dout:(blitter_br?blitter_master_dat
wire ram_uds = video_cycle?1'b1:((blitter_br||data_io_br)?1'b1:~tg68_uds);
wire ram_lds = video_cycle?1'b1:((blitter_br||data_io_br)?1'b1:~tg68_lds);
assign SDRAM_CKE = 1'b1;
// sdram controller has 64 bit output
wire [63:0] ram_data_out_64;
// latch lowest address bits for 64 bit word decomposition at the
// begin of a cpu cycle
//reg [1:0] ram_word_sel;
//always @(posedge clk_8)
// ram_word_sel <= cpu_cycle_addr[1:0];
wire [1:0] ram_word_sel = cpu_cycle_addr[1:0] /* synthesis keep */;
// select right word of 64 bit ram output for those devices that only want 16 bits
// this is only used for the cpu and other bus masters operating within the cpu
// cycle but neither video nor dma audio use it. They are both fed with 64 bits
assign ram_data_out =
(ram_word_sel == 2'd0)?ram_data_out_64[15:0]:
((ram_word_sel == 2'd1)?ram_data_out_64[31:16]:
((ram_word_sel == 2'd2)?ram_data_out_64[47:32]:
ram_data_out_64[63:48]));
(cpu_cycle_addr[1:0] == 2'd0)?ram_data_out_64[15:0]:
((cpu_cycle_addr[1:0] == 2'd1)?ram_data_out_64[31:16]:
((cpu_cycle_addr[1:0] == 2'd2)?ram_data_out_64[47:32]:
ram_data_out_64[63:48]));
assign SDRAM_CKE = 1'b1;
sdram sdram (
// interface to the MT48LC16M16 chip
@@ -1020,12 +1071,12 @@ wire [22:0] data_io_addr;
wire [15:0] data_io_dout;
wire data_io_write, data_io_read;
wire data_io_br;
data_io data_io (
// system control
.clk_8 (clk_8 ),
.reset (init ),
.bus_cycle (bus_cycle[3:2]),
.bus_cycle (bus_cycle ),
.ctrl_out (system_ctrl ),
// spi

View File

@@ -155,7 +155,11 @@ always @(posedge clk_128) begin
sd_cmd <= CMD_ACTIVE;
sd_addr <= { 1'b0, addr[19:8] };
sd_ba <= addr[21:20];
sd_dqm <= ~ds;
// always return both bytes in a read. The cpu may not
// need it, but the caches need to be able to store everything
if(!we) sd_dqm <= 2'b00;
else sd_dqm <= ~ds;
// lowest address for burst read
burst_addr <= addr[1:0];

View File

@@ -35,7 +35,7 @@ module ste_dma_snd (
// memory interface
input clk32, // 31.875 MHz
input [3:0] bus_cycle, // bus-cycle
input [1:0] bus_cycle, // bus-cycle
input hsync, // to synchronize with video
output read,
output [22:0] saddr,
@@ -48,9 +48,28 @@ module ste_dma_snd (
output xsint,
output xsint_d
);
// ---------------------------------------------------------------------------
// --------------------------- internal state counter ------------------------
// ---------------------------------------------------------------------------
reg [1:0] t /* synthesis noprune */ ;
// Free-running 2 bit sub-cycle counter clocked at 32 MHz that is kept in
// phase with the 8 MHz clock "clk".
always @(posedge clk32) begin
// 32Mhz counter synchronous to 8 Mhz clock
// force counter to pass state 0 exactly after the rising edge of clk (8Mhz)
// - state 3 is only left while clk is low  (otherwise the counter waits in 3)
// - state 0 is only left while clk is high (otherwise the counter waits in 0)
// - states 1 and 2 always advance
if(((t == 2'd3) && ( clk == 0)) ||
((t == 2'd0) && ( clk == 1)) ||
((t != 2'd3) && (t != 2'd0)))
t <= t + 2'd1;
end
// create internal bus_cycle signal which is stable on the positive clock
// edge and extends the previous state by half a 32 Mhz clock cycle
reg [3:0] bus_cycle_L;
always @(negedge clk32)
bus_cycle_L <= { bus_cycle, t };
assign saddr = snd_adr; // drive data
assign read = (bus_cycle[3:2] == 0) && hsync && !fifo_full && dma_enable;
assign read = (bus_cycle == 0) && hsync && !fifo_full && dma_enable;
// ---------------------------------------------------------------------------
// ------------------------------ clock generation ---------------------------
@@ -322,7 +341,8 @@ always @(posedge clk32) begin
frame_done <= (snd_adr == snd_end_latched-23'd1);
// fifo not full? read something during hsync using the video cycle
if((!fifo_full) && hsync && (bus_cycle == 3)) begin
// bus_cycle_L = 3 is the end of the video cycle
if((!fifo_full) && hsync && (bus_cycle_L == 3)) begin
if(snd_adr != snd_end_latched) begin
// read right word from ram using the 64 bit memory interface

View File

@@ -20,15 +20,7 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// TODO:
// - async timing
// Overscan:
// http://codercorner.com/fullscrn.txt
// Examples: automation 000 + 001 + 097: bottom border
// automation 196: top + bottom border
// Todo STE:
// Implemented STE features
// http://alive.atari.org/alive12/ste_hwsc.php
// http://atari-ste.anvil-soft.com/html/devdocu2.htm
// + 3*4 bit palette (4096 colors)
@@ -36,13 +28,15 @@
// + video counter writeable
// + pixel offset
// + line offset
// - undocumented 16 pixel "line offset overscan"
// + bottom overscan
// + top overscan (this is really an unreliable hack which barely works for obsession pinball)
// + undocumented 16 pixel "line offset overscan"
module video (
// system interface
input clk, // 31.875 MHz
input clk27, // 27.000 Mhz
input [3:0] bus_cycle, // bus-cycle for sync
input [1:0] bus_cycle, // bus-cycle for sync
// SPI interface for OSD
input sck,
@@ -79,9 +73,9 @@ module video (
input ste, // enable STE featurss
// signals not affected by scan doubler for internal use like irqs
output st_de,
output st_vs,
output st_hs
output st_de,
output reg st_vs,
output reg st_hs
);
localparam LINE_WIDTH = 10'd640;
@@ -92,22 +86,71 @@ localparam STATE_BLANK = 2'd1;
localparam STATE_BORDER = 2'd2;
localparam STATE_DISP = 2'd3;
// ---------------------------------------------------------------------------
// --------------------------- internal state counter ------------------------
// ---------------------------------------------------------------------------
reg [1:0] t;
// Free-running 2 bit sub-cycle counter clocked by the 32 MHz video clock
// "clk" that is kept in phase with the 8 MHz register clock "reg_clk".
always @(posedge clk) begin
// 32Mhz counter synchronous to 8 Mhz clock
// force counter to pass state 0 exactly after the rising edge of reg_clk (8Mhz)
// - state 3 is only left while reg_clk is low  (otherwise the counter waits in 3)
// - state 0 is only left while reg_clk is high (otherwise the counter waits in 0)
// - states 1 and 2 always advance
if(((t == 2'd3) && ( reg_clk == 0)) ||
((t == 2'd0) && ( reg_clk == 1)) ||
((t != 2'd3) && (t != 2'd0)))
t <= t + 2'd1;
end
// create internal bus_cycle signal which is stable on the positive clock
// edge and extends the previous state by half a 32 Mhz clock cycle
reg [3:0] bus_cycle_L;
always @(negedge clk)
bus_cycle_L <= { bus_cycle, t };
// ---------------------------------------------------------------------------
// ------------------------------ internal signals ---------------------------
// ---------------------------------------------------------------------------
// st_de is the internal display enable signal as used by the mfp. This is used
// by software to generate a line interrupt and to e.g. do 512 color effects.
// st_de is active low. Using memory enable (me) for this makes sure the cpu has
// st_de is active low. Using display enable (de) for this makes sure the cpu has
// plenty of time before data for the next line is starting to be fetched
assign st_de = ~me;
assign st_de = ~de;
// hsync irq is generated at the rising edge of st_hs
assign st_hs = (st_h_state == STATE_SYNC);
always @(posedge clk) begin
// vsync irq is generated at the rising edge of st_vs
assign st_vs = (v_state == STATE_SYNC);
// hsync irq is generated after the rightmost border pixel column has been displayed
// Run st timing at full speed if no scan doubler is being used. Otherwise run
// it at half speed
if((!scan_doubler_enable) || vga_hcnt[0]) begin
// hsync starts at begin of blanking phase
if(st_hcnt == (t1_h_blank_right - memory_prefetch))
st_hs <= 1'b1;
// hsync ends at begin of left border
if(st_hcnt == (t4_h_border_left - memory_prefetch))
st_hs <= 1'b0;
end
// vsync irq is generated right after the last border line has been displayed
// TODO: check where these additional -10'd2 come from. Obsession pinball
// needs this to get the colors right. This means it's needed for the correct
// relationship between vbi and hbi. But why?
// v_event is the begin of hsync. The hatari video.h says vbi happens 64 clock cycles
// ST hor counter runs at 16Mhz, thus the trigger is 128 events after h_sync
// xyz
if(st_h_active && (st_hcnt == (v_event))) begin
// vsync starts at begin of blanking phase
if(vcnt == t7_v_blank_bot - de_v_offset - 10'd2) st_vs <= 1'b1;
// vsync ends at begin of top border
if(vcnt == t10_v_border_top - de_v_offset - 10'd2) st_vs <= 1'b0;
end
end
// ---------------------------------------------------------------------------
// -------------------------------- video mode -------------------------------
// ---------------------------------------------------------------------------
@@ -143,15 +186,13 @@ wire [9:0] t4_h_border_left = config_string[80:71];
wire [9:0] t5_h_end = config_string[70:61];
wire v_sync_pol = config_string[60];
// in overscan mode the bottom border is removed and data is displayed instead
wire [9:0] t6_v_border_bot = overscan?config_string[49:40]:config_string[59:50];
wire [9:0] t6_v_border_bot = config_string[59:50];
wire [9:0] t7_v_blank_bot = config_string[49:40];
wire [9:0] t8_v_sync = config_string[39:30];
wire [9:0] t9_v_blank_top = config_string[29:20];
wire [9:0] t10_v_border_top = config_string[19:10];
wire [9:0] t11_v_end = config_string[9:0];
// default video mode is monochrome
parameter DEFAULT_MODE = 2'd2;
@@ -182,6 +223,7 @@ reg [3:0] palette_b[15:0];
// STE-only registers
reg [7:0] line_offset; // number of words to skip at the end of each line
reg [3:0] pixel_offset; // number of pixels to skip at begin of line
reg ste_overscan_enable; // STE has a special 16 bit overscan
// ---------------------------------------------------------------------------
// ----------------------------- CPU register read ---------------------------
@@ -247,6 +289,7 @@ always @(negedge reg_clk) begin
// disable STE hard scroll features
line_offset <= 8'h00;
pixel_offset <= 4'h0;
ste_overscan_enable <= 1'b0;
if(DEFAULT_MODE == 0) begin
// TOS default palette, can be disabled after tests
@@ -292,12 +335,22 @@ always @(negedge reg_clk) begin
// writing special STE registers
if(ste && !reg_lds) begin
if(reg_addr == 6'h07) line_offset <= reg_din[7:0];
if(reg_addr == 6'h32) pixel_offset <= reg_din[3:0];
if(reg_addr == 6'h32) begin
pixel_offset <= reg_din[3:0];
ste_overscan_enable <= 1'b0;
end
// Writing the video address counter happens directly inside the
// memory engine further below!!!
end
// byte write of 0 to ff8264 while ff8265 (pixel_offset) != 0 results in extra
// ste overscan
if(ste && !reg_uds && reg_lds) begin
if((reg_addr == 6'h32) && (pixel_offset != 0))
ste_overscan_enable <= 1'b1;
end
// the color palette registers, always write bit 3 with zero if not in
// ste mode as this is the lsb of ste
if(reg_addr >= 6'h20 && reg_addr < 6'h30 ) begin
@@ -353,7 +406,7 @@ osd osd (
// mono uses the lsb of blue palette entry 0 to invert video
wire [3:0] blue0 = palette_b[0];
wire mono_bit = blue0[0]^shift_0[15];
wire [3:0] mono_rgb = de?{mono_bit, mono_bit, mono_bit, mono_bit}:4'b1000;
wire [3:0] mono_rgb = { mono_bit, mono_bit, mono_bit, mono_bit };
// ------------------------- colour video signal -----------------------------
@@ -368,7 +421,7 @@ wire [3:0] color_b = { color[ 2:0], color[ 3] };
wire [3:0] stvid_r = mono?mono_rgb:color_r;
wire [3:0] stvid_g = mono?mono_rgb:color_g;
wire [3:0] stvid_b = mono?mono_rgb:color_b;
// shift registers for up to 4 planes
reg [15:0] shift_0, shift_1, shift_2, shift_3;
@@ -435,7 +488,12 @@ end
// the top border should also be easy. Opening the side borders is basically
// impossible as this requires a 100% perfect CPU and shifter timing.
reg last_syncmode, overscan_detect, overscan;
reg last_syncmode;
reg [3:0] bottom_overscan_cnt;
reg [3:0] top_overscan_cnt;
wire bottom_overscan = (bottom_overscan_cnt != 0) /* synthesis keep */;
wire top_overscan = (top_overscan_cnt != 0) /* synthesis keep */;
always @(posedge clk) begin
last_syncmode <= syncmode[1]; // delay syncmode to detect changes
@@ -443,18 +501,29 @@ always @(posedge clk) begin
// this is the magic used to do "overscan".
// the magic actually involves more than writing zero (60hz)
// within line 200. But this is sufficient for our detection
if(vcnt[9:2] == 8'd99) begin
// syncmode has changed from 1 to 0 (50 to 60 hz)
if((syncmode[1] == 1'b0) && (last_syncmode == 1'b1))
overscan_detect <= 1'b1;
end
// latch overscan state at topleft screen edge
if((vga_hcnt == t4_h_border_left) && (vcnt == t10_v_border_top)) begin
// save and reset overscan
overscan <= overscan_detect;
overscan_detect <= 1'b0;
end
// trigger in line 198/199
if((vcnt == { 8'd97, 2'b00} ) && (vga_hcnt == 10'd0) && (bottom_overscan_cnt != 0))
bottom_overscan_cnt <= bottom_overscan_cnt - 4'd1;
if((vcnt[9:2] == 8'd98)||(vcnt[9:2] == 8'd99)||(vcnt[9:2] == 8'd100)) begin
// syncmode has changed from 1 to 0 (50 to 60 hz)
if((syncmode[1] == 1'b0) && (last_syncmode == 1'b1))
bottom_overscan_cnt <= 4'd15;
end
// trigger in line 284/285
if((vcnt == {8'd133, 2'b00 }) && (vga_hcnt == 10'd0) && (top_overscan_cnt != 0))
top_overscan_cnt <= top_overscan_cnt - 4'd1;
if((vcnt[9:2] == 8'd134)||(vcnt[9:2] == 8'd135)||(vcnt[9:2] == 8'd136)) begin
// syncmode has changed from 1 to 0 (50 to 60 hz)
if((syncmode[1] == 1'b0) && (last_syncmode == 1'b1))
top_overscan_cnt <= 4'd15;
end
// top_overscan <= 1'b1;
// bottom_overscan <= 1'b1;
end
// ---------------------------------------------------------------------------
@@ -497,7 +566,7 @@ ste_shifter ste_shifter_3 (
// move data into STE hard scroll shift registers
always @(posedge clk) begin
if((bus_cycle == 4'd14) && (plane == 2'd0)) begin
if((bus_cycle_L == 4'd14) && (plane == 2'd0)) begin
// shift up 16 pixels and load new data into lower bits of shift registers
ste_shift_0 <= { ste_shift_0[15:0], data_latch[0] };
ste_shift_1 <= { ste_shift_1[15:0], (planes > 3'd1)?data_latch[1]:16'h0000 };
@@ -521,7 +590,7 @@ reg [15:0] sd_shift_0, sd_shift_1, sd_shift_2, sd_shift_3;
// msb of the shift registers is the index used to access the palette registers.
// Return border color index (0) if outside display area
wire [3:0] sd_index = (!me_v)?4'd0:
wire [3:0] sd_index = (!de_v)?4'd0:
{ sd_shift_3[15], sd_shift_2[15], sd_shift_1[15], sd_shift_0[15]};
// line buffer for two lines of 720 pixels (640 + 2 * 40 border) 3 * 4 (STE!) bit rgb data
@@ -537,19 +606,18 @@ always @(posedge clk) begin
// vertical state changes at end of hsync (begin of left blank)
if(vga_hcnt == v_event) begin
// reset state counter two vga lines before screen start since scan doubler
// reset state counter two vga lines before display start since scan doubler
// starts prefetching data two vga lines before
if(vcnt == (t11_v_end-10'd2)) sd_vcnt <= 2'd0;
else sd_vcnt <= sd_vcnt + 2'd1;
if(vcnt == (de_v_start-10'd2)) sd_vcnt <= 2'd0;
else sd_vcnt <= sd_vcnt + 2'd1;
end
// permanently move data from data_latch into scan doublers shift registers
if((bus_cycle == 4'd15) && (plane == 2'd0)) begin
if((bus_cycle_L == 4'd15) && (plane == 2'd0)) begin
// normally data is directly moved from the input latches into the
// shift registers. Only on an ste with pixel scrolling enabled
// the data is moved through additional shift registers
if(!ste || (pixel_offset == 0)) begin
if(!ste || (pixel_offset == 0) || ste_overscan_enable) begin
// load data into shift registers as required by color depth
sd_shift_0 <= data_latch[0];
sd_shift_1 <= (planes > 3'd1)?data_latch[1]:16'h0000;
@@ -606,53 +674,60 @@ end
// ------------------------------- memory engine -----------------------------
// ---------------------------------------------------------------------------
assign read = (bus_cycle[3:2] == 0) && me; // memory enable can directly be used as a ram read signal
assign read = (bus_cycle == 0) && de; // display enable can directly be used as a ram read signal
// current plane to be read from memory
reg [1:0] plane;
// To be able to output the first pixel we need to have one word for every plane already
// present in memory. We thus need a "memory enable" signal which is (depending on color depth)
// present in memory. We thus need a display enable signal which is (depending on color depth)
// 16, 32 or 64 pixel ahead of display enable
reg me, me_v;
reg de, de_v;
// required pixel offset allowing for prefetch of 16 pixels in 1, 2 or 4 planes (16, 32 or 64 cycles)
wire [9:0] memory_prefetch = scan_doubler_enable?{ 4'd0, planes, 3'd0 }:{ 3'd0, planes, 4'd0 };
wire [9:0] ste_overscan = ste_overscan_enable?memory_prefetch:10'd0;
// ste is starting another 16 pixels earlier if horizontal hard scroll is being used
wire [9:0] ste_prefetch = (ste && (pixel_offset != 0))?memory_prefetch:10'd0;
wire [9:0] me_h_start = t5_h_end - memory_prefetch - ste_prefetch;
wire [9:0] me_h_end = t0_h_border_right - memory_prefetch;
wire [9:0] ste_prefetch = (ste && ((pixel_offset != 0) && !ste_overscan_enable))?memory_prefetch:10'd0;
wire [9:0] de_h_start = t5_h_end - memory_prefetch - ste_prefetch;
wire [9:0] de_h_end = t0_h_border_right - memory_prefetch + ste_overscan;
// extra lines required by overscan
wire [9:0] de_v_top_extra = top_overscan?10'd58:10'd0; // 29 extra ST lines at top
wire [9:0] de_v_bot_extra = bottom_overscan?10'd76:10'd0; // 38 extra ST lines at bottom
// line offset required for scan doubler
wire [9:0] me_v_offset = scan_doubler_enable?10'd2:10'd0;
wire [9:0] me_v_start = t11_v_end - me_v_offset;
wire [9:0] me_v_end = t6_v_border_bot - me_v_offset;
wire [9:0] de_v_offset = scan_doubler_enable?10'd2:10'd0;
// calculate lines in which active display starts and ends
wire [9:0] de_v_start = t11_v_end - de_v_offset - de_v_top_extra;
wire [9:0] de_v_end = t6_v_border_bot - de_v_offset + de_v_bot_extra;
// with scan doubler being active, there are two main clock cycles per st hor counter
// st_h_active makes sure these events only trigger once
wire st_h_active = (!scan_doubler_enable || bus_cycle[0]);
wire st_h_active = (!scan_doubler_enable || t[0]);
always @(posedge clk) begin
// line in which memory access is enabled
// in scan doubler mode two lines ahead of vertical display enable
if(vga_hcnt == v_event) begin
if(vcnt == me_v_start) me_v <= 1'b1;
if(vcnt == me_v_end) me_v <= 1'b0;
if(vcnt == de_v_start) de_v <= 1'b1;
if(vcnt == de_v_end) de_v <= 1'b0;
end
// memory enable signal 16/32/64 bits (16*planes) ahead of display enable (de)
// display enable signal 16/32/64 bits (16*planes) ahead of display enable (de)
// include bus cycle to stay in sync in scan doubler mode
if(me_v && st_h_active) begin
if(st_hcnt == me_h_start) me <= 1'b1;
if(st_hcnt == me_h_end) me <= 1'b0;
if(de_v && st_h_active) begin
if(st_hcnt == de_h_start) de <= 1'b1;
if(st_hcnt == de_h_end) de <= 1'b0;
end
// make sure each line starts with plane 0
if(st_hcnt == me_h_start)
if(st_hcnt == de_h_start)
plane <= 2'd0;
// The video address counter is reloaded slightly before vsync
if((vga_hcnt == t4_h_border_left) && (vcnt == t8_v_sync - 10'd3)) begin
// The video address counter is reloaded right before display starts
if((vga_hcnt == t3_h_blank_left) && (vcnt == t7_v_blank_bot)) begin
vaddr <= _v_bas_ad;
// copy syncmode
@@ -660,10 +735,10 @@ always @(posedge clk) begin
end else begin
// video transfer happens in cycle 3 (end of video cycle)
if(bus_cycle == 3) begin
if(bus_cycle_L == 3) begin
// read if memory enable is active
if(me) begin
// read if display enable is active
if(de) begin
// move incoming video data into data latch
// ST shifter only uses 16 out of possible 64 bits, so select the right word
@@ -689,12 +764,12 @@ always @(posedge clk) begin
// STE has additional ways to influence video address
if(ste) begin
// add line offset at the end of each video line
if(me_v && st_h_active && (st_hcnt == t2_h_sync))
if(de_v && st_h_active && (st_hcnt == t2_h_sync))
vaddr <= vaddr + line_offset;
// STE vaddr write handling
// bus_cycle 6 is in the middle of a cpu cycle
if((bus_cycle == 6) && ste_vaddr_write) begin
if((bus_cycle_L == 6) && ste_vaddr_write) begin
if(reg_addr == 6'h02) vaddr[22:15] <= reg_din[7:0];
if(reg_addr == 6'h03) vaddr[14: 7] <= reg_din[7:0];
if(reg_addr == 6'h04) vaddr[ 6: 0] <= reg_din[7:1];
@@ -723,7 +798,6 @@ reg [1:0] v_state; // 0=sync, 1=blank, 2=border, 3=display
// blank level is also used during sync
wire blank = (v_state == STATE_BLANK) || (vga_h_state == STATE_BLANK) ||
(v_state == STATE_SYNC) || (vga_h_state == STATE_SYNC);
wire de = (v_state == STATE_DISP) && (vga_h_state == STATE_DISP);
// time in horizontal timing where vertical states change (at the begin of the sync phase)
wire [9:0] v_event = t2_h_sync;
@@ -741,7 +815,7 @@ always @(posedge clk) begin
// the scan doubler is a special case as the atari line timing then expands over two vga
// lines and may/must be asynchronous to the vga timing at the end of the first line
if(vga_hcnt == t5_h_end) begin
if((bus_cycle == 4'd15) || (scan_doubler_enable && sd_vcnt[0]))
if((bus_cycle_L == 4'd15) || (scan_doubler_enable && sd_vcnt[0]))
vga_hcnt <= 10'd0;
end else
vga_hcnt <= vga_hcnt + 10'd1;
@@ -770,7 +844,7 @@ always @(posedge clk) begin
// generate horizontal video signal states
if( st_hcnt == t2_h_sync ) st_h_state <= STATE_SYNC;
if((st_hcnt == t0_h_border_right) || (st_hcnt == t4_h_border_left)) st_h_state <= STATE_BORDER;
if((st_hcnt == t0_h_border_right + ste_overscan) || (st_hcnt == t4_h_border_left)) st_h_state <= STATE_BORDER;
if((st_hcnt == t1_h_blank_right) || (st_hcnt == t3_h_blank_left)) st_h_state <= STATE_BLANK;
if( st_hcnt == t5_h_end) st_h_state <= STATE_DISP;
end

View File

@@ -28,6 +28,17 @@
// NTSC 32042400 Hz
// MIST 31875000 Hz
// real ST timing
// Starting with VBI
// Atari Timing as hatari sees it: sync 34, border 29, disp 200, 47 border, 3 ?? = 313, vbi@310
// 47 bottom border lines seems to be too much, some intros have artifacts in the lower lines
// 38 bottom border lines seems to be good
// 60Hz sync 5, border 29, disp 200, 29 border = 263, vbi@261
// vbl at cycle counter 64 (64 cycles after hbl)
module video_modes (
inout mono, // select monochrome mode (and not color)
input pal, // select pal mode (and not ntsc) if a color mode is selected
@@ -42,11 +53,11 @@ module video_modes (
localparam H_ACT = 10'd640;
localparam V_ACT = 10'd400;
// TIMING CONSTRAINTS:
// The total width (act+both blank+2*border+sync) must be a multiple of 16, for
// scan doubled modes a multiple of 8
// ---------------------------------------------------------------------------
// ----------------------------- pal56 timing -------------------------------
// ---------------------------------------------------------------------------
@@ -57,9 +68,9 @@ localparam V_ACT = 10'd400;
wire [121:0] pal56_config_str;
conf pal56_conf(
// front porch sync width back porch border width sync polarity
.h_fp ( 10'd44), .h_s (10'd120), .h_bp ( 10'd44), .h_bd (10'd40), .h_sp (1'b1),
.v_fp ( 10'd24), .v_s ( 10'd4), .v_bp ( 10'd24), .v_bd (10'd80), .v_sp (1'b1),
// front porch sync width back porch border width sync polarity
.h_fp ( 10'd44), .h_s (10'd120), .h_bp ( 10'd44), .h_bd (10'd40), .h_sp (1'b1),
.v_fp ( 10'd24), .v_s ( 10'd4), .v_bp ( 10'd24), .v_tb (10'd80), .v_bb (10'd80), .v_sp (1'b1),
.str (pal56_config_str)
);
@@ -73,9 +84,10 @@ conf pal56_conf(
wire [121:0] pal50_config_str;
conf pal50_conf(
// front porch sync width back porch border width sync polarity
.h_fp ( 10'd80), .h_s ( 10'd64), .h_bp ( 10'd80), .h_bd (10'd80), .h_sp (1'b1),
.v_fp ( 10'd30), .v_s ( 10'd6), .v_bp ( 10'd30), .v_bd (10'd80), .v_sp (1'b1),
// front porch sync width back porch border width sync polarity
.h_fp ( 10'd80), .h_s ( 10'd64), .h_bp ( 10'd80), .h_bd (10'd80), .h_sp (1'b1),
// .v_fp ( 10'd42), .v_s ( 10'd8), .v_bp ( 10'd42), .v_tb (10'd58), .v_bb (10'd76), .v_sp (1'b1),
.v_fp ( 10'd30), .v_s ( 10'd6), .v_bp ( 10'd30), .v_tb (10'd80), .v_bb (10'd80), .v_sp (1'b1),
.str (pal50_config_str)
);
@@ -89,9 +101,9 @@ conf pal50_conf(
wire [121:0] ntsc_config_str;
conf ntsc_conf(
// front porch sync width back porch border width sync polarity
.h_fp ( 10'd76), .h_s ( 10'd64), .h_bp ( 10'd76), .h_bd (10'd80), .h_sp (1'b1),
.v_fp ( 10'd20), .v_s ( 10'd6), .v_bp ( 10'd20), .v_bd (10'd40), .v_sp (1'b0),
// front porch sync width back porch border width sync polarity
.h_fp ( 10'd76), .h_s ( 10'd64), .h_bp ( 10'd76), .h_bd (10'd80), .h_sp (1'b1),
.v_fp ( 10'd20), .v_s ( 10'd6), .v_bp ( 10'd20), .v_tb (10'd40), .v_bb (10'd40), .v_sp (1'b0),
.str (ntsc_config_str)
);
@@ -105,9 +117,9 @@ conf ntsc_conf(
wire [121:0] mono_config_str;
conf mono_conf(
// front porch sync width back porch border width sync polarity
.h_fp (10'd108), .h_s ( 10'd40), .h_bp (10'd108), .h_bd ( 10'd0), .h_sp (1'b0),
.v_fp ( 10'd48), .v_s ( 10'd5), .v_bp ( 10'd48), .v_bd ( 10'd0), .v_sp (1'b0),
// front porch sync width back porch border width sync polarity
.h_fp (10'd108), .h_s ( 10'd40), .h_bp (10'd108), .h_bd ( 10'd0), .h_sp (1'b0),
.v_fp ( 10'd48), .v_s ( 10'd5), .v_bp ( 10'd48), .v_tb ( 10'd0), .v_bb ( 10'd0), .v_sp (1'b0),
.str (mono_config_str)
);
@@ -130,7 +142,8 @@ module conf (
input [9:0] v_fp, // vertical front porch width
input [9:0] v_s, // vertical sync width
input [9:0] v_bp, // vertical back porch width
input [9:0] v_bd, // vertical border width
input [9:0] v_tb, // vertical border width top
input [9:0] v_bb, // vertical border width bottom
input v_sp, // vertical sync polarity
output [121:0] str
@@ -151,11 +164,11 @@ wire [60:0] h_str = { h_sp,
wire [60:0] v_str = { v_sp,
V_ACT - 10'd1,
V_ACT + v_bd - 10'd1,
V_ACT + v_bd + v_fp - 10'd1,
V_ACT + v_bd + v_fp + v_s - 10'd1,
V_ACT + v_bd + v_fp + v_s + v_bp - 10'd1,
V_ACT + v_bd + v_fp + v_s + v_bp + v_bd - 10'd1};
V_ACT + v_bb - 10'd1,
V_ACT + v_bb + v_fp - 10'd1,
V_ACT + v_bb + v_fp + v_s - 10'd1,
V_ACT + v_bb + v_fp + v_s + v_bp - 10'd1,
V_ACT + v_bb + v_fp + v_s + v_bp + v_tb - 10'd1};
assign str = { h_str, v_str };