1
0
mirror of https://github.com/mist-devel/mist-board.git synced 2026-01-26 20:01:48 +00:00

More blitter work

This commit is contained in:
harbaum
2013-08-21 19:47:28 +00:00
parent 9eba58c901
commit bee90b5d3e
3 changed files with 222 additions and 72 deletions

View File

@@ -55,7 +55,6 @@ entity TG68K is
fromram : in std_logic_vector(15 downto 0);
ramready : in std_logic:='0';
cpu : in std_logic_vector(1 downto 0);
memcfg : in std_logic_vector(5 downto 0);
ramaddr : out std_logic_vector(31 downto 0);
cpustate : out std_logic_vector(5 downto 0);
nResetOut : out std_logic;

View File

@@ -1,24 +1,40 @@
// blitter docs:
//
// http://mikro.naprvyraz.sk/docs/ST_E/BLITTER.TXT
// http://paradox.atari.org/files/BLIT_FAQ.TXT
// https://steem-engine.googlecode.com/svn-history/r67/branches/Seagal/steem/code/blitter.cpp
// TODO:
// - Also use bus cycle 3 to make a "turbo blitter" being twice as fast
module blitter (
input clk32, // 31.875 MHz
input [3:0] bus_cycle,
input [1:0] bus_cycle,
// cpu register interface
input clk,
input reset,
input clk,
input reset,
input sel,
input [4:0] addr,
input [15:0] din,
output reg [15:0] dout,
input uds,
input lds,
input rw,
// bus master interface
output [23:1] bm_addr,
output reg bm_write,
output reg bm_read,
output [15:0] bm_data_out,
input [15:0] bm_data_in,
output reg br,
output irq
input sel,
input [4:0] addr,
input [15:0] din,
output reg [15:0] dout,
input uds,
input lds,
input rw,
output irq
);
assign irq = 1'b0;
assign irq = busy;
// CPU controlled register set
reg [15:0] halftone_ram[15:0];
@@ -51,6 +67,7 @@ reg [3:0] skew;
reg nfsr;
reg fxsr;
// ------------------ cpu interface --------------------
// CPU READ
@@ -79,16 +96,26 @@ always @(sel, rw, addr, src_y_inc, src_x_inc, src_addr, endmask1, endmask2, endm
if(addr == 5'h1b) dout <= x_count;
if(addr == 5'h1c) dout <= y_count;
// since reading them has not side effect we can return the 8 bit registers
// since reading them has no side effect we can return the 8 bit registers
// without caring for uds/lds
if(addr == 5'h1d) dout <= { 6'b000000, hop, 4'b0000, op };
if(addr == 5'h1e) dout <= { busy, hog, smudge, 1'b0, line_number, fxsr, nfsr, 2'b00, skew };
end
end
// CPU WRITE
// source read takes place in state 1 (normal source read) and 4 (fxsr)
assign bm_addr = ((state == 1)||(state == 4))?src_addr:dst_addr;
reg [2:0] state;
reg [7:0] dummy /* synthesis noprune */;
always @(negedge clk) begin
if(reset) begin
// ---------- blitter cpu register write interface ............
if(reset) begin
busy <= 1'b0;
state <= 3'd0;
dummy <= 8'd0;
end else begin
if(sel && ~rw) begin
// ------ 16/32 bit registers, not byte addressable ----------
@@ -118,41 +145,132 @@ always @(negedge clk) begin
// ------ 8 bit registers ----------
// uds -> even bytes via d15:d8
// lds -> odd bytes via d7:d0
if(addr == 5'h1d && ~uds) hop <= din[9:8];
if(addr == 5'h1d && ~lds) op <= din[3:0];
if((addr == 5'h1d) && ~uds) hop <= din[9:8];
if((addr == 5'h1d) && ~lds) op <= din[3:0];
if(addr == 5'h1e && ~uds) begin
if(addr == 5'h1d)
dummy <= dummy + 8'd1;
if((addr == 5'h1e) && ~uds) begin
line_number <= din[11:8];
smudge <= din[13];
hog <= din[14];
busy <= din[15];
// writing busy with 1 starts the blitter
// writing busy with 1 starts the blitter, but only if y_count != 0
if(din[15] && (y_count != 0)) begin
busy <= 1'b1;
state <= 3'd0;
end
end
if(addr == 5'h1e && ~lds) begin
if((addr == 5'h1e) && ~lds) begin
skew <= din[3:0];
nfsr <= din[6];
fxsr <= din[7];
end
end
end
// --------- blitter state machine -------------
br <= busy; // hog mode: grab bus immediately as long as we need it
// busy is written by the cpu and only becomes active if y_count != 0
if(br && (bus_cycle == 2'd0)) begin
if(state == 3'd3) begin
if(last_word_in_row && fxsr)
state <= 3'd4; // extra state 4, then 1, 2 ...
else
state <= 3'd1; // cycle through states 1, 2 and 3
end else if(state == 3'd4)
state <= 3'd1;
else if(state == 3'd0 && fxsr)
state <= 3'd4;
else
state <= state + 3'd1;
if((state == 3'd1) || (state == 3'd4)) begin
// don't do the read of the last word in a row if nfsr is set
if(!((state == 3'd1) && nfsr && last_word_in_row)) begin
if(src_x_inc[15] == 1'b0) src[15:0] <= bm_data_in;
else src[31:16] <= bm_data_in;
// in normal read state (not due to fxsr) we shift
if(state == 3'd1) begin
if(src_x_inc[15] == 1'b0) src[31:16] <= src[15:0];
else src[15:0] <= src[31:16];
end
// if(src_x_inc[15] == 1'b0) src <= { src[15:0], bm_data_in};
// else src <= { bm_data_in, src[31:16]};
// process src pointer
if(x_count != 1) // do signed add by sign expanding XXX_x_inc
src_addr <= src_addr + { {8{src_x_inc[15]}}, src_x_inc };
else // we are at the end of a line
src_addr <= src_addr + { {8{src_y_inc[15]}}, src_y_inc };
end else begin
// no source read, but shifting anyway
if(src_x_inc[15] == 1'b0) src[31:16] <= src[15:0];
else src[15:0] <= src[31:16];
// TODO: do the dest read here if nfsr and skip state 2
end
end
if(state == 3'd2) begin
dest <= bm_data_in;
end
// don't update counters and addresses if still in setup phase
if(state == 3'd3) begin
// y_count != 0 means blitter is (still) active
if(y_count != 0) begin
if(x_count != 1) begin
// we are at the begin or within a line (have not reached the end yet)
// do signed add by sign expanding XXX_x_inc
dst_addr <= dst_addr + { {8{dst_x_inc[15]}}, dst_x_inc };
x_count <= x_count - 8'd1;
end else begin
// we are at the end of a line but not finished yet
// do signed add by sign expanding XXX_y_inc
dst_addr <= dst_addr + { {8{dst_y_inc[15]}}, dst_y_inc };
if(dst_y_inc[15]) line_number <= line_number + 4'd1;
else line_number <= line_number - 4'd1;
x_count <= x_count_latch;
y_count <= y_count - 8'd1;
end
end else begin
// y_count reached zero -> end of blitter operation
busy <= 1'b0;
end
end
end
end
// ----------------- blitter engine -------------------
always @(posedge clk32) begin
if(reset) begin
end else begin
// ----------------- blitter busmaster engine -------------------
always @(posedge clk) begin
bm_read <= 1'b0;
bm_write <= 1'b0;
end
if(br && (y_count != 0) && (bus_cycle == 2'd0)) begin
// drive the read or write strobe that matches the current state
if(state == 3'd1) bm_read <= 1'b1;
else if(state == 3'd2) bm_read <= 1'b1;
else if(state == 3'd3) bm_write <= 1'b1;
else if(state == 3'd4) bm_read <= 1'b1; // fxsr state
end
end
// wire io = (bus_cycle[3:2] == 0); // blitter does io in cycle 0 which is the same one the cpu uses
// wire io = (bus_cycle[3:2] == 1); // blitter does io in cycle 1 which is the same one the cpu uses
// TODO: Also use bus cycle 2 to make a "turbo blitter" being twice as fast
// internal registers
reg [31:0] src; // 32 bit source read buffer
reg [15:0] dest; // 16 bit destination read buffer
@@ -182,6 +300,12 @@ halftone_op halftone_op (
.out (src_halftoned)
);
// TODO: clean this up.
// Copies src_skewed into dummy_reg on every rising edge of clk. Nothing in
// the visible code reads dummy_reg.
// NOTE(review): presumably a debug aid - the noprune attribute looks intended
// to keep synthesis from removing this otherwise unused register; confirm
// before deleting.
reg [15:0] dummy_reg /* synthesis noprune */;
always @(posedge clk) begin
dummy_reg <= src_skewed;
end
// apply blitter operation
blitter_op blitter_op (
.op (op),
@@ -191,11 +315,18 @@ blitter_op blitter_op (
.out (result)
);
wire first_word_in_row = (x_count == x_count_latch);
wire last_word_in_row = (x_count == 16'h0001);
wire first_word_in_row = (x_count == x_count_latch) /* synthesis keep */;
wire last_word_in_row = (x_count == 16'h0001) /* synthesis keep */;
// Registered copies of the first_word_in_row / last_word_in_row wires,
// sampled on every rising edge of clk. Nothing in the visible code reads
// these registers.
// NOTE(review): presumably kept only for debugging (noprune looks intended to
// preserve them through synthesis) - confirm before removing.
reg first_word_in_row_reg /* synthesis noprune */;
reg last_word_in_row_reg /* synthesis noprune */;
always @(posedge clk) begin
first_word_in_row_reg <= first_word_in_row;
last_word_in_row_reg <= last_word_in_row;
end
wire [15:0] data_to_write;
// apply masks
masking masking (
.endmask1 (endmask1),
@@ -206,7 +337,7 @@ masking masking (
.in0 (result),
.in1 (dest),
.out (data_to_write)
.out (bm_data_out)
);

View File

@@ -114,7 +114,7 @@ end
// no tristate busses exist inside the FPGA. so bus request doesn't do
// much more than halting the cpu by suppressing dtack
wire br = data_io_br; // && (tg68_cpustate[1:0] == 2'b00) ; // dma is only other bus master (yet)
wire br = data_io_br || blitter_br; // && (tg68_cpustate[1:0] == 2'b00) ; // dma is only other bus master (yet)
wire data_io_br;
// request interrupt ack from mfp for IPL == 6
@@ -137,7 +137,6 @@ wire [7:0] auto_vector = auto_vector_vbi | auto_vector_hbi;
// $fff00000 - $fff000ff - IDE
// $ffff8780 - $ffff878f - SCSI
// $ffff8901 - $ffff893f - STE DMA audio
// $ffff8a00 - $ffff8a3f - Blitter
// $ffff9200 - $ffff923f - STE joystick ports
// $fffffa40 - $fffffa7f - FPU
// $fffffc20 - $fffffc3f - RTC
@@ -161,8 +160,7 @@ wire acia_sel = io_sel && ({tg68_adr[15:8], 8'd0} == 16'hfc00);
wire [7:0] acia_data_out;
// blitter 16 bit interface at $ff8a00 - $ff8a3f
// wire blitter_sel = io_sel && ({tg68_adr[15:8], 8'd0} == 16'h8a00);
wire blitter_sel = 1'b0;
wire blitter_sel = system_ctrl[19] && io_sel && ({tg68_adr[15:8], 8'd0} == 16'h8a00);
wire [15:0] blitter_data_out;
// psg 8 bit interface at $ff8800 - $ff8803
@@ -203,7 +201,7 @@ video video (
.reg_dout (vreg_data_out),
.vaddr (video_address ),
.data (video_data ),
.data (ram_data_out ),
.read (video_read ),
.hs (VGA_HS ),
@@ -281,18 +279,35 @@ acia acia (
.ikbd_data_in (ikbd_data_to_acia)
);
wire [23:1] blitter_master_addr;
wire blitter_master_write;
wire blitter_master_read;
wire blitter_br;
wire [15:0] blitter_master_data_out;
blitter blitter (
// .bus_cycle (bus_cycle[3:2] ),
.bus_cycle (bus_cycle_8 ),
// cpu interface
.clk (clk_8 ),
.reset (reset ),
.din (tg68_dat_out ),
.sel (blitter_sel ),
.addr (tg68_adr[5:1] ),
.uds (tg68_uds ),
.lds (tg68_uds ),
.rw (tg68_rw ),
.dout (blitter_data_out ),
.irq ( )
.clk (clk_8 ),
.reset (reset ),
.din (tg68_dat_out ),
.sel (blitter_sel ),
.addr (tg68_adr[5:1] ),
.uds (tg68_uds ),
.lds (tg68_lds ),
.rw (tg68_rw ),
.dout (blitter_data_out ),
.bm_addr (blitter_master_addr),
.bm_write (blitter_master_write),
.bm_data_out (blitter_master_data_out),
.bm_read (blitter_master_read),
.bm_data_in (ram_data_out),
.br (blitter_br ),
.irq ( )
);
@@ -406,16 +421,22 @@ clock clock (
//// 8MHz clock ////
wire [3:0] bus_cycle;
reg [3:0] clk_cnt;
reg [1:0] bus_cycle_8;
always @ (posedge clk_32, negedge pll_locked) begin
if (!pll_locked)
clk_cnt <= #1 4'b0010;
else
clk_cnt <= #1 clk_cnt + 4'd1;
if (!pll_locked) begin
clk_cnt <= #1 4'b0010;
bus_cycle_8 <= 2'd3;
end else begin
clk_cnt <= #1 clk_cnt + 4'd1;
if(clk_cnt[1:0] == 2'd2)
bus_cycle_8 <= bus_cycle_8 + 2'd1;
end
end
assign clk_8 = clk_cnt[1];
assign bus_cycle = clk_cnt-4'd2;
assign bus_cycle = clk_cnt - 4'd2;
// SDRAM
assign SDRAM_CKE = 1'b1;
@@ -447,8 +468,6 @@ wire tg68_ena7WR;
wire tg68_enaWR;
wire [ 16-1:0] tg68_cout;
wire tg68_cpuena;
// wire [ 2-1:0] cpu_config;
// wire [ 6-1:0] memcfg;
wire [ 32-1:0] tg68_cad;
wire [ 6-1:0] tg68_cpustate /* synthesis noprune */;
wire tg68_cdma;
@@ -536,7 +555,6 @@ TG68K tg68k (
.fromram (tg68_cout ),
.ramready (tg68_cpuena ),
.cpu (system_ctrl[5:4] ), // 00=68000
.memcfg (6'b000000 ), // 00XXXX = no fastmem
.ramaddr (tg68_cad ),
.cpustate (tg68_cpustate ),
.nResetOut ( ),
@@ -549,18 +567,17 @@ TG68K tg68k (
//
wire [15:0] cpu_data_in;
assign cpu_data_in = cpu2mem?ram_data:io_data_out;
assign cpu_data_in = cpu2mem?ram_data_out:io_data_out;
// cpu/video stram multiplexing
wire [22:0] ram_address;
wire [15:0] ram_data;
wire [15:0] ram_data_out;
wire video_cycle = (bus_cycle[3:2] == 0);
wire cpu_cycle = (bus_cycle[3:2] == 1); // || (bus_cycle[3:2] == 3);
wire io_cycle = (bus_cycle[3:2] == 2);
assign ram_address = video_cycle?video_address:tg68_adr[23:1];
assign video_data = ram_data;
assign ram_address = video_cycle?video_address:(blitter_br?blitter_master_addr:tg68_adr[23:1]);
assign video_data = ram_data_out;
// TODO: put 0x000000 to 0x000007 into tos section so it's write protected
wire MEM512K = (system_ctrl[3:1] == 3'd0);
@@ -611,13 +628,16 @@ always @(posedge clk_8)
assign tg68_dtack = ~(((cpu2mem && address_strobe) || io_dtack ) && !br);
wire ram_oe = video_cycle?~video_read:
(cpu_cycle?~(address_strobe && tg68_rw && cpu2mem):1'b1);
(cpu_cycle?~(blitter_br?blitter_master_read:(address_strobe && tg68_rw && cpu2mem)):1'b1);
// (cpu_cycle?~(address_strobe && tg68_rw && cpu2mem):1'b1);
wire ram_wr = cpu_cycle?~(address_strobe && ~tg68_rw && cpu2ram):1'b1;
wire ram_wr = cpu_cycle?~(blitter_br?blitter_master_write:(address_strobe && ~tg68_rw && cpu2ram)):1'b1;
wire [15:0] ram_data_in = blitter_br?blitter_master_data_out:tg68_dat_out;
// data strobe
wire ram_uds = video_cycle?1'b0:tg68_uds;
wire ram_lds = video_cycle?1'b0:tg68_lds;
wire ram_uds = video_cycle?1'b0:(blitter_br?1'b0:tg68_uds);
wire ram_lds = video_cycle?1'b0:(blitter_br?1'b0:tg68_lds);
//// sdram ////
sdram sdram (
@@ -655,14 +675,14 @@ sdram sdram (
.ena7WRreg (tg68_ena7WR ),
// chip/slow ram interface
.chipWR (tg68_dat_out ),
.chipWR (ram_data_in ),
.chipAddr (ram_address ),
.chipU (ram_uds ),
.chipL (ram_lds ),
.chipRW (ram_wr ),
.chip_dma (ram_oe ),
.c_7m (clk_8 ),
.chipRD (ram_data ),
.chipRD (ram_data_out ),
.reset_out ( )
);