From bee90b5d3e01ac642e4371e05a3f4372eedc6dc2 Mon Sep 17 00:00:00 2001 From: harbaum Date: Wed, 21 Aug 2013 19:47:28 +0000 Subject: [PATCH] More blitter work --- cores/mist/TG68K.vhd | 1 - cores/mist/blitter.v | 205 ++++++++++++++++++++++++++++++++++-------- cores/mist/mist_top.v | 88 +++++++++++------- 3 files changed, 222 insertions(+), 72 deletions(-) diff --git a/cores/mist/TG68K.vhd b/cores/mist/TG68K.vhd index 0fd9892..b5dacda 100644 --- a/cores/mist/TG68K.vhd +++ b/cores/mist/TG68K.vhd @@ -55,7 +55,6 @@ entity TG68K is fromram : in std_logic_vector(15 downto 0); ramready : in std_logic:='0'; cpu : in std_logic_vector(1 downto 0); - memcfg : in std_logic_vector(5 downto 0); ramaddr : out std_logic_vector(31 downto 0); cpustate : out std_logic_vector(5 downto 0); nResetOut : out std_logic; diff --git a/cores/mist/blitter.v b/cores/mist/blitter.v index f83c89e..69a860a 100644 --- a/cores/mist/blitter.v +++ b/cores/mist/blitter.v @@ -1,24 +1,40 @@ // blitter docs: +// +// http://mikro.naprvyraz.sk/docs/ST_E/BLITTER.TXT +// http://paradox.atari.org/files/BLIT_FAQ.TXT +// https://steem-engine.googlecode.com/svn-history/r67/branches/Seagal/steem/code/blitter.cpp + +// TODO: +// - Also use bus cycle 3 to make a "turbo blitter" being twice as fast module blitter ( - input clk32, // 31.875 MHz - input [3:0] bus_cycle, + input [1:0] bus_cycle, // cpu register interface - input clk, - input reset, + input clk, + input reset, + + input sel, + input [4:0] addr, + input [15:0] din, + output reg [15:0] dout, + input uds, + input lds, + input rw, + + // bus master interface + output [23:1] bm_addr, + output reg bm_write, + output reg bm_read, + output [15:0] bm_data_out, + input [15:0] bm_data_in, + + output reg br, + output irq - input sel, - input [4:0] addr, - input [15:0] din, - output reg [15:0] dout, - input uds, - input lds, - input rw, - output irq ); -assign irq = 1'b0; +assign irq = busy; // CPU controlled register set reg [15:0] halftone_ram[15:0]; @@ -51,6 +67,7 
@@ reg [3:0] skew; reg nfsr; reg fxsr; + // ------------------ cpu interface -------------------- // CPU READ @@ -79,16 +96,26 @@ always @(sel, rw, addr, src_y_inc, src_x_inc, src_addr, endmask1, endmask2, endm if(addr == 5'h1b) dout <= x_count; if(addr == 5'h1c) dout <= y_count; - // since reading them has not side effect we can return the 8 bit registers + // since reading them has no side effect we can return the 8 bit registers // without caring for uds/lds if(addr == 5'h1d) dout <= { 6'b000000, hop, 4'b0000, op }; if(addr == 5'h1e) dout <= { busy, hog, smudge, 1'b0, line_number, fxsr, nfsr, 2'b00, skew }; end end -// CPU WRITE +// source read takes place in state 1 (normal source read) and 4 (fxsr) +assign bm_addr = ((state == 1)||(state == 4))?src_addr:dst_addr; +reg [2:0] state; + +reg [7:0] dummy /* synthesis noprune */; + always @(negedge clk) begin - if(reset) begin + + // ---------- blitter cpu register write interface ............ + if(reset) begin + busy <= 1'b0; + state <= 3'd0; + dummy <= 8'd0; end else begin if(sel && ~rw) begin // ------ 16/32 bit registers, not byte adressable ---------- @@ -118,41 +145,132 @@ always @(negedge clk) begin // ------ 8 bit registers ---------- // uds -> even bytes via d15:d8 // lds -> odd bytes via d7:d0 - if(addr == 5'h1d && ~uds) hop <= din[9:8]; - if(addr == 5'h1d && ~lds) op <= din[3:0]; + if((addr == 5'h1d) && ~uds) hop <= din[9:8]; + if((addr == 5'h1d) && ~lds) op <= din[3:0]; - if(addr == 5'h1e && ~uds) begin + if(addr == 5'h1d) + dummy <= dummy + 8'd1; + + if((addr == 5'h1e) && ~uds) begin line_number <= din[11:8]; smudge <= din[13]; hog <= din[14]; - busy <= din[15]; - // writing busy with 1 starts the blitter - + // writing busy with 1 starts the blitter, but only if y_count != 0 + if(din[15] && (y_count != 0)) begin + busy <= 1'b1; + state <= 3'd0; + end end - if(addr == 5'h1e && ~lds) begin + if((addr == 5'h1e) && ~lds) begin skew <= din[3:0]; nfsr <= din[6]; fxsr <= din[7]; end end end + + // --------- 
blitter state machine ------------- + br <= busy; // hog mode: grab bus immediately as long as we need it + + // busy is written by the cpu and only becomes active if y_count != 0 + if(br && (bus_cycle == 2'd0)) begin + if(state == 3'd3) begin + if(last_word_in_row && fxsr) + state <= 3'd4; // extra state 4, then 1, 2 ... + else + state <= 3'd1; // cycle through states 1, 2 and 3 + end else if(state == 3'd4) + state <= 3'd1; + else if(state == 3'd0 && fxsr) + state <= 3'd4; + else + state <= state + 3'd1; + + if((state == 3'd1) || (state == 3'd4)) begin + // don't do the read of the last word in a row if nfsr is set + if(!((state == 3'd1) && nfsr && last_word_in_row)) begin + + if(src_x_inc[15] == 1'b0) src[15:0] <= bm_data_in; + else src[31:16] <= bm_data_in; + + // in normal read state (not due to fxsr) we shift + if(state == 3'd1) begin + if(src_x_inc[15] == 1'b0) src[31:16] <= src[15:0]; + else src[15:0] <= src[31:16]; + end + +// if(src_x_inc[15] == 1'b0) src <= { src[15:0], bm_data_in}; +// else src <= { bm_data_in, src[31:16]}; + + // process src pointer + if(x_count != 1) // do signed add by sign expanding XXX_x_inc + src_addr <= src_addr + { {8{src_x_inc[15]}}, src_x_inc }; + else // we are at the end of a line + src_addr <= src_addr + { {8{src_y_inc[15]}}, src_y_inc }; + end else begin + // no source read, but shifting anyway + if(src_x_inc[15] == 1'b0) src[31:16] <= src[15:0]; + else src[15:0] <= src[31:16]; + + // TODO: do the dest read here if nfsr and skip state 2 + end + end + + if(state == 3'd2) begin + dest <= bm_data_in; + end + + // don't update counters and addresses if still in setup phase + if(state == 3'd3) begin + + // y_count != 0 means blitter is (still) active + if(y_count != 0) begin + + if(x_count != 1) begin + // we are at the beginning or within a line (have not reached the end yet) + + // do signed add by sign expanding XXX_x_inc + dst_addr <= dst_addr + { {8{dst_x_inc[15]}}, dst_x_inc }; + + x_count <= x_count - 8'd1; + + end else begin + 
// we are at the end of a line but not finished yet + + // do signed add by sign expanding XXX_y_inc + dst_addr <= dst_addr + { {8{dst_y_inc[15]}}, dst_y_inc }; + if(dst_y_inc[15]) line_number <= line_number + 4'd1; + else line_number <= line_number - 4'd1; + + x_count <= x_count_latch; + y_count <= y_count - 8'd1; + end + end else begin + // y_count reached zero -> end of blitter operation + busy <= 1'b0; + end + end + end end -// ----------------- blitter engine ------------------- -always @(posedge clk32) begin - if(reset) begin - - end else begin +// ----------------- blitter busmaster engine ------------------- +always @(posedge clk) begin + bm_read <= 1'b0; + bm_write <= 1'b0; - end + if(br && (y_count != 0) && (bus_cycle == 2'd0)) begin + // drive read strobe in states 1, 2 and 4, write strobe in state 3 + if(state == 3'd1) bm_read <= 1'b1; + else if(state == 3'd2) bm_read <= 1'b1; + else if(state == 3'd3) bm_write <= 1'b1; + else if(state == 3'd4) bm_read <= 1'b1; // fxsr state + end end -// wire io = (bus_cycle[3:2] == 0); // blitter does io in cycle 0 which is the same one the cpu uses +// wire io = (bus_cycle[3:2] == 1); // blitter does io in cycle 1 which is the same one the cpu uses -// TODO: Also use bus cycle 2 to make a "turbo blitter" being twice as fast - // internal registers reg [31:0] src; // 32 bit source read buffer reg [15:0] dest; // 16 bit destination read buffer @@ -182,6 +300,12 @@ halftone_op halftone_op ( .out (src_halftoned) ); +// todo: clean this +reg [15:0] dummy_reg /* synthesis noprune */; +always @(posedge clk) begin + dummy_reg <= src_skewed; +end + // apply blitter operation blitter_op blitter_op ( .op (op), @@ -191,11 +315,18 @@ blitter_op blitter_op ( .out (result) ); -wire first_word_in_row = (x_count == x_count_latch); -wire last_word_in_row = (x_count == 16'h0001); + + +wire first_word_in_row = (x_count == x_count_latch) /* synthesis keep */; +wire last_word_in_row = (x_count == 16'h0001) /* synthesis keep */; + +reg first_word_in_row_reg /* synthesis noprune */; +reg 
last_word_in_row_reg /* synthesis noprune */; +always @(posedge clk) begin + first_word_in_row_reg <= first_word_in_row; + last_word_in_row_reg <= last_word_in_row; +end -wire [15:0] data_to_write; - // apply masks masking masking ( .endmask1 (endmask1), @@ -206,7 +337,7 @@ masking masking ( .in0 (result), .in1 (dest), - .out (data_to_write) + .out (bm_data_out) ); diff --git a/cores/mist/mist_top.v b/cores/mist/mist_top.v index b1fe56b..06ede8b 100644 --- a/cores/mist/mist_top.v +++ b/cores/mist/mist_top.v @@ -114,7 +114,7 @@ end // no tristate busses exist inside the FPGA. so bus request doesn't do // much more than halting the cpu by suppressing dtack -wire br = data_io_br; // && (tg68_cpustate[1:0] == 2'b00) ; // dma is only other bus master (yet) +wire br = data_io_br || blitter_br; // && (tg68_cpustate[1:0] == 2'b00) ; // dma is only other bus master (yet) wire data_io_br; // request interrupt ack from mfp for IPL == 6 @@ -137,7 +137,6 @@ wire [7:0] auto_vector = auto_vector_vbi | auto_vector_hbi; // $fff00000 - $fff000ff - IDE // $ffff8780 - $ffff878f - SCSI // $ffff8901 - $ffff893f - STE DMA audio -// $ffff8a00 - $ffff8a3f - Blitter // $ffff9200 - $ffff923f - STE joystick ports // $fffffa40 - $fffffa7f - FPU // $fffffc20 - $fffffc3f - RTC @@ -161,8 +160,7 @@ wire acia_sel = io_sel && ({tg68_adr[15:8], 8'd0} == 16'hfc00); wire [7:0] acia_data_out; // blitter 16 bit interface at $ff8a00 - $ff8a3f -// wire blitter_sel = io_sel && ({tg68_adr[15:8], 8'd0} == 16'h8a00); -wire blitter_sel = 1'b0; +wire blitter_sel = system_ctrl[19] && io_sel && ({tg68_adr[15:8], 8'd0} == 16'h8a00); wire [15:0] blitter_data_out; // psg 8 bit interface at $ff8800 - $ff8803 @@ -203,7 +201,7 @@ video video ( .reg_dout (vreg_data_out), .vaddr (video_address ), - .data (video_data ), + .data (ram_data_out ), .read (video_read ), .hs (VGA_HS ), @@ -281,18 +279,35 @@ acia acia ( .ikbd_data_in (ikbd_data_to_acia) ); +wire [23:1] blitter_master_addr; +wire blitter_master_write; +wire 
blitter_master_read; +wire blitter_br; +wire [15:0] blitter_master_data_out; + blitter blitter ( +// .bus_cycle (bus_cycle[3:2] ), + .bus_cycle (bus_cycle_8 ), + // cpu interface - .clk (clk_8 ), - .reset (reset ), - .din (tg68_dat_out ), - .sel (blitter_sel ), - .addr (tg68_adr[5:1] ), - .uds (tg68_uds ), - .lds (tg68_uds ), - .rw (tg68_rw ), - .dout (blitter_data_out ), - .irq ( ) + .clk (clk_8 ), + .reset (reset ), + .din (tg68_dat_out ), + .sel (blitter_sel ), + .addr (tg68_adr[5:1] ), + .uds (tg68_uds ), + .lds (tg68_lds ), + .rw (tg68_rw ), + .dout (blitter_data_out ), + + .bm_addr (blitter_master_addr), + .bm_write (blitter_master_write), + .bm_data_out (blitter_master_data_out), + .bm_read (blitter_master_read), + .bm_data_in (ram_data_out), + + .br (blitter_br ), + .irq ( ) ); @@ -406,16 +421,22 @@ clock clock ( //// 8MHz clock //// wire [3:0] bus_cycle; reg [3:0] clk_cnt; +reg [1:0] bus_cycle_8; always @ (posedge clk_32, negedge pll_locked) begin - if (!pll_locked) - clk_cnt <= #1 4'b0010; - else - clk_cnt <= #1 clk_cnt + 4'd1; + + if (!pll_locked) begin + clk_cnt <= #1 4'b0010; + bus_cycle_8 <= 2'd3; + end else begin + clk_cnt <= #1 clk_cnt + 4'd1; + if(clk_cnt[1:0] == 2'd2) + bus_cycle_8 <= bus_cycle_8 + 2'd1; + end end assign clk_8 = clk_cnt[1]; -assign bus_cycle = clk_cnt-4'd2; +assign bus_cycle = clk_cnt - 4'd2; // SDRAM assign SDRAM_CKE = 1'b1; @@ -447,8 +468,6 @@ wire tg68_ena7WR; wire tg68_enaWR; wire [ 16-1:0] tg68_cout; wire tg68_cpuena; -// wire [ 2-1:0] cpu_config; -// wire [ 6-1:0] memcfg; wire [ 32-1:0] tg68_cad; wire [ 6-1:0] tg68_cpustate /* synthesis noprune */; wire tg68_cdma; @@ -536,7 +555,6 @@ TG68K tg68k ( .fromram (tg68_cout ), .ramready (tg68_cpuena ), .cpu (system_ctrl[5:4] ), // 00=68000 - .memcfg (6'b000000 ), // 00XXXX = no fastmem .ramaddr (tg68_cad ), .cpustate (tg68_cpustate ), .nResetOut ( ), @@ -549,18 +567,17 @@ TG68K tg68k ( // wire [15:0] cpu_data_in; -assign cpu_data_in = cpu2mem?ram_data:io_data_out; +assign 
cpu_data_in = cpu2mem?ram_data_out:io_data_out; // cpu/video stram multiplexing wire [22:0] ram_address; -wire [15:0] ram_data; +wire [15:0] ram_data_out; wire video_cycle = (bus_cycle[3:2] == 0); wire cpu_cycle = (bus_cycle[3:2] == 1); // || (bus_cycle[3:2] == 3); -wire io_cycle = (bus_cycle[3:2] == 2); -assign ram_address = video_cycle?video_address:tg68_adr[23:1]; -assign video_data = ram_data; +assign ram_address = video_cycle?video_address:(blitter_br?blitter_master_addr:tg68_adr[23:1]); +assign video_data = ram_data_out; // TODO: put 0x000000 to 0x000007 into tos section so it's write protected wire MEM512K = (system_ctrl[3:1] == 3'd0); @@ -611,13 +628,16 @@ always @(posedge clk_8) assign tg68_dtack = ~(((cpu2mem && address_strobe) || io_dtack ) && !br); wire ram_oe = video_cycle?~video_read: - (cpu_cycle?~(address_strobe && tg68_rw && cpu2mem):1'b1); + (cpu_cycle?~(blitter_br?blitter_master_read:(address_strobe && tg68_rw && cpu2mem)):1'b1); +// (cpu_cycle?~(address_strobe && tg68_rw && cpu2mem):1'b1); -wire ram_wr = cpu_cycle?~(address_strobe && ~tg68_rw && cpu2ram):1'b1; +wire ram_wr = cpu_cycle?~(blitter_br?blitter_master_write:(address_strobe && ~tg68_rw && cpu2ram)):1'b1; + +wire [15:0] ram_data_in = blitter_br?blitter_master_data_out:tg68_dat_out; // data strobe -wire ram_uds = video_cycle?1'b0:tg68_uds; -wire ram_lds = video_cycle?1'b0:tg68_lds; +wire ram_uds = video_cycle?1'b0:(blitter_br?1'b0:tg68_uds); +wire ram_lds = video_cycle?1'b0:(blitter_br?1'b0:tg68_lds); //// sdram //// sdram sdram ( @@ -655,14 +675,14 @@ sdram sdram ( .ena7WRreg (tg68_ena7WR ), // chip/slow ram interface - .chipWR (tg68_dat_out ), + .chipWR (ram_data_in ), .chipAddr (ram_address ), .chipU (ram_uds ), .chipL (ram_lds ), .chipRW (ram_wr ), .chip_dma (ram_oe ), .c_7m (clk_8 ), - .chipRD (ram_data ), + .chipRD (ram_data_out ), .reset_out ( ) );