From abdb1780895cf04c2ac622b0d91aa01ea9f65c85 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 23 Jul 2022 12:53:30 +0200 Subject: [PATCH] trying to debug DMA for RAMDsk --- .../DeclROM/NuBusFPGADrvr.h | 4 + .../DeclROM/NuBusFPGARAMDskDrvr.h | 25 +++++ .../DeclROM/NuBusFPGARAMDskDrvr_OpenClose.c | 18 ++++ .../DeclROM/NuBusFPGARAMDskDrvr_Prime.c | 92 ++++++++++++++++++- nubus-to-ztex-gateware/nubus_full_sampling.py | 70 +++++++++----- nubus-to-ztex-gateware/nubus_to_fpga_soc.py | 9 +- 6 files changed, 189 insertions(+), 29 deletions(-) diff --git a/nubus-to-ztex-gateware/DeclROM/NuBusFPGADrvr.h b/nubus-to-ztex-gateware/DeclROM/NuBusFPGADrvr.h index 0922dff..795df84 100644 --- a/nubus-to-ztex-gateware/DeclROM/NuBusFPGADrvr.h +++ b/nubus-to-ztex-gateware/DeclROM/NuBusFPGADrvr.h @@ -122,4 +122,8 @@ UInt32 Primary(SEBlock* block); #define Check32QDTrap 0xAB03 +static inline UInt32 revb(UInt32 d) { + return ((d&0xFFul)<<24) | ((d&0xFF00ul)<<8) | ((d&0xFF0000ul)>>8) | ((d&0xFF000000ul)>>24); +} + #endif diff --git a/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr.h b/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr.h index 16b19df..ce674e9 100644 --- a/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr.h +++ b/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr.h @@ -8,15 +8,40 @@ #include #include +#define ENABLE_DMA 1 + #include "NuBusFPGADrvr.h" struct RAMDrvContext { DrvSts2 drvsts; char slot; +#ifdef ENABLE_DMA + unsigned int dma_blk_size; + unsigned int dma_blk_size_mask; + unsigned int dma_blk_size_shift; + unsigned long dma_blk_base; + unsigned long dma_mem_size; +#endif }; #define DRIVE_SIZE_BYTES ((256ul-8ul)*1024ul*1024ul) // FIXME: mem size minus fb size +#ifdef ENABLE_DMA +/* FIXME; should be auto-generated for CSR addresses... */ +/* WARNING: 0x00100800 is the offset to GOBOFB_BASE !! */ +#define DMA_BLK_SIZE (0x00100800 | 0x00) +#define DMA_BLK_BASE (0x00100800 | 0x04) +#define DMA_MEM_SIZE (0x00100800 | 0x08) +//#define DMA_IRQ_CTL (0x00a00800 | 0x0c) // IRQ not connected +#define DMA_BLK_ADDR (0x00100800 | 0x10) +#define DMA_DMA_ADDR (0x00100800 | 0x14) +#define DMA_BLK_CNT (0x00100800 | 0x18) + +#define DMA_STATUS (0x00100800 | 0x2c) +#define DMA_STATUS_CHECK_BITS (0x01F) + +#endif + uint32_t rledec(uint32_t* out, const uint32_t* in, const uint32_t len); #endif diff --git a/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_OpenClose.c b/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_OpenClose.c index 76f826a..53713d6 100644 --- a/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_OpenClose.c +++ b/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_OpenClose.c @@ -95,6 +95,22 @@ OSErr cNuBusFPGARAMDskOpen(IOParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce) // add the drive MyAddDrive(dsptr->dQRefNum, drvnum, (DrvQElPtr)&dsptr->qLink); + +#ifdef ENABLE_DMA + ctx->dma_blk_size = revb( read_reg(dce, DMA_BLK_SIZE) ); + ctx->dma_blk_size_mask = ctx->dma_blk_size - 1; // size is Po2 + ctx->dma_blk_size_shift = 0; + while ((1 << ctx->dma_blk_size_shift) < ctx->dma_blk_size) + ctx->dma_blk_size_shift++; + ctx->dma_blk_base = revb( read_reg(dce, DMA_BLK_BASE) ); + ctx->dma_mem_size = revb( read_reg(dce, DMA_MEM_SIZE) ); + /* write_reg(dce, GOBOFB_DEBUG, 0xD1580002); */ + /* write_reg(dce, GOBOFB_DEBUG, ctx->dma_blk_size); */ + /* write_reg(dce, GOBOFB_DEBUG, ctx->dma_blk_size_mask); */ + /* write_reg(dce, GOBOFB_DEBUG, ctx->dma_blk_size_shift); */ + /* write_reg(dce, GOBOFB_DEBUG, ctx->dma_blk_base); */ + /* write_reg(dce, GOBOFB_DEBUG, ctx->dma_mem_size); */ +#endif // auto-mount { @@ -102,6 +118,8 @@ OSErr cNuBusFPGARAMDskOpen(IOParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce) pbr.volumeParam.ioVRefNum = dsptr->dQDrive; ret = PBMountVol(&pbr); } + + } SwapMMUMode ( &busMode ); diff --git a/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_Prime.c b/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_Prime.c index 3a01bc9..d485002 100644 --- a/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_Prime.c +++ b/nubus-to-ztex-gateware/DeclROM/NuBusFPGARAMDskDrvr_Prime.c @@ -32,17 +32,105 @@ OSErr cNuBusFPGARAMDskPrime(IOParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce) default: break; } +#define MAX_COUNT 100 /* **** WHAT **** */ /* Devices 1-33 (p53) */ if ((pb->ioTrap & 0x00FF) == aRdCmd) { if(!(pb->ioPosMode & 0x40)) { // rdVerify, let's ignore it for now - BlockMoveData((superslot + abs_offset), pb->ioBuffer, pb->ioReqCount); +#ifdef ENABLE_DMA + /* write_reg(dce, GOBOFB_DEBUG, 0xD1580000); */ + /* write_reg(dce, GOBOFB_DEBUG, (unsigned long)pb->ioBuffer); */ + /* write_reg(dce, GOBOFB_DEBUG, pb->ioReqCount); */ + if ((((unsigned long)pb->ioBuffer & ctx->dma_blk_size_mask) == 0) && + (((unsigned long)pb->ioReqCount & ctx->dma_blk_size_mask) == 0) && + (((unsigned long)abs_offset & ctx->dma_blk_size_mask) == 0)) { + short count; + unsigned long blk_cnt, status; + blk_cnt = revb(read_reg(dce, DMA_BLK_CNT)); + status = revb(read_reg(dce, DMA_STATUS)) & DMA_STATUS_CHECK_BITS; + if ((blk_cnt == 0) && (status == 0)) { + write_reg(dce, DMA_BLK_ADDR, revb(ctx->dma_blk_base + (abs_offset >> ctx->dma_blk_size_shift))); + write_reg(dce, DMA_DMA_ADDR, revb(pb->ioBuffer)); + write_reg(dce, DMA_BLK_CNT, revb(0x00000000ul | (pb->ioReqCount >> ctx->dma_blk_size_shift))); + count = 0; + while (((blk_cnt = revb(read_reg(dce, DMA_BLK_CNT))) != 0) && (count < MAX_COUNT)) + count ++; + count = 0; + while ((((status = revb(read_reg(dce, DMA_STATUS)) & DMA_STATUS_CHECK_BITS)) != 0) && (count < MAX_COUNT)) + count ++; + } + if (blk_cnt || status) { + BlockMoveData((superslot + abs_offset), pb->ioBuffer, pb->ioReqCount); + } else { + unsigned int k = 0; + while ((((unsigned long*)(superslot))[5] == 0x12345678) && (((unsigned long*)(superslot))[9] == 0x87654321) && (k < 7)) { + k++; + superslot += 64; + } + if ((((unsigned long*)(superslot))[5] != 0x12345678) || (((unsigned long*)(superslot))[9] != 0x87654321)) { + unsigned int i; + for (i = 0 ; i < pb->ioReqCount ; i+=4 ) { + if ((*(unsigned long*)(superslot + abs_offset + i)) != (*(unsigned long*)((char*)pb->ioBuffer + i))) { + ((unsigned long*)(superslot))[0] = ctx->dma_blk_size; + ((unsigned long*)(superslot))[1] = ctx->dma_blk_size_mask; + ((unsigned long*)(superslot))[2] = ctx->dma_blk_size_shift; + ((unsigned long*)(superslot))[3] = ctx->dma_blk_base; + ((unsigned long*)(superslot))[4] = ctx->dma_mem_size; + ((unsigned long*)(superslot))[5] = 0x12345678; + ((unsigned long*)(superslot))[6] = pb->ioBuffer; + ((unsigned long*)(superslot))[7] = pb->ioReqCount; + ((unsigned long*)(superslot))[8] = abs_offset; + ((unsigned long*)(superslot))[9] = 0x87654321; + ((unsigned long*)(superslot))[10] = i; + ((unsigned long*)(superslot))[11] = (*(unsigned long*)(superslot + abs_offset + i)); + ((unsigned long*)(superslot))[12] = (*(unsigned long*)((char*)pb->ioBuffer + i)); + ((unsigned long*)(superslot))[13] = (*(unsigned long*)(superslot + abs_offset + i + 4)); + ((unsigned long*)(superslot))[14] = (*(unsigned long*)((char*)pb->ioBuffer + i + 4)); + i += 4; + } + } + } + } + } else +#endif + { + BlockMoveData((superslot + abs_offset), pb->ioBuffer, pb->ioReqCount); + } } pb->ioActCount = pb->ioReqCount; dce->dCtlPosition = abs_offset + pb->ioReqCount; pb->ioPosOffset = dce->dCtlPosition; } else if ((pb->ioTrap & 0x00FF) == aWrCmd) { - BlockMoveData(pb->ioBuffer, (superslot + abs_offset), pb->ioReqCount); +#if 0//def ENABLE_DMA + /* write_reg(dce, GOBOFB_DEBUG, 0xD1580001); */ + /* write_reg(dce, GOBOFB_DEBUG, (unsigned long)pb->ioBuffer); */ + /* write_reg(dce, GOBOFB_DEBUG, pb->ioReqCount); */ + if ((((unsigned long)pb->ioBuffer & ctx->dma_blk_size_mask) == 0) && + (((unsigned long)pb->ioReqCount & ctx->dma_blk_size_mask) == 0) && + (((unsigned long)abs_offset & ctx->dma_blk_size_mask) == 0)) { + short count; + unsigned long blk_cnt, status; + blk_cnt = revb(read_reg(dce, DMA_BLK_CNT)); + status = revb(read_reg(dce, DMA_STATUS)) & DMA_STATUS_CHECK_BITS; + if ((blk_cnt == 0) && (status == 0)) { + write_reg(dce, DMA_BLK_ADDR, revb(ctx->dma_blk_base + (abs_offset >> ctx->dma_blk_size_shift))); + write_reg(dce, DMA_DMA_ADDR, revb(pb->ioBuffer)); + write_reg(dce, DMA_BLK_CNT, revb(0x80000000ul | (pb->ioReqCount >> ctx->dma_blk_size_shift))); + count = 0; + while (((blk_cnt = revb(read_reg(dce, DMA_BLK_CNT))) != 0) && (count < MAX_COUNT)) + count ++; + count = 0; + while ((((status = revb(read_reg(dce, DMA_STATUS)) & DMA_STATUS_CHECK_BITS)) != 0) && (count < MAX_COUNT)) + count ++; + } + if (blk_cnt || status) { + BlockMoveData(pb->ioBuffer, (superslot + abs_offset), pb->ioReqCount); + } + } else +#endif + { + BlockMoveData(pb->ioBuffer, (superslot + abs_offset), pb->ioReqCount); + } pb->ioActCount = pb->ioReqCount; dce->dCtlPosition = abs_offset + pb->ioReqCount; pb->ioPosOffset = dce->dCtlPosition; diff --git a/nubus-to-ztex-gateware/nubus_full_sampling.py b/nubus-to-ztex-gateware/nubus_full_sampling.py index ed78a54..b07b626 100644 --- a/nubus-to-ztex-gateware/nubus_full_sampling.py +++ b/nubus-to-ztex-gateware/nubus_full_sampling.py @@ -41,11 +41,11 @@ class NuBus(Module): # slave tmo_oe = Signal() # output enable tm0_i_n = Signal() - tm0_o_n = Signal() + tm0_o_n = Signal(reset = 1) tm1_i_n = Signal() - tm1_o_n = Signal() + tm1_o_n = Signal(reset = 1) ack_i_n = Signal() - ack_o_n = Signal() + ack_o_n = Signal(reset = 1) ad_oe = Signal() ad_i_n = Signal(32) @@ -54,12 +54,12 @@ class NuBus(Module): id_i_n = Signal(4) start_i_n = Signal() - start_o_n = Signal() # master via master_oe + start_o_n = Signal(reset = 1) # master via master_oe # master rqst_oe = Signal() rqst_i_n = Signal() - rqst_o_n = Signal() + rqst_o_n = Signal(reset = 1) # sampled signals, exposing the value of the register acquired on the falling edge # they can change every cycle *on falling edge* @@ -69,6 +69,7 @@ class NuBus(Module): sampled_start = Signal() sampled_ack = Signal() sampled_ad = Signal(32) + sampled_ad_byterev = Signal(32) # master sampled_rqst = Signal() @@ -85,6 +86,10 @@ class NuBus(Module): processed_ad[23:32].eq(Cat(sampled_ad[23], Signal(8, reset = 0xf0)))), # 24 bits, a.k.a 22 bits of words processed_super_ad[0:28].eq(sampled_ad[0:28]), processed_super_ad[28:32].eq(Signal(4, reset = 0x8)), + sampled_ad_byterev[ 0: 8].eq(sampled_ad[24:32]), + sampled_ad_byterev[ 8:16].eq(sampled_ad[16:24]), + sampled_ad_byterev[16:24].eq(sampled_ad[ 8:16]), + sampled_ad_byterev[24:32].eq(sampled_ad[ 0: 8]), ] # decoded signals, exposing decoded results from the sampled signals @@ -338,6 +343,9 @@ class NuBus(Module): tosbus_fifo_dout = Record(soc.tosbus_layout) self.comb += tosbus_fifo_dout.raw_bits().eq(tosbus_fifo.dout) + tosbus_fifo_dout_data_byterev = Signal(data_width_bits) + tosbus_fifo_dout_bytereversal_stmts = [ tosbus_fifo_dout_data_byterev[k*32+j*8:k*32+j*8+8].eq(tosbus_fifo_dout.data[k*32+32-j*8-8:k*32+32-j*8]) for k in range(burst_size) for j in range(4) ] + self.comb += tosbus_fifo_dout_bytereversal_stmts fromsbus_req_fifo_dout = Record(soc.fromsbus_req_layout) self.comb += fromsbus_req_fifo_dout.raw_bits().eq(fromsbus_req_fifo.dout) @@ -422,7 +430,7 @@ class NuBus(Module): If(sampled_ack, wb_dma.ack.eq(1), # fixme: check status ??? (tm0 and tm1 should be active for no-error) - NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), + #NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), NextState("FinishCycle"), ) ) @@ -442,7 +450,7 @@ class NuBus(Module): If(sampled_ack, wb_dma.ack.eq(1), # fixme: check status ??? (tm0 and tm1 should be active for no-error) - NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), + #NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), NextState("FinishCycle"), ) ) @@ -473,15 +481,19 @@ class NuBus(Module): start_o_n.eq(1), # start finished, but still need to be driven If(sampled_ack, # oups fromsbus_req_fifo.re.eq(1), # remove request to avoid infinite repeat - NextValue(led0, 1), - NextValue(led1, 1), + #NextValue(led0, 1), + #NextValue(led1, 1), NextState("FinishCycle"), ).Elif(sampled_tm0, Case(ctr, { - 0x0: NextValue(fifo_buffer[ 0: 32], sampled_ad), - 0x1: NextValue(fifo_buffer[32: 64], sampled_ad), - 0x2: NextValue(fifo_buffer[64: 96], sampled_ad), - #0x3: NextValue(fifo_buffer[96:128], sampled_ad), + #0x0: NextValue(fifo_buffer[ 0: 32], sampled_ad), + #0x1: NextValue(fifo_buffer[32: 64], sampled_ad), + #0x2: NextValue(fifo_buffer[64: 96], sampled_ad), + ##0x3: NextValue(fifo_buffer[96:128], sampled_ad), + 0x0: NextValue(fifo_buffer[ 0: 32], sampled_ad_byterev), + 0x1: NextValue(fifo_buffer[32: 64], sampled_ad_byterev), + 0x2: NextValue(fifo_buffer[64: 96], sampled_ad_byterev), + #0x3: NextValue(fifo_buffer[96:128], sampled_ad_byterev), }), NextValue(ctr, ctr + 1), If(ctr == 0x2, # burst next-to-last @@ -498,9 +510,10 @@ class NuBus(Module): fromsbus_req_fifo.re.eq(1), # remove request fromsbus_fifo.we.eq(1), fromsbus_fifo_din.blkaddress.eq(fifo_blk_addr), - fromsbus_fifo_din.data.eq(Cat(fifo_buffer[0:96], sampled_ad)), # we use sampled_ad directly for 96:128 + #fromsbus_fifo_din.data.eq(Cat(fifo_buffer[0:96], sampled_ad)), # we use sampled_ad directly for 96:128 + fromsbus_fifo_din.data.eq(Cat(fifo_buffer[0:96], sampled_ad_byterev)), # we use sampled_ad directly for 96:128 # fixme: check status ??? (tm0 and tm1 should be active for no-error) - NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), + #NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), NextState("FinishCycle"), ) ) @@ -509,14 +522,18 @@ class NuBus(Module): ad_oe.eq(1), # for write data start_o_n.eq(1), # start finished, but still need to be driven Case(ctr, { - 0x0: ad_o_n.eq(~tosbus_fifo_dout.data[ 0: 32]), - 0x1: ad_o_n.eq(~tosbus_fifo_dout.data[32: 64]), - 0x2: ad_o_n.eq(~tosbus_fifo_dout.data[64: 96]), - #0x3: ad_o_n.eq(~tosbus_fifo_dout.data[96:128]), + #0x0: ad_o_n.eq(~tosbus_fifo_dout.data[ 0: 32]), + #0x1: ad_o_n.eq(~tosbus_fifo_dout.data[32: 64]), + #0x2: ad_o_n.eq(~tosbus_fifo_dout.data[64: 96]), + ##0x3: ad_o_n.eq(~tosbus_fifo_dout.data[96:128]), + 0x0: ad_o_n.eq(~tosbus_fifo_dout_data_byterev[ 0: 32]), + 0x1: ad_o_n.eq(~tosbus_fifo_dout_data_byterev[32: 64]), + 0x2: ad_o_n.eq(~tosbus_fifo_dout_data_byterev[64: 96]), + #0x3: ad_o_n.eq(~tosbus_fifo_dout_data_byterev[96:128]), }), If(sampled_ack, # oups - NextValue(led0, 1), - NextValue(led1, 1), + #NextValue(led0, 1), + #NextValue(led1, 1), tosbus_fifo.re.eq(1), # remove FIFO entry to avoid infinite repeat NextState("FinishCycle"), ).Elif(sampled_tm0, @@ -532,14 +549,21 @@ class NuBus(Module): master_oe.eq(1), # for start ad_oe.eq(1), # for write data start_o_n.eq(1), # start finished, but still need to be driven - ad_o_n.eq(~tosbus_fifo_dout.data[96:128]), # last word + #ad_o_n.eq(~tosbus_fifo_dout.data[96:128]), # last word + ad_o_n.eq(~tosbus_fifo_dout_data_byterev[96:128]), # last word If(sampled_ack, tosbus_fifo.re.eq(1), # remove FIFO entry at last # fixme: check status ??? (tm0 and tm1 should be active for no-error) - NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), + #NextValue(led0, (~sampled_tm0 | ~sampled_tm1)), NextState("FinishCycle"), ) ) + + self.comb += [ + led0.eq(~dma_fsm.ongoing("Idle")), + #led1.eq(dma_fsm.ongoing("Burst4DatCycleAck") | dma_fsm.ongoing("Burst4DatCycleTM0") ), + led1.eq(sampled_rqst | wb_dma.cyc), + ] # stuff at this end so we don't use the signals inadvertantly diff --git a/nubus-to-ztex-gateware/nubus_to_fpga_soc.py b/nubus-to-ztex-gateware/nubus_to_fpga_soc.py index e6ff81a..77a5114 100644 --- a/nubus-to-ztex-gateware/nubus_to_fpga_soc.py +++ b/nubus-to-ztex-gateware/nubus_to_fpga_soc.py @@ -344,9 +344,9 @@ class NuBusFPGA(SoCCore): ("dmaaddress", 32), ] - self.submodules.tosbus_fifo = ClockDomainsRenamer({"read": "nubus", "write": "sys"})(AsyncFIFOBuffered(width=layout_len(self.tosbus_layout), depth=burst_size)) - self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "nubus", "read": "sys"})(AsyncFIFOBuffered(width=layout_len(self.fromsbus_layout), depth=burst_size)) - self.submodules.fromsbus_req_fifo = ClockDomainsRenamer({"read": "nubus", "write": "sys"})(AsyncFIFOBuffered(width=layout_len(self.fromsbus_req_layout), depth=burst_size)) + self.submodules.tosbus_fifo = ClockDomainsRenamer({"read": "nubus", "write": "sys"})(AsyncFIFOBuffered(width=layout_len(self.tosbus_layout), depth=1024//data_width)) + self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "nubus", "read": "sys"})(AsyncFIFOBuffered(width=layout_len(self.fromsbus_layout), depth=512//data_width)) + self.submodules.fromsbus_req_fifo = ClockDomainsRenamer({"read": "nubus", "write": "sys"})(AsyncFIFOBuffered(width=layout_len(self.fromsbus_req_layout), depth=512//data_width)) self.submodules.exchange_with_mem = ExchangeWithMem(soc=self, platform=platform, @@ -357,7 +357,8 @@ class NuBusFPGA(SoCCore): dram_native_w=self.sdram.crossbar.get_port(mode="write", data_width=data_width_bits), mem_size=avail_sdram//1048576, burst_size=burst_size, - do_checksum = False) + do_checksum = False, + clock_domain="nubus") self.submodules.nubus = nubus_full_sampling.NuBus(soc=self, burst_size=burst_size,