1
0
mirror of synced 2026-03-05 10:24:10 +00:00

Add direct memory access support to the Jareth load/store unit (ExecLS)

This commit is contained in:
Romain Dolbeau
2022-03-12 13:05:55 +01:00
parent 12ea2bc18b
commit 798a4975c0
3 changed files with 153 additions and 60 deletions

View File

@@ -130,11 +130,15 @@ static void goblin_init(struct goblin_softc *);
static void goblin_reset(struct goblin_softc *);
/* Jareth stuff */
/*
 * Diagnostic level handed to the Jareth job helpers.
 * start_job() and wait_job() only emit their aprint_*_dev() messages
 * when the level is jareth_verbose; the return codes are unaffected.
 */
enum jareth_verbosity {
	jareth_silent = 0,	/* no messages; passed by jareth_copyrows() */
	jareth_verbose = 1	/* print messages; passed by the GOBLIN_SCROLL/GOBLIN_FILL ioctls */
};
static int init_programs(struct goblin_softc *sc);
static int power_on(struct goblin_softc *sc);
static int power_off(struct goblin_softc *sc);
static int jareth_scroll(struct goblin_softc *sc, int y0, int y1, int x0, int w, int n);
static int jareth_fill(struct goblin_softc *sc, int y0, int pat, int x0, int w, int n);
static int jareth_scroll(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int y1, int x0, int w, int n);
static int jareth_fill(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int pat, int x0, int w, int n);
static const uint32_t program_scroll128[12] = { 0x407c0012,0x00140080,0x201c0013,0x60fc7013,0x00170146,0xfe000148,0x000e10c6,0x010000c9,0x00004005,0xfb000809,0x0000000a,0x0000000a };
static const uint32_t program_fill128[12] = { 0x407c0012,0x00140080,0x607c1013,0x00170146,0xfe800148,0x000e10c6,0x010000c9,0x00004005,0xfb800809,0x0000000a,0x0000000a,0x0000000a };
@@ -361,13 +365,13 @@ goblinioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
case GOBLIN_SCROLL: {
struct scrolltest *st = (struct scrolltest *)data;
jareth_scroll(sc, st->y0, st->y1, st->x0, st->w, st->n);
jareth_scroll(sc, jareth_verbose, st->y0, st->y1, st->x0, st->w, st->n);
}
break;
case GOBLIN_FILL: {
struct scrolltest *st = (struct scrolltest *)data;
jareth_fill(sc, st->y0, st->y1, st->x0, st->w, st->n);
jareth_fill(sc, jareth_verbose, st->y0, st->y1, st->x0, st->w, st->n);
}
break;
@@ -726,10 +730,10 @@ goblin_reset(struct goblin_softc *sc)
#define REG_BASE(reg) (base + (reg * 32))
#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4)
static int start_job(struct goblin_softc *sc, int verbose);
static int wait_job(struct goblin_softc *sc, uint32_t param, int verbose);
static int start_job(struct goblin_softc *sc, enum jareth_verbosity verbose);
static int wait_job(struct goblin_softc *sc, uint32_t param, enum jareth_verbosity verbose);
static int jareth_scroll(struct goblin_softc *sc, int y0, int y1, int x0, int w, int n) {
static int jareth_scroll(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int y1, int x0, int w, int n) {
const uint32_t base = 0;
const int pidx = 0;
int i;
@@ -756,9 +760,9 @@ static int jareth_scroll(struct goblin_softc *sc, int y0, int y1, int x0, int w,
jareth_mpstart_write(sc, program_offset[pidx]);
jareth_mplen_write(sc, program_len[pidx]);
(void)start_job(sc, 0);
(void)start_job(sc, verbose);
delay(1);
(void)wait_job(sc, 2, 0);
(void)wait_job(sc, 2, verbose);
power_off(sc);
@@ -766,7 +770,7 @@ static int jareth_scroll(struct goblin_softc *sc, int y0, int y1, int x0, int w,
}
static int jareth_fill(struct goblin_softc *sc, int y0, int pat, int x0, int w, int n) {
static int jareth_fill(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int pat, int x0, int w, int n) {
const uint32_t base = 0;
const int pidx = 1;
int i;
@@ -787,9 +791,9 @@ static int jareth_fill(struct goblin_softc *sc, int y0, int pat, int x0, int w,
jareth_mpstart_write(sc, program_offset[pidx]);
jareth_mplen_write(sc, program_len[pidx]);
(void)start_job(sc, 0);
(void)start_job(sc, verbose);
delay(1);
(void)wait_job(sc, 1, 0);
(void)wait_job(sc, 1, verbose);
power_off(sc);
@@ -834,7 +838,7 @@ jareth_copyrows(void *cookie, int src, int dst, int n)
/* int x3 = ri->ri_xorigin + ri->ri_emuwidth - 1; */
/* int y3 = ri->ri_yorigin + dst + n - 1; */
jareth_scroll(sc, y0, y2, x0, ri->ri_emuwidth, n);
jareth_scroll(sc, jareth_silent, y0, y2, x0, ri->ri_emuwidth, n);
#if 0
if (y0 > y2) {
@@ -859,10 +863,10 @@ jareth_copyrows(void *cookie, int src, int dst, int n)
#endif
}
static int start_job(struct goblin_softc *sc, int verbose) {
static int start_job(struct goblin_softc *sc, enum jareth_verbosity verbose) {
uint32_t status = jareth_status_read(sc);
if (status & (1<<CSR_JARETH_STATUS_RUNNING_OFFSET)) {
if (verbose)
if (verbose == jareth_verbose)
aprint_error_dev(sc->sc_dev, "START - Jareth status: 0x%08x, still running?\n", status);
return ENXIO;
}
@@ -872,7 +876,7 @@ static int start_job(struct goblin_softc *sc, int verbose) {
return 0;
}
static int wait_job(struct goblin_softc *sc, uint32_t param, int verbose) {
static int wait_job(struct goblin_softc *sc, uint32_t param, enum jareth_verbosity verbose) {
uint32_t status = jareth_status_read(sc);
int count = 0;
int max_count = 3000;
@@ -891,26 +895,26 @@ static int wait_job(struct goblin_softc *sc, uint32_t param, int verbose) {
}
if (del > max_del_seen) {
max_del_seen = del;
if (verbose)
if (verbose == jareth_verbose)
aprint_normal_dev(sc->sc_dev, "WAIT - new max delay %d after %d count (param was %u)\n", max_del_seen, count, param);
}
if (count > max_cnt_seen) {
max_cnt_seen = count;
if (verbose)
if (verbose == jareth_verbose)
aprint_normal_dev(sc->sc_dev, "WAIT - new max count %d with %d delay (param was %u)\n", max_cnt_seen, del, param);
}
//jareth_control_write(sc, 0);
if (status & (1<<CSR_JARETH_STATUS_RUNNING_OFFSET)) {
if (verbose)
if (verbose == jareth_verbose)
aprint_error_dev(sc->sc_dev, "WAIT - Jareth status: 0x%08x (pc 0x%08x), did not finish in time? [inst: 0x%08x ls_status: 0x%08x]\n", status, (status>>1)&0x03ff, jareth_instruction_read(sc), jareth_ls_status_read(sc));
return ENXIO;
} else if (status & (1<<CSR_JARETH_STATUS_SIGILL_OFFSET)) {
if (verbose)
if (verbose == jareth_verbose)
aprint_error_dev(sc->sc_dev, "WAIT - Jareth status: 0x%08x, sigill [inst: 0x%08x ls_status: 0x%08x]\n", status, jareth_instruction_read(sc), jareth_ls_status_read(sc));
return ENXIO;
} else if (status & (1<<CSR_JARETH_STATUS_ABORT_OFFSET)) {
if (verbose)
if (verbose == jareth_verbose)
aprint_error_dev(sc->sc_dev, "WAIT - Jareth status: 0x%08x, aborted [inst: 0x%08x ls_status: 0x%08x]\n", status, jareth_instruction_read(sc), jareth_ls_status_read(sc));
return ENXIO;
} else {

View File

@@ -346,7 +346,7 @@ class ExecAddSub(ExecUnit, AutoDoc):
]
class ExecLS(ExecUnit, AutoDoc):
def __init__(self, width=256, interface=None, r_dat_f=None, r_dat_m=None, granule=0):
def __init__(self, width=256, interface=None, memoryport=None, r_dat_f=None, r_dat_m=None, granule=0):
ExecUnit.__init__(self, width, ["MEM", "SETM", "ADR", "LOADH", "GETM"])
self.notes = ModuleDoc(title=f"Load/Store ExecUnit Subclass", body=f"""
@@ -357,7 +357,9 @@ class ExecLS(ExecUnit, AutoDoc):
]
assert(width == 256) # fixme
assert(len(interface.sel) == 16) # 128 bits Wishbone
assert((len(interface.sel) == 16)) # 128 bits Wishbone
assert((len(memoryport.rdata.data) == 128)) # 128 bits memory
assert((len(memoryport.wdata.data) == 128)) # 128 bits memory
start_pipe = Signal()
self.sync.mul_clk += start_pipe.eq(self.start) # break critical path of instruction decode -> SETUP_A state muxes
@@ -380,34 +382,28 @@ class ExecLS(ExecUnit, AutoDoc):
offsetpsize = Signal(max_size_bits+1, reset = 0)
addresses = Array(Signal(28) for x in range(width//32)) # 128-bits chunk, so 16-bytes chunk, so low 4 bits are ignored
address = Signal(28)
wishbone = Signal()
#if ((interface != None) and (memoryport != None)):
# self.comb += [ wishbone.eq(addresses[self.instruction.immediate[0:log2_int(width//32)]][24:28] != 0x8), ] # fixme ; 0x8 is SDRAM memory map prefix
#else:
# if (interface == None):
# self.comb += [ wishbone.eq(0), ]
# else: # memoryport == None
# self.comb += [ wishbone.eq(1), ]
#if (memoryport != None):
self.comb += [ memoryport.rdata.ready.eq(1),
memoryport.wdata.we.eq(Replicate(1, len(memoryport.wdata.we))), ]
lsseq.act("IDLE",
If(start_pipe,
If(self.instruction.opcode == opcodes["MEM"][0],
If((self.instruction.opcode == opcodes["MEM"][0]) | (self.instruction.opcode == opcodes["LOADH"][0]),
NextValue(cpar, 0),
NextValue(self.has_timeout, 0),
NextValue(self.has_failure, 0),
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, addresses[self.instruction.immediate[0:log2_int(width//32)]]),
NextValue(interface.we, self.instruction.immediate[7]),
NextValue(timeout, 2047),
If(self.instruction.immediate[7], # do we need those tests or could we always update dat_w/dat_r ?
NextValue(interface.dat_w, self.b[0:128])),
NextState("MEMl") # MEMl
).Elif(self.instruction.opcode == opcodes["LOADH"][0],
NextValue(cpar, 0),
NextValue(self.has_timeout, 0),
NextValue(self.has_failure, 0),
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, addresses[self.instruction.immediate[0:log2_int(width//32)]]),
NextValue(interface.we, self.instruction.immediate[7]),
NextValue(timeout, 2047),
NextValue(lbuf[0:128], self.b[128:256]),
NextState("MEMh") # MEMl
NextValue(address, addresses[self.instruction.immediate[0:log2_int(width//32)]]),
NextValue(wishbone, ~(addresses[self.instruction.immediate[0:log2_int(width//32)]] == 0x8)),
NextState("DOMEM"),
).Elif(self.instruction.opcode == opcodes["SETM"][0],
Case(self.instruction.immediate[0:2],
{ 0x3 : [ NextValue(r_dat_f[0], 0),
@@ -443,6 +439,52 @@ class ExecLS(ExecUnit, AutoDoc):
)
)
)
lsseq.act("DOMEM",
NextValue(cpar, cpar ^ 1),
If(self.instruction.opcode == opcodes["MEM"][0],
NextValue(self.has_timeout, 0),
NextValue(self.has_failure, 0),
NextValue(timeout, 2047),
If(wishbone,
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, address),
NextValue(interface.we, self.instruction.immediate[7]),
If(self.instruction.immediate[7], # do we need those tests or could we always update dat_w/dat_r ?
NextValue(interface.dat_w, self.b[0:128])),
NextState("MEMl") # MEMl
).Else(
memoryport.cmd.we.eq(self.instruction.immediate[7]),
memoryport.cmd.addr.eq(address[0:]),
memoryport.cmd.valid.eq(1),
If(memoryport.cmd.ready,
NextState("MEMl")
)
),
).Elif(self.instruction.opcode == opcodes["LOADH"][0],
NextValue(cpar, 0),
NextValue(self.has_timeout, 0),
NextValue(self.has_failure, 0),
NextValue(timeout, 2047),
NextValue(lbuf[0:128], self.b[128:256]),
If(wishbone,
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, address),
NextValue(interface.we, self.instruction.immediate[7]),
NextState("MEMh") # MEMh
).Else(
memoryport.cmd.we.eq(self.instruction.immediate[7]),
memoryport.cmd.addr.eq(address[0:]),
memoryport.cmd.valid.eq(1),
If(memoryport.cmd.ready,
NextState("MEMh")
)
)
)
)
for X in range(0, granule_num):
lsseq.act("GENMASK_R" + str(X),
NextValue(cpar, cpar ^ 1),
@@ -469,20 +511,28 @@ class ExecLS(ExecUnit, AutoDoc):
NextState("MEM_EVEN1")
)
)
lsseq.act("MEMl",
NextValue(cpar, cpar ^ 1),
If(interface.ack,
If(wishbone & interface.ack,
If(~self.instruction.immediate[7],
NextValue(lbuf[0:128], interface.dat_r)),
NextValue(interface.cyc, 0),
NextValue(interface.stb, 0),
NextState("MEMl2")
).Elif(interface.err,
).Elif(wishbone & interface.err,
NextValue(self.has_failure[0], 1),
NextValue(interface.cyc, 0),
NextValue(interface.stb, 0),
NextState("ERR"),
).Elif(~wishbone & ~self.instruction.immediate[7] & memoryport.rdata.valid,
NextValue(lbuf[0:128], memoryport.rdata.data),
NextState("MEMl2"),
).Elif(~wishbone & self.instruction.immediate[7],
memoryport.wdata.data.eq(self.b[0:128]),
memoryport.wdata.valid.eq(1),
If(memoryport.wdata.ready,
NextState("MEMl2"),
),
).Elif(timeout == 0,
NextValue(self.has_timeout[0], 1),
NextValue(interface.cyc, 0),
@@ -491,7 +541,7 @@ class ExecLS(ExecUnit, AutoDoc):
))
lsseq.act("MEMl2",
NextValue(cpar, cpar ^ 1),
If(~interface.ack,
If(wishbone & ~interface.ack,
If(self.instruction.immediate[6], # post-inc
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
),
@@ -499,7 +549,7 @@ class ExecLS(ExecUnit, AutoDoc):
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, (addresses[self.instruction.immediate[0:log2_int(width//32)]]) + 1),
NextValue(interface.adr, address + 1),
NextValue(interface.we, self.instruction.immediate[7]),
NextValue(timeout, 2047),
If(self.instruction.immediate[7],
@@ -513,20 +563,52 @@ class ExecLS(ExecUnit, AutoDoc):
NextState("MEM_EVEN1")
)
)
).Elif(~wishbone,
If(self.instruction.immediate[8],
memoryport.cmd.we.eq(self.instruction.immediate[7]),
memoryport.cmd.addr.eq(address[0:] + 1),
memoryport.cmd.valid.eq(1),
NextValue(timeout, 2047),
If(memoryport.cmd.ready,
If(self.instruction.immediate[6], # post-inc
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
),
NextState("MEMh"),
)
).Else( # no high
If(self.instruction.immediate[6], # post-inc
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
),
NextValue(lbuf[128:256], 0),
If(cpar, ## checkme
NextState("MEM_ODD")
).Else(
NextState("MEM_EVEN1")
)
),
))
lsseq.act("MEMh",
NextValue(cpar, cpar ^ 1),
If(interface.ack,
If(wishbone & interface.ack,
If(~self.instruction.immediate[7],
NextValue(lbuf[128:256], interface.dat_r)),
NextValue(interface.cyc, 0),
NextValue(interface.stb, 0),
NextState("MEMh2")
).Elif(interface.err,
).Elif(wishbone & interface.err,
NextValue(self.has_failure[1], 1),
NextValue(interface.cyc, 0),
NextValue(interface.stb, 0),
NextState("ERR"),
).Elif(~wishbone & ~self.instruction.immediate[7] & memoryport.rdata.valid,
NextValue(lbuf[128:256], memoryport.rdata.data),
NextState("MEMh2"),
).Elif(~wishbone & self.instruction.immediate[7],
memoryport.wdata.data.eq(self.b[128:256]),
memoryport.wdata.valid.eq(1),
If(memoryport.wdata.ready,
NextState("MEMh2"),
),
).Elif(timeout == 0,
NextValue(self.has_timeout[1], 1),
NextValue(interface.cyc, 0),
@@ -535,7 +617,7 @@ class ExecLS(ExecUnit, AutoDoc):
))
lsseq.act("MEMh2",
NextValue(cpar, cpar ^ 1),
If(~interface.ack,
If(wishbone & ~interface.ack,
If(self.instruction.immediate[6], # post-inc
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
),
@@ -545,6 +627,15 @@ class ExecLS(ExecUnit, AutoDoc):
).Else(
NextState("MEM_EVEN1")
)
).Elif(~wishbone,
If(self.instruction.immediate[6], # post-inc
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
),
If(cpar, ## checkme
NextState("MEM_ODD")
).Else(
NextState("MEM_EVEN1")
)
))
lsseq.act("MEM_ODD", # clock alignment cycle
NextState("MEM_EVEN1"))
@@ -617,7 +708,7 @@ class ExecLS(ExecUnit, AutoDoc):
class Jareth(Module, AutoCSR, AutoDoc):
def __init__(self, platform, prefix, sim=False, build_prefix=""):
def __init__(self, platform, prefix, memoryport, sim=False, build_prefix=""):
opdoc = "\n"
for mnemonic, description in opcodes.items():
opdoc += f" * **{mnemonic}** ({str(description[0])}) -- {description[1]} \n"
@@ -1141,7 +1232,7 @@ Here are the currently implemented opcodes for The Engine:
exec_units = {
"exec_logic" : ExecLogic(width=rf_width_raw),
"exec_addsub" : ExecAddSub(width=rf_width_raw),
"exec_ls" : ExecLS(width=rf_width_raw, interface=self.busls, r_dat_f=r_dat_f, r_dat_m=r_dat_m, granule=granule),
"exec_ls" : ExecLS(width=rf_width_raw, interface=self.busls, memoryport=memoryport, r_dat_f=r_dat_f, r_dat_m=r_dat_m, granule=granule),
}
exec_units_shift = {
"exec_logic": True,

View File

@@ -23,8 +23,6 @@ from sbus_to_fpga_fsmstat import *
from sbus_to_fpga_blk_dma import *
from sbus_to_fpga_trng import *
from litedram.frontend.dma import *
from engine import Engine
from migen.genlib.cdc import BusSynchronizer
from migen.genlib.resetsync import AsyncResetSynchronizer
@@ -551,7 +549,7 @@ class SBusFPGA(SoCCore):
if (jareth):
from jareth import Jareth;
self.submodules.jareth = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100_gated"})(Jareth(platform=platform,prefix=self.mem_map.get("jareth", None))) # , "sys":"clk100"
self.submodules.jareth = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100_gated"})(Jareth(platform=platform,prefix=self.mem_map.get("jareth", None), memoryport=self.sdram.crossbar.get_port(mode="both", data_width=128))) # , "sys":"clk100"
self.bus.add_slave("jareth", self.jareth.bus, SoCRegion(origin=self.mem_map.get("jareth", None), size=0x20000, cached=False))
self.bus.add_master(name="jarethLS", master=self.jareth.busls) # Jareth doesn't need the DVMA
if (not engine):