more jareth
This commit is contained in:
@@ -120,12 +120,24 @@ struct scrolltest {
|
||||
int pm;
|
||||
int rop;
|
||||
};
|
||||
|
||||
/* debug only, to remove */
|
||||
#define GOBLIN_SCROLL _IOW('X', 0, struct scrolltest)
|
||||
#define GOBLIN_FILL _IOW('X', 1, struct scrolltest)
|
||||
#define GOBLIN_FILLROP _IOW('X', 2, struct scrolltest)
|
||||
#define GOBLIN_COPY _IOW('X', 3, struct scrolltest)
|
||||
#define GOBLIN_COPYREV _IOW('X', 4, struct scrolltest)
|
||||
|
||||
/*
 * JARETH_FN ioctl interface: look up one of the Jareth programs.
 *
 * The caller stores one of the JARETH_FN_NUM_* selectors below in
 * jareth_fn.off. The handler then overwrites off and len with the
 * program_offset[] / program_len[] entries of the selected program
 * (FILL -> 3, FILLROP -> 4, COPY -> 5, COPYREV -> 6), or sets both
 * to -1 when the selector is not recognised. The handler returns ENXIO
 * when the softc has no Jareth (sc_has_jareth is false).
 * NOTE(review): the unit of off/len (presumably microcode words) is not
 * visible here -- confirm against the program_offset/program_len setup.
 */
#define JARETH_FN_NUM_FILL 0
|
||||
#define JARETH_FN_NUM_FILLROP 1
|
||||
#define JARETH_FN_NUM_COPY 2
|
||||
#define JARETH_FN_NUM_COPYREV 3
|
||||
struct jareth_fn {
|
||||
int off; /* in: JARETH_FN_NUM_* selector; out: program offset, or -1 */
|
||||
int len; /* out: program length, or -1 for an unknown selector */
|
||||
};
|
||||
#define JARETH_FN _IOWR('j', 0, struct jareth_fn)
|
||||
|
||||
static int goblin_ioctl(void *, void *, u_long, void *, int, struct lwp *);
|
||||
static paddr_t goblin_mmap(void *, void *, off_t, int);
|
||||
static void goblin_init_screen(void *, struct vcons_screen *, int, long *);
|
||||
@@ -168,14 +180,14 @@ static const uint32_t program_fillrop[42] = { 0x13800089,0x130000c9,0x01bc0014
|
||||
0x0180018d,0x801c0013,0x001c11e2,0xc03c7013,0x000e10c6,0x010000c9,0x00004005,0xf8000809,
|
||||
0x0000000a,0x0000000a };
|
||||
|
||||
static const uint32_t program_copy[48] = { 0x16800089,0x160000c9,0x01bc0014,0x0b00000d,0x013f0014,0x003f0054,0x002400c0,0x00180000,
|
||||
0x403c0192,0x80a00013,0x001c0013,0x001c0220,0x403c7013,0x00184185,0x00261246,0xfc000248,
|
||||
0x0026f007,0x00249c06,0x0024224f,0x00240250,0x00009005,0x00089086,0x0b800089,0x013f0814,
|
||||
0x00049045,0x003f0054,0x001af087,0x403c0012,0x00246086,0xa0a00013,0x02800249,0x001c0220,
|
||||
0x603c7013,0x00270246,0x20a08015,0xfd800248,0x0280018d,0x013c6814,0x001c0013,0x001c0220,
|
||||
0x403c7013,0x013f0814,0x000e10c6,0x010000c9,0x00004005,0xf6800809,0x0000000a,0x0000000a };
|
||||
static const uint32_t program_copy[48] = { 0x16800089,0x160000c9,0x01bc0014,0x013c2014,0x003f0054,0x0a00000d,0x002400c0,0x00180000,
|
||||
0x403c0192,0x80a00013,0x001c0013,0x001c0220,0x403c7013,0x00184185,0x00261246,0xfc000248,
|
||||
0x0026f007,0x00249c06,0x0024224f,0x00240250,0x00009005,0x00089086,0x0b800089,0x013f0814,
|
||||
0x00049045,0x003f0054,0x001af087,0x403c0012,0x00246086,0xa0a00013,0x02800249,0x001c0220,
|
||||
0x603c7013,0x00270246,0x20a08015,0xfd800248,0x0280018d,0x013c6814,0x001c0013,0x001c0220,
|
||||
0x403c7013,0x013f0814,0x000e10c6,0x010000c9,0x00004005,0xf6800809,0x0000000a,0x0000000a };
|
||||
|
||||
static const uint32_t program_copyrev[66] = { 0x1f800089,0x1f0000c9,0x01bc0014,0x003af007,0x00280000,0x002c0040,0x00340080,0x03800389,0x0038ec06,0x0038238f,0x00380390,0x0028e285,0x002ce2c5,0x0034e086,0x12800349,0x013f0814,0x003f02d4,0x001af347,0x003c6346,0x060003c9,0x003c03d0,0x0028f285,0x002cf2c5,0x00800188,0x002b0286,0x003ef2c7,0x020003c9,0x002f02c5,0x003c0c00,0x00bc03d0,0x0028f285,0x003000c0,0x403c0292,0x00246346,0x10a00013,0x0300018d,0x013c6814,0x10a08016,0x001c0013,0x001c0220,0x503c7013,0x013f0814,0x02800249,0x10a08016,0x001c0220,0x503c7013,0x00270246,0xfd000248,0x00321306,0x01000309,0x00284285,0xf6000809,0x05800389,0x013f0014,0x003f0054,0x002400c0,0x403c0012,0x80a00013,0x001c0013,0x001c0220,0x403c7013,0x00004005,0x00261246,0xfc000248,0x0000000a,0x0000000a };
|
||||
static const uint32_t program_copyrev[66] = { 0x1f800089,0x1f0000c9,0x01bc0014,0x00280000,0x002c0040,0x00340080,0x003af007,0x03800389,0x0038ec06,0x0038238f,0x00380390,0x0028e285,0x002ce2c5,0x0034e086,0x12800349,0x013f0814,0x003f02d4,0x001af347,0x003c6346,0x003c03d0,0x0028f285,0x002cf2c5,0x02000188,0x003c0c00,0x003c03d0,0x0028f286,0x002cf2c6,0x002f02c5,0x003c0c00,0x00bc03d0,0x0028f285,0x003000c0,0x403c0292,0x00246346,0x10a00013,0x0300018d,0x013c6814,0x10a08016,0x001c0013,0x001c0220,0x503c7013,0x013f0814,0x02800249,0x10a08016,0x001c0220,0x503c7013,0x00270246,0xfd000248,0x00321306,0x01000309,0x00284285,0xf6000809,0x05800389,0x013c2014,0x003f0054,0x002400c0,0x403c0012,0x80a00013,0x001c0013,0x001c0220,0x403c7013,0x00004005,0x00261246,0xfc000248,0x0000000a,0x0000000a };
|
||||
|
||||
static const uint32_t* programs[8] = { program_scroll128, program_fill128, program_fill256, program_fill,
|
||||
program_fillrop, program_copy, program_copyrev, NULL };
|
||||
@@ -430,6 +442,28 @@ goblinioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
|
||||
}
|
||||
break;
|
||||
|
||||
case JARETH_FN: {
|
||||
struct jareth_fn *fn = (struct jareth_fn *)data;
|
||||
int pidx = -1;
|
||||
if (!sc->sc_has_jareth) {
|
||||
return ENXIO;
|
||||
}
|
||||
switch (fn->off) {
|
||||
case JARETH_FN_NUM_FILL: pidx = 3; break;
|
||||
case JARETH_FN_NUM_FILLROP: pidx = 4; break;
|
||||
case JARETH_FN_NUM_COPY: pidx = 5; break;
|
||||
case JARETH_FN_NUM_COPYREV: pidx = 6; break;
|
||||
}
|
||||
if (pidx != -1) {
|
||||
fn->off = program_offset[pidx];
|
||||
fn->len = program_len[pidx];
|
||||
} else {
|
||||
fn->off = -1;
|
||||
fn->len = -1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
return (ENOTTY);
|
||||
}
|
||||
@@ -1143,7 +1177,10 @@ static int wait_job(struct goblin_softc *sc, uint32_t param, enum jareth_verbosi
|
||||
} else {
|
||||
//aprint_normal_dev(sc->sc_dev, "WAIT - Jareth status: 0x%08x [%d] ls_status: 0x%08x\n", status, count, jareth_ls_status_read(sc));
|
||||
}
|
||||
|
||||
|
||||
#if 1
|
||||
device_printf(sc->sc_dev, "last run took %d cycle (eng_clk)\n", jareth_cyc_counter_read(sc));
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -76,6 +76,10 @@ typedef struct {
|
||||
uint32_t fg;
|
||||
int xdir, ydir;
|
||||
uint32_t srcoff, srcpitch;
|
||||
int fill_off, fill_len;
|
||||
int fillrop_off, fillrop_len;
|
||||
int copy_off, copy_len;
|
||||
int copyrev_off, copyrev_len;
|
||||
} GoblinRec, *GoblinPtr;
|
||||
|
||||
extern int GoblinScreenPrivateIndex;
|
||||
@@ -106,8 +110,18 @@ int GOBLINEXAInit(ScreenPtr);
|
||||
#include <dev/sun/fbio.h>
|
||||
#include <sys/ioccom.h>
|
||||
#define GOBLIN_SET_PIXELMODE _IOW('M', 3, int)
|
||||
/*
 * Driver-side copy of the JARETH_FN ioctl interface.
 *
 * Usage as seen in the screen-init code: set jareth_fn.off to one of the
 * JARETH_FN_NUM_* selectors, issue ioctl(fd, JARETH_FN, &jfn), and treat
 * a non-zero ioctl result or a returned off of -1 as failure. On success
 * off/len are saved in the matching GoblinRec *_off / *_len fields, which
 * are later written to the Jareth mpstart / mplen registers.
 * NOTE(review): these values and the struct layout must stay in sync with
 * the kernel driver's definition -- confirm when either side changes.
 */
#define JARETH_FN_NUM_FILL 0
|
||||
#define JARETH_FN_NUM_FILLROP 1
|
||||
#define JARETH_FN_NUM_COPY 2
|
||||
#define JARETH_FN_NUM_COPYREV 3
|
||||
struct jareth_fn {
|
||||
int off; /* in: JARETH_FN_NUM_* selector; out: program offset, -1 on failure */
|
||||
int len; /* out: program length */
|
||||
};
|
||||
#define JARETH_FN _IOWR('j', 0, struct jareth_fn)
|
||||
#else
|
||||
#define GOBLIN_SET_PIXELMODE (('M' << 8) | 3)
|
||||
#error "toto"
|
||||
#endif
|
||||
|
||||
#endif /* GOBLIN_H */
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
|
||||
/* DGA stuff */
|
||||
|
||||
#define DEBUG_GOBLIN 1
|
||||
//#define DEBUG_GOBLIN 1
|
||||
|
||||
#ifdef DEBUG_GOBLIN
|
||||
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
|
||||
@@ -249,6 +249,8 @@ GoblinWait(GoblinPtr pGoblin)
|
||||
|
||||
if (status & 1) {
|
||||
xf86Msg(X_ERROR, "Jareth wait for idle timed out %08x %08x\n", status);
|
||||
} else {
|
||||
xf86Msg(X_INFO, "Jareth: last operation took %d cycles (eng_clk)\n", pGoblin->jreg->cyc_counter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,12 +350,12 @@ GoblinPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
|
||||
if ((alu == 0x3) && // GCcopy
|
||||
(planemask == 0xFFFFFFFF)) { // full pattern
|
||||
// fill
|
||||
pGoblin->jreg->mpstart = 37; // FIXME
|
||||
pGoblin->jreg->mplen = 38;
|
||||
pGoblin->jreg->mpstart = pGoblin->fill_off;
|
||||
pGoblin->jreg->mplen = pGoblin->fill_len;
|
||||
} else {
|
||||
// fillrop
|
||||
pGoblin->jreg->mpstart = 75; // FIXME
|
||||
pGoblin->jreg->mplen = 41;
|
||||
pGoblin->jreg->mpstart = pGoblin->fillrop_off;
|
||||
pGoblin->jreg->mplen = pGoblin->fillrop_len;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
@@ -429,18 +431,31 @@ GoblinPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
|
||||
pGoblin->last_mask = planemask;
|
||||
pGoblin->last_rop = alu;
|
||||
|
||||
if ((alu == 0x3) && // GCcopy
|
||||
(planemask == 0xFFFFFFFF)) { // full pattern
|
||||
// fill
|
||||
pGoblin->jreg->mpstart = 116; // FIXME
|
||||
pGoblin->jreg->mplen = 49;
|
||||
if (pGoblin->xdir > 0) {
|
||||
if ((alu == 0x3) && // GCcopy
|
||||
(planemask == 0xFFFFFFFF)) { // full pattern
|
||||
// fill
|
||||
pGoblin->jreg->mpstart = pGoblin->copy_off;
|
||||
pGoblin->jreg->mplen = pGoblin->copy_len;
|
||||
} else {
|
||||
// fillrop
|
||||
pGoblin->jreg->mpstart = pGoblin->copy_off; // FIXME
|
||||
pGoblin->jreg->mplen = pGoblin->copy_len;
|
||||
}
|
||||
} else {
|
||||
// fillrop
|
||||
pGoblin->jreg->mpstart = 116; // FIXME FIXME FIXME
|
||||
pGoblin->jreg->mplen = 49;
|
||||
if ((alu == 0x3) && // GCcopy
|
||||
(planemask == 0xFFFFFFFF)) { // full pattern
|
||||
// fill
|
||||
pGoblin->jreg->mpstart = pGoblin->copyrev_off;
|
||||
pGoblin->jreg->mplen = pGoblin->copyrev_len;
|
||||
} else {
|
||||
// fillrop
|
||||
pGoblin->jreg->mpstart = pGoblin->copyrev_off; // FIXME
|
||||
pGoblin->jreg->mplen = pGoblin->copyrev_len;
|
||||
}
|
||||
}
|
||||
|
||||
DPRINTF(X_ERROR, "PrepareCopy: alu %d, pm 0x%08\n", alu, planemask);
|
||||
DPRINTF(X_ERROR, "PrepareCopy: alu %d, pm 0x%08x, xdir/ydir %d/%d\n", alu, planemask, xdir, ydir);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
@@ -471,8 +486,6 @@ GoblinCopy(PixmapPtr pDstPixmap,
|
||||
dstpitch = -dstpitch;
|
||||
}
|
||||
|
||||
// FIXME: xdir < 0
|
||||
|
||||
// 32 bits
|
||||
w = w*4;
|
||||
|
||||
|
||||
@@ -554,7 +554,44 @@ GOBLINScreenInit(SCREEN_INIT_ARGS_DECL)
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "xf86MapSbusMem failed for Jareth\n");
|
||||
pGoblin->has_accel = FALSE;
|
||||
} else {
|
||||
struct jareth_fn jfn;
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Jareth successfully mapped\n");
|
||||
// get some functions
|
||||
jfn.off = JARETH_FN_NUM_FILL;
|
||||
if (ioctl (pGoblin->psdp->fd, JARETH_FN, &jfn) || (jfn.off == -1)) {
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Fill function retrieval failed for Jareth\n");
|
||||
pGoblin->has_accel = FALSE;
|
||||
} else {
|
||||
pGoblin->fill_off = jfn.off;
|
||||
pGoblin->fill_len = jfn.len;
|
||||
}
|
||||
jfn.off = JARETH_FN_NUM_FILLROP;
|
||||
if (ioctl (pGoblin->psdp->fd, JARETH_FN, &jfn) || (jfn.off == -1)) {
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Fillrop function retrieval failed for Jareth\n");
|
||||
pGoblin->has_accel = FALSE;
|
||||
} else {
|
||||
pGoblin->fillrop_off = jfn.off;
|
||||
pGoblin->fillrop_len = jfn.len;
|
||||
}
|
||||
jfn.off = JARETH_FN_NUM_COPY;
|
||||
if (ioctl (pGoblin->psdp->fd, JARETH_FN, &jfn) || (jfn.off == -1)) {
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Copy function retrieval failed for Jareth\n");
|
||||
pGoblin->has_accel = FALSE;
|
||||
} else {
|
||||
pGoblin->copy_off = jfn.off;
|
||||
pGoblin->copy_len = jfn.len;
|
||||
}
|
||||
jfn.off = JARETH_FN_NUM_COPYREV;
|
||||
if (ioctl (pGoblin->psdp->fd, JARETH_FN, &jfn) || (jfn.off == -1)) {
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Copyrev function retrieval failed for Jareth\n");
|
||||
pGoblin->has_accel = FALSE;
|
||||
} else {
|
||||
pGoblin->copyrev_off = jfn.off;
|
||||
pGoblin->copyrev_len = jfn.len;
|
||||
}
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Jareth functions: fill %d %d, fillrop %d %d, copy %d %d, copyrev %d %d\n",
|
||||
pGoblin->fill_off, pGoblin->fill_len, pGoblin->fillrop_off, pGoblin->fillrop_len,
|
||||
pGoblin->copy_off, pGoblin->copy_len, pGoblin->copyrev_off, pGoblin->copyrev_len);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ typedef struct jareth_reg {
|
||||
volatile uint32_t ev_enable;
|
||||
volatile uint32_t instruction;
|
||||
volatile uint32_t ls_status;
|
||||
volatile uint32_t cyc_counter;
|
||||
} JarethReg, *JarethRegPtr;
|
||||
|
||||
typedef struct jareth_microcode {
|
||||
|
||||
@@ -503,7 +503,7 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
If((self.instruction.opcode == opcodes["MEM"][0]) | (self.instruction.opcode == opcodes["LOADH"][0]) | (self.instruction.opcode == opcodes["LOADL"][0]),
|
||||
NextValue(cpar, 0),
|
||||
NextValue(address, addresses[self.instruction.immediate[0:log2_int(width//32)]]),
|
||||
NextValue(wishbone, ~(addresses[self.instruction.immediate[0:log2_int(width//32)]] == 0x8)),
|
||||
NextValue(wishbone, ~(addresses[self.instruction.immediate[0:log2_int(width//32)]][24:28] == 0x8)),
|
||||
NextState("DOMEM"),
|
||||
).Elif(self.instruction.opcode == opcodes["SETM"][0],
|
||||
Case(self.instruction.immediate[0:2],
|
||||
@@ -516,7 +516,8 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
NextState("MEM_ODD") ],
|
||||
0x2 : [ NextValue(r_dat_f[2], self.a[(granule_bits-3):len(r_dat_f[2])]),
|
||||
NextValue(offset, self.a[(granule_bits-3):len(r_dat_f[2])]),
|
||||
NextValue(offsetpsize, self.b[0:max_size_bits] + ((self.a[(granule_bits-3):len(r_dat_f[2])]) << (granule_bits-3)) ),
|
||||
#NextValue(offsetpsize, self.b[0:max_size_bits] + ((self.a[(granule_bits-3):len(r_dat_f[2])]) << (granule_bits-3)) ),
|
||||
NextValue(offsetpsize, self.b[0:max_size_bits]),
|
||||
NextState("GENMASK_R0"),
|
||||
],
|
||||
0x1 : [ NextValue(r_dat_f[1], self.a[(granule_bits-3):len(r_dat_f[1])]),
|
||||
@@ -606,27 +607,37 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
)
|
||||
)
|
||||
)
|
||||
for X in range(0, granule_num):
|
||||
lsseq.act("GENMASK_R" + str(X),
|
||||
NextValue(cpar, cpar ^ 1),
|
||||
If((offsetpsize > X) & (X >= offset),
|
||||
NextValue(r_dat_m[self.instruction.immediate[0:2]][X], 1),
|
||||
).Else(
|
||||
NextValue(r_dat_m[self.instruction.immediate[0:2]][X], 0),
|
||||
),
|
||||
If(X == (granule_num-1),
|
||||
If(cpar, ## checkme
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
)
|
||||
).Else(
|
||||
NextState("GENMASK_R" + str(X+1)),
|
||||
),
|
||||
)
|
||||
lsseq.act("GENMASK_R"+str(granule_num), # avoids MiGen complaining, unreachable
|
||||
#for X in range(0, granule_num):
|
||||
# lsseq.act("GENMASK_R" + str(X),
|
||||
# NextValue(cpar, cpar ^ 1),
|
||||
# If((offsetpsize > X) & (X >= offset),
|
||||
# NextValue(r_dat_m[self.instruction.immediate[0:2]][X], 1),
|
||||
# ).Else(
|
||||
# NextValue(r_dat_m[self.instruction.immediate[0:2]][X], 0),
|
||||
# ),
|
||||
# If(X == (granule_num-1),
|
||||
# If(cpar,
|
||||
# NextState("MEM_ODD")
|
||||
# ).Else(
|
||||
# NextState("MEM_EVEN1")
|
||||
# )
|
||||
# ).Else(
|
||||
# NextState("GENMASK_R" + str(X+1)),
|
||||
# ),
|
||||
# )
|
||||
#lsseq.act("GENMASK_R"+str(granule_num), # avoids MiGen complaining, unreachable
|
||||
# NextValue(cpar, cpar ^ 1),
|
||||
# If(cpar,
|
||||
# NextState("MEM_ODD")
|
||||
# ).Else(
|
||||
# NextState("MEM_EVEN1")
|
||||
# )
|
||||
#)
|
||||
lsseq.act("GENMASK_R0",
|
||||
NextValue(cpar, cpar ^ 1),
|
||||
If(cpar, ## checkme
|
||||
NextValue(r_dat_m[self.instruction.immediate[0:2]],
|
||||
(((Signal(33, reset=1) << offsetpsize) - 1) << (offset))),
|
||||
If(cpar,
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
@@ -665,7 +676,7 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
If(wishbone & ~interface.ack,
|
||||
If(self.instruction.immediate[6], # post-inc
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
|
||||
).Elif(self.instruction.immediate[5], # post-inc
|
||||
).Elif(self.instruction.immediate[5], # post-dec
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] - 1),
|
||||
),
|
||||
If(self.instruction.immediate[8],
|
||||
@@ -682,7 +693,7 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
If(self.instruction.opcode == opcodes["MEM"][0],
|
||||
NextValue(lbuf[128:256], 0),
|
||||
),
|
||||
If(cpar, ## checkme
|
||||
If(cpar,
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
@@ -697,7 +708,7 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
If(memoryport.cmd.ready,
|
||||
If(self.instruction.immediate[6], # post-inc
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
|
||||
).Elif(self.instruction.immediate[5], # post-inc
|
||||
).Elif(self.instruction.immediate[5], # post-dec
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] - 1),
|
||||
),
|
||||
NextState("MEMh"),
|
||||
@@ -705,11 +716,13 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
).Else( # no high
|
||||
If(self.instruction.immediate[6], # post-inc
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
|
||||
).Elif(self.instruction.immediate[5], # post-inc
|
||||
).Elif(self.instruction.immediate[5], # post-dec
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] - 1),
|
||||
),
|
||||
NextValue(lbuf[128:256], 0),
|
||||
If(cpar, ## checkme
|
||||
If(self.instruction.opcode == opcodes["MEM"][0],
|
||||
NextValue(lbuf[128:256], 0),
|
||||
),
|
||||
If(cpar,
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
@@ -749,11 +762,11 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
If(wishbone & ~interface.ack,
|
||||
If(self.instruction.immediate[6], # post-inc
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
|
||||
).Elif(self.instruction.immediate[5], # post-inc
|
||||
).Elif(self.instruction.immediate[5], # post-dec
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] - 1),
|
||||
),
|
||||
#NextValue(tries, 0),
|
||||
If(cpar, ## checkme
|
||||
If(cpar,
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
@@ -761,10 +774,10 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
).Elif(~wishbone,
|
||||
If(self.instruction.immediate[6], # post-inc
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] + 1),
|
||||
).Elif(self.instruction.immediate[5], # post-inc
|
||||
).Elif(self.instruction.immediate[5], # post-dec
|
||||
NextValue(addresses[self.instruction.immediate[0:log2_int(width//32)]], addresses[self.instruction.immediate[0:log2_int(width//32)]] - 1),
|
||||
),
|
||||
If(cpar, ## checkme
|
||||
If(cpar,
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
@@ -785,7 +798,7 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
# NextValue(tries, 1),
|
||||
# NextState("IDLE")
|
||||
#).Else(NextValue(tries, 0), # no third attempt, give up
|
||||
If(cpar, ## checkme
|
||||
If(cpar,
|
||||
NextState("MEM_ODD")
|
||||
).Else(
|
||||
NextState("MEM_EVEN1")
|
||||
@@ -802,21 +815,21 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
self.q.eq(0), #self.a
|
||||
)
|
||||
).Elif(self.instruction.opcode == opcodes["SETM"][0],
|
||||
self.q.eq(0), #self.a
|
||||
self.q.eq(0), #self.a
|
||||
).Elif(self.instruction.opcode == opcodes["ADR"][0],
|
||||
If(~self.instruction.immediate[7],
|
||||
If(~self.instruction.immediate[7], # getadr
|
||||
[ self.q[x*32:(x+1)*32].eq(Cat(Signal(4, reset = 0), addresses[x])) for x in range(width//32) ],
|
||||
).Else(
|
||||
self.q.eq(0),
|
||||
)
|
||||
).Elif(self.instruction.opcode == opcodes["GETM"][0],
|
||||
self.q.eq(Cat(Cat(r_dat_f[0], Signal(28, reset = 0)),
|
||||
self.q.eq(Cat(Cat(r_dat_f[0], Signal(32-len(r_dat_f[0]), reset = 0)),
|
||||
r_dat_m[0],
|
||||
Cat(r_dat_f[1], Signal(28, reset = 0)),
|
||||
Cat(r_dat_f[1], Signal(32-len(r_dat_f[1]), reset = 0)),
|
||||
r_dat_m[1],
|
||||
Cat(r_dat_f[2], Signal(28, reset = 0)),
|
||||
Cat(r_dat_f[2], Signal(32-len(r_dat_f[2]), reset = 0)),
|
||||
r_dat_m[2],
|
||||
Cat(r_dat_f[3], Signal(28, reset = 0)),
|
||||
Cat(r_dat_f[3], Signal(32-len(r_dat_f[3]), reset = 0)),
|
||||
r_dat_m[3])),
|
||||
).Else(
|
||||
self.q.eq(0xBADD0000_BADD0000_BADD0000_BADD0000_BADD0000_BADD0000_BADD0000_BADD0000),
|
||||
@@ -1047,6 +1060,7 @@ Here are the currently implemented opcodes for The Engine:
|
||||
]
|
||||
|
||||
self.ls_status = CSRStatus(32, description="Status of the L/S unit")
|
||||
self.cyc_counter = CSRStatus(32, description="Cycle counter for each run")
|
||||
|
||||
### wishbone bus interface: decode the two address spaces and dispatch accordingly
|
||||
self.bus = bus = wishbone.Interface()
|
||||
@@ -1495,6 +1509,17 @@ Here are the currently implemented opcodes for The Engine:
|
||||
self.sync += abort.eq((abort & ~engine_go) | (self.exec_ls.has_failure[0] | self.exec_ls.has_failure[1] | self.exec_ls.has_timeout[0] | self.exec_ls.has_timeout[1]))
|
||||
self.comb += self.ls_status.status.eq(self.exec_ls.state)
|
||||
|
||||
cycctr = Signal(32)
|
||||
engine_go_old = Signal()
|
||||
self.sync.eng_clk += [
|
||||
engine_go_old.eq(engine_go),
|
||||
If(running,
|
||||
cycctr.eq(cycctr + 1)),
|
||||
If(engine_go & ~engine_go_old, # pos edge
|
||||
cycctr.eq(0)),
|
||||
]
|
||||
self.comb += self.cyc_counter.status.eq(cycctr)
|
||||
|
||||
##### TIMING CONSTRAINTS -- you want these. Trust me.
|
||||
|
||||
clk50 = "clk50"
|
||||
|
||||
@@ -491,14 +491,14 @@ fn main() -> std::io::Result<()> {
|
||||
brz32 done128, %3
|
||||
// reset masks
|
||||
resm %15
|
||||
// set alignment; we shift by the addr offset
|
||||
setmq %15, %0, %2
|
||||
setma %15, %1, #16
|
||||
// if $DST is aligned on 128 bits, jump to aligned loop
|
||||
brz4 start128, %0
|
||||
|
||||
// do the first column to align $DST
|
||||
startX:
|
||||
// set alignment; we shift by the addr offset
|
||||
setmq %15, %0, #16
|
||||
setma %15, %1, #16
|
||||
// copy Y
|
||||
psa %9, %3
|
||||
// copy $SRC / $DST
|
||||
@@ -614,6 +614,7 @@ fn main() -> std::io::Result<()> {
|
||||
// leftover X in %6
|
||||
// data in %7
|
||||
// masked data in %7
|
||||
// src data in %8
|
||||
// live X count in %9
|
||||
// $SRC / $DST in %10
|
||||
// $DST / $SRC in %11
|
||||
@@ -636,13 +637,13 @@ fn main() -> std::io::Result<()> {
|
||||
brz32 done128, %3
|
||||
// reset masks
|
||||
resm %15
|
||||
// compute how much the tail loop will handle (first column) (#15 is 15, #16 is 16)
|
||||
and %14, %0, #15
|
||||
// copy addresses
|
||||
psa %10, %0
|
||||
psa %11, %1
|
||||
// set todo X
|
||||
psa %13, %2
|
||||
// compute how much the tail loop will handle (first column) (#15 is 15, #16 is 16), first the offset
|
||||
and %14, %0, #15
|
||||
// if 0, then we don't need a tail loop, so skip extra computation (that would wrongly give 16)
|
||||
brz32 skip, %14
|
||||
|
||||
@@ -652,9 +653,9 @@ fn main() -> std::io::Result<()> {
|
||||
min32v %14, %14, %2
|
||||
// more than one address to increment
|
||||
bcast32 %14, %14
|
||||
// add the count to the addresses, SRC will now be aligned
|
||||
// add the count to the addresses, DST will now be aligned
|
||||
add32v %10, %10, %14
|
||||
// add the count to the addresses, DST will have the proper alignment to shift input in the aligned loop
|
||||
// add the count to the addresses, SRC will have the proper alignment to shift input in the aligned loop
|
||||
add32v %11, %11, %14
|
||||
// so, do we do everything there ?
|
||||
sub32v %13, %2, %14
|
||||
@@ -664,41 +665,50 @@ fn main() -> std::io::Result<()> {
|
||||
skip:
|
||||
// reset q mask (we will be aligned from now on)
|
||||
setmq %15, #0, #16
|
||||
// reset a mask to the proper shifting
|
||||
// set a mask to the proper shifting
|
||||
setma %15, %11, #16
|
||||
|
||||
// now we need to figure out where we start to go backward
|
||||
// currently we have the number of 'tail' (first column) elements in %14 (0 for aligned), number of 'loop' elements in %13,
|
||||
// and $SRC+%14 & $DST+%14 in $10/$11 we $SRC+%14 aligned.
|
||||
// currently we have the number of 'tail' (first column...) elements in %14 (0 for aligned), number of 'loop' elements in %13,
|
||||
// and $SRC+%14 & $DST+%14 in $10/$11 with $SRC+%14 aligned.
|
||||
// compute X leftovers (%13 modulo 16 -> #15 is 15) in %6, we will have to start with those
|
||||
and %6, %13, #15
|
||||
// compute the 'aligned' number of elements
|
||||
sub32v %15, %13, %6
|
||||
// if 0, jump to the main loop as we already have the proper addresses
|
||||
brz32 loop128_y, %15
|
||||
|
||||
bcast32 %15, %15
|
||||
|
||||
// add the aligned number of elements to $SRC+%14 & $DST+%14
|
||||
add32v %10, %10, %15
|
||||
add32v %11, %11, %15
|
||||
|
||||
// if %6 is 0 (no leftovers), then $DST is pointing after the last element so need to remove 16 from $DST
|
||||
// if %6 is 0 (no leftovers), then $DST is pointing after the last element so need to remove 16 from $DST and $SRC
|
||||
brnz32 skip2, %6
|
||||
sub32v %10, %10, #16
|
||||
skip2: // if $SRC is not aligned, we also need to add 16 (for prefetch)
|
||||
and %15, %11, #15
|
||||
brz32 skip3, %15
|
||||
psa %15, #16
|
||||
bcast32 %15, %15
|
||||
sub32v %10, %10, %15
|
||||
sub32v %11, %11, %15
|
||||
|
||||
skip2: // // if $SRC+%13 is not aligned, we also need to add 16 (for prefetch)
|
||||
// add32v %15, %11, %6
|
||||
// and %15, %15, #15
|
||||
// brz32 skip3, %15
|
||||
|
||||
add32v %11, %11, #16
|
||||
psa %15, #16
|
||||
swap32 %15, %15
|
||||
add32v %10, %10, %15
|
||||
|
||||
// add32v %15, %6, #16
|
||||
// add32v %11, %11, %15
|
||||
// swap32 %15, %15
|
||||
// add32v %10, %10, %15
|
||||
|
||||
skip3:
|
||||
// copy Y count
|
||||
psa %12, %3
|
||||
|
||||
loop128_y:
|
||||
// set source and destination addresses for current Y // FIXME : +X, -1?
|
||||
// set source and destination addresses for current Y
|
||||
setadr %15, %10
|
||||
// then the rounded value in X
|
||||
sub32v %9, %13, %6
|
||||
@@ -755,7 +765,7 @@ fn main() -> std::io::Result<()> {
|
||||
// do the first column if we need to
|
||||
brz32 done128, %14
|
||||
// set alignment; we shift by the addr offset
|
||||
setmq %15, %0, #16
|
||||
setmq %15, %0, %2
|
||||
setma %15, %1, #16
|
||||
// copy Y
|
||||
psa %9, %3
|
||||
@@ -777,7 +787,7 @@ fn main() -> std::io::Result<()> {
|
||||
// if not zero, continue
|
||||
brnz32 loopX_y, %9
|
||||
|
||||
done128:
|
||||
done128:
|
||||
fin
|
||||
fin
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user